From 4ecfc6590bf609cc509d98f9e75bf3b668f21aa5 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 3 Dec 2023 01:42:58 +0100 Subject: [PATCH 01/22] Loop-based impl --- src/coreclr/jit/codegen.h | 1 + src/coreclr/jit/codegenarmarch.cpp | 62 +++++++++++++++++++ src/coreclr/jit/codegenloongarch64.cpp | 16 +++++ src/coreclr/jit/codegenriscv64.cpp | 16 +++++ src/coreclr/jit/codegenxarch.cpp | 48 ++++++++++++++ src/coreclr/jit/gentree.cpp | 5 ++ src/coreclr/jit/gentree.h | 10 +-- src/coreclr/jit/lowerarmarch.cpp | 3 +- src/coreclr/jit/lowerloongarch64.cpp | 3 +- src/coreclr/jit/lowerriscv64.cpp | 3 +- src/coreclr/jit/lowerxarch.cpp | 6 +- src/coreclr/jit/lsraarmarch.cpp | 5 ++ src/coreclr/jit/lsrabuild.cpp | 1 + src/coreclr/jit/lsraloongarch64.cpp | 5 ++ src/coreclr/jit/lsrariscv64.cpp | 5 ++ src/coreclr/jit/lsraxarch.cpp | 5 ++ .../JIT/opt/Structs/StructWithGC_Zeroing.cs | 30 +++++++++ .../opt/Structs/StructWithGC_Zeroing.csproj | 14 +++++ 18 files changed, 229 insertions(+), 9 deletions(-) create mode 100644 src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs create mode 100644 src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 762245840f930..a7a67e17cfae6 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1228,6 +1228,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #ifndef TARGET_X86 void genCodeForInitBlkHelper(GenTreeBlk* initBlkNode); #endif + void genCodeForInitBlkLoop(GenTreeBlk* initBlkNode); void genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode); void genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode); void genJumpTable(GenTree* tree); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 60c6e65bbea6f..c23154a7146e4 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3243,6 +3243,63 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); } +//------------------------------------------------------------------------ +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// +// Arguments: +// initBlkNode - the GT_STORE_BLK node +// +void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) +{ + GenTree* const dstNode = initBlkNode->Addr(); + genConsumeReg(dstNode); + const regNumber dstReg = dstNode->GetRegNum(); + +#ifdef TARGET_ARM64 + const regNumber zeroReg = REG_ZR; +#else + GenTree* const zeroNode = initBlkNode->Data(); + genConsumeReg(zeroNode); + const regNumber zeroReg = zeroNode->GetRegNum(); +#endif + + if (initBlkNode->IsVolatile()) + { + // issue a full memory barrier before a volatile initBlock Operation + instGen_MemoryBarrier(); + } + + // mov offsetReg, + //.LOOP: + // str xzr, [dstReg, offsetReg] + // subs offsetReg, offsetReg, #8 + // bne .LOOP + // str xzr, [dstReg] + + const unsigned size = initBlkNode->GetLayout()->GetSize(); + assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + + if (size > TARGET_POINTER_SIZE) + { + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); + + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + + GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); +#ifdef TARGET_ARM64 + GetEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); +#else + // There is no subs (sets flags) instruction on ARM32, so we use sub and cmp instead. + GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); + GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); +#endif + inst_JMP(EJ_ne, loop); + } + GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); +} + //------------------------------------------------------------------------ // genCall: Produce code for a GT_CALL node // @@ -4513,6 +4570,11 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) genCodeForCpObj(blkOp->AsBlk()); break; + case GenTreeBlk::BlkOpKindLoop: + assert(!isCopyBlk); + genCodeForInitBlkLoop(blkOp); + break; + case GenTreeBlk::BlkOpKindHelper: assert(!blkOp->gtBlkOpGcUnsafe); if (isCopyBlk) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 44edc3fae0fee..49da4a7b4480b 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6386,6 +6386,17 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); } +//------------------------------------------------------------------------ +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// +// Arguments: +// initBlkNode - the GT_STORE_BLK node +// +void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) +{ + // TODO: +} + // Generate code for a load from some address + offset // base: tree node which can be either a local address or arbitrary node // offset: distance from the base from which to load @@ -7297,6 +7308,11 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) genCodeForCpObj(blkOp->AsBlk()); break; + case GenTreeBlk::BlkOpKindLoop: + assert(!isCopyBlk); + genCodeForInitBlkLoop(blkOp); + break; + case GenTreeBlk::BlkOpKindHelper: if (isCopyBlk) { diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index c2dafce1fb0a6..025a4883ee0c6 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6059,6 +6059,17 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) } } +//------------------------------------------------------------------------ +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// +// Arguments: +// initBlkNode - the GT_STORE_BLK node +// +void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) +{ + // TODO: +} + //------------------------------------------------------------------------ // genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call // @@ -6948,6 +6959,11 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) genCodeForCpObj(blkOp->AsBlk()); break; + case GenTreeBlk::BlkOpKindLoop: + assert(!isCopyBlk); + genCodeForInitBlkLoop(blkOp); + break; + case GenTreeBlk::BlkOpKindHelper: if (isCopyBlk) { diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 06a44d4b3158a..cd57a5d6bbea1 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3033,6 +3033,11 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode) genCodeForCpObj(storeBlkNode->AsBlk()); break; + case GenTreeBlk::BlkOpKindLoop: + assert(!isCopyBlk); + genCodeForInitBlkLoop(storeBlkNode); + break; + #ifdef TARGET_AMD64 case GenTreeBlk::BlkOpKindHelper: assert(!storeBlkNode->gtBlkOpGcUnsafe); @@ -3311,6 +3316,49 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) #endif } +//------------------------------------------------------------------------ +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// +// Arguments: +// initBlkNode - the GT_STORE_BLK node +// +void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) +{ + GenTree* const dstNode = initBlkNode->Addr(); + GenTree* const zeroNode = initBlkNode->Data(); + + genConsumeReg(dstNode); + genConsumeReg(zeroNode); + + const regNumber dstReg = dstNode->GetRegNum(); + const regNumber zeroReg = zeroNode->GetRegNum(); + + // xor zeroReg, zeroReg + // mov offsetReg, + //.LOOP: + // mov qword ptr [dstReg + offsetReg], zeroReg + // sub offsetReg, 8 + // jne .LOOP + // mov qword ptr [dstReg], zeroReg + + const unsigned size = initBlkNode->GetLayout()->GetSize(); + assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + + if (size > TARGET_POINTER_SIZE) + { + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); + + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + + GetEmitter()->emitIns_ARX_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, offsetReg, 1, 0); + GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); + inst_JMP(EJ_jne, loop); + } + GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); +} + #ifdef TARGET_AMD64 //------------------------------------------------------------------------ // genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 2a6505d973b34..3a58958e6ddd2 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -12436,6 +12436,11 @@ void Compiler::gtDispTree(GenTree* tree, printf(" (Helper)"); break; #endif + + case GenTreeBlk::BlkOpKindLoop: + printf(" (Loop)"); + break; + default: unreached(); } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index ceb7399766384..fa746d71c5697 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -7323,12 +7323,11 @@ struct GenTreeBlk : public GenTreeIndir #ifdef TARGET_XARCH BlkOpKindCpObjRepInstr, #endif -#ifndef TARGET_X86 BlkOpKindHelper, -#endif #ifdef TARGET_XARCH BlkOpKindRepInstr, #endif + BlkOpKindLoop, BlkOpKindUnroll, BlkOpKindUnrollMemmove, } gtBlkOpKind; @@ -7337,12 +7336,15 @@ struct GenTreeBlk : public GenTreeIndir bool gtBlkOpGcUnsafe; #endif -#ifdef TARGET_XARCH bool IsOnHeapAndContainsReferences() { return (m_layout != nullptr) && m_layout->HasGCPtr() && !Addr()->OperIs(GT_LCL_ADDR); } -#endif + + bool IsZeroingGcPointersOnHeap() + { + return OperIs(GT_STORE_BLK) && Data()->IsIntegralConst(0) && IsOnHeapAndContainsReferences(); + } GenTreeBlk(genTreeOps oper, var_types type, GenTree* addr, ClassLayout* layout) : GenTreeIndir(oper, type, addr, nullptr) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 1c4d20a52eed2..6313c11ee607f 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -610,7 +610,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } else { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = + blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; } } else diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 8ddccb2ae800a..be3db6e409759 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -328,7 +328,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } else { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = + blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; } } else diff --git a/src/coreclr/jit/lowerriscv64.cpp b/src/coreclr/jit/lowerriscv64.cpp index 3674cd2075a39..c25c466330aa7 100644 --- a/src/coreclr/jit/lowerriscv64.cpp +++ b/src/coreclr/jit/lowerriscv64.cpp @@ -276,7 +276,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } else { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = + blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; } } else diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 840794c0cc35c..7e9bbbfdb3114 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -397,10 +397,12 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) { TOO_BIG_TO_UNROLL: #ifdef TARGET_AMD64 - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = + blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; #else // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86 - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr; + blkNode->gtBlkOpKind = + blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindRepInstr; #endif } } diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index a7e6fe5345450..c3d0a26df5552 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -624,6 +624,11 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) #endif // TARGET_ARM64 break; + case GenTreeBlk::BlkOpKindLoop: + // Needed for offsetReg + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); + break; + case GenTreeBlk::BlkOpKindHelper: assert(!src->isContained()); dstAddrRegMask = RBM_ARG_0; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index f3aaba0847f68..824555ed1840e 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -968,6 +968,7 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) #endif case GenTreeBlk::BlkOpKindUnrollMemmove: case GenTreeBlk::BlkOpKindUnroll: + case GenTreeBlk::BlkOpKindLoop: case GenTreeBlk::BlkOpKindInvalid: // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE break; diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 418ba90d9479b..67b27aa51300c 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1099,6 +1099,11 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } break; + case GenTreeBlk::BlkOpKindLoop: + // Needed for offsetReg + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); + break; + case GenTreeBlk::BlkOpKindHelper: assert(!src->isContained()); dstAddrRegMask = RBM_ARG_0; diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 2cf4511622772..0747cef99deac 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -1142,6 +1142,11 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } break; + case GenTreeBlk::BlkOpKindLoop: + // Needed for offsetReg + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); + break; + case GenTreeBlk::BlkOpKindHelper: assert(!src->isContained()); dstAddrRegMask = RBM_ARG_0; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 322b6e4ef02c3..4b2a38ea130e3 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1394,6 +1394,11 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) sizeRegMask = RBM_RCX; break; + case GenTreeBlk::BlkOpKindLoop: + // Needed for offsetReg + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); + break; + #ifdef TARGET_AMD64 case GenTreeBlk::BlkOpKindHelper: dstAddrRegMask = RBM_ARG_0; diff --git a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs new file mode 100644 index 0000000000000..8fcc19a6a502a --- /dev/null +++ b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using Xunit; + +public class StructWithGC_Zeroing +{ + [Fact] + public static void StructZeroingShouldNotUseMemset() + { + var largeStructWithGc = new LargeStructWithGC(); + Test(ref largeStructWithGc); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Test(ref LargeStructWithGC s) + { + // X64-NOT: CORINFO_HELP_MEMSET + s = default; + } + + struct LargeStructWithGC + { + public byte x; + public string a; + public long b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, r, s, t, u, v, w, z; + } +} diff --git a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj new file mode 100644 index 0000000000000..30458a4a8ca2d --- /dev/null +++ b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj @@ -0,0 +1,14 @@ + + + true + + + None + True + + + + true + + + From cb23e5c04d79d3a93b4601b5a3421bd1fd6b17f8 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 3 Dec 2023 02:08:35 +0100 Subject: [PATCH 02/22] Add asserts --- src/coreclr/jit/codegenloongarch64.cpp | 2 +- src/coreclr/jit/codegenriscv64.cpp | 2 +- src/coreclr/jit/gentree.h | 11 ++++++++++- src/coreclr/jit/lower.cpp | 9 +++++++++ 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 49da4a7b4480b..e4c949980475d 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6394,7 +6394,7 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - // TODO: + // TODO: LoongArch64 impl } // Generate code for a load from some address + offset diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 025a4883ee0c6..7bbbfdc1fc6cd 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6067,7 +6067,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - // TODO: + // TODO: RISC-V impl } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index fa746d71c5697..97bf787b44bc9 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -7336,9 +7336,14 @@ struct GenTreeBlk : public GenTreeIndir bool gtBlkOpGcUnsafe; #endif + bool ContainsReferences() + { + return (m_layout != nullptr) && m_layout->HasGCPtr(); + } + bool IsOnHeapAndContainsReferences() { - return (m_layout != nullptr) && m_layout->HasGCPtr() && !Addr()->OperIs(GT_LCL_ADDR); + return ContainsReferences() && !Addr()->OperIs(GT_LCL_ADDR); } bool IsZeroingGcPointersOnHeap() @@ -7355,6 +7360,10 @@ struct GenTreeBlk : public GenTreeIndir GenTreeBlk(genTreeOps oper, var_types type, GenTree* addr, GenTree* data, ClassLayout* layout) : GenTreeIndir(oper, type, addr, data) { + if (data->IsIntegralConst(0)) + { + data->gtFlags |= GTF_DONT_CSE; + } Initialize(layout); } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 4f70bd9bc6b0a..9c682698a9a92 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8867,6 +8867,15 @@ void Lowering::LowerBlockStoreCommon(GenTreeBlk* blkNode) { assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK)); + if (blkNode->ContainsReferences() && !blkNode->OperIsCopyBlkOp()) + { + // Make sure we don't use GT_STORE_DYN_BLK + assert(blkNode->OperIs(GT_STORE_BLK)); + + // and we only zero it (and that zero is better to be not hoisted/CSE'd) + assert(blkNode->Data()->IsIntegralConst(0)); + } + // Lose the type information stored in the source - we no longer need it. if (blkNode->Data()->OperIs(GT_BLK)) { From 063cd10668127b82d1827a3e46b87b1045bb877b Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 3 Dec 2023 02:41:48 +0100 Subject: [PATCH 03/22] Add another test case --- src/coreclr/jit/gentree.h | 2 + .../JIT/opt/Structs/StructWithGC_Zeroing.cs | 40 +++++++++++++++---- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 97bf787b44bc9..c1854002ddab2 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -7323,7 +7323,9 @@ struct GenTreeBlk : public GenTreeIndir #ifdef TARGET_XARCH BlkOpKindCpObjRepInstr, #endif +#ifndef TARGET_X86 BlkOpKindHelper, +#endif #ifdef TARGET_XARCH BlkOpKindRepInstr, #endif diff --git a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs index 8fcc19a6a502a..e5a2db11ec09f 100644 --- a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs +++ b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs @@ -10,21 +10,47 @@ public class StructWithGC_Zeroing [Fact] public static void StructZeroingShouldNotUseMemset() { - var largeStructWithGc = new LargeStructWithGC(); - Test(ref largeStructWithGc); + LargeStructWithGC ls1 = default; + ls1.str = "hello1"; + ls1.z2 = long.MaxValue; + ZeroIt(ref ls1); + + LargeStructWithGC2 ls2 = default; + ls2.str = "hello2"; + ls2.z1 = long.MinValue; + ZeroIt(ref ls2); + + if (ls1.str != null || ls2.str != null || ls1.z2 != 0 || ls2.z1 != 0) + throw new InvalidOperationException("should be zeroed"); } [MethodImpl(MethodImplOptions.NoInlining)] - private static void Test(ref LargeStructWithGC s) + private static void ZeroIt(ref LargeStructWithGC s) { // X64-NOT: CORINFO_HELP_MEMSET + // ARM64-NOT: CORINFO_HELP_MEMSET s = default; } - struct LargeStructWithGC + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ZeroIt(ref LargeStructWithGC2 s) + { + // X64-NOT: CORINFO_HELP_MEMSET + // ARM64-NOT: CORINFO_HELP_MEMSET + s = default; + } + + struct LargeStructWithGC // 360 bytes (64-bit) + { + public string str; + public long b1, c1, d1, e1, f1, g1, h1, i1, j1, k1, l1, m1, n1, o1, p1, r1, s1, t1, u1, v1, w1, z1; + public long b2, c2, d2, e2, f2, g2, h2, i2, j2, k2, l2, m2, n2, o2, p2, r2, s2, t2, u2, v2, w2, z2; + } + + unsafe struct LargeStructWithGC2 // 4184 bytes (64-bit) { - public byte x; - public string a; - public long b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, r, s, t, u, v, w, z; + public fixed byte data[4000]; + public string str; + public long b1, c1, d1, e1, f1, g1, h1, i1, j1, k1, l1, m1, n1, o1, p1, r1, s1, t1, u1, v1, w1, z1; } } From 90e53b1d8dae75414388e2f1f22ba23a8f24d8d6 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 3 Dec 2023 03:22:54 +0100 Subject: [PATCH 04/22] Update StructWithGC_Zeroing.csproj --- src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj index 30458a4a8ca2d..7aa59749804e4 100644 --- a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj +++ b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj @@ -1,8 +1,7 @@ true - - + true None True From 24a60466936cb4b2758aa9d6903a515a2e8bc818 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 3 Dec 2023 11:35:57 +0100 Subject: [PATCH 05/22] Update StructWithGC_Zeroing.cs --- src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs index e5a2db11ec09f..0a8719a2430a3 100644 --- a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs +++ b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs @@ -18,7 +18,7 @@ public static void StructZeroingShouldNotUseMemset() LargeStructWithGC2 ls2 = default; ls2.str = "hello2"; ls2.z1 = long.MinValue; - ZeroIt(ref ls2); + ZeroIt2(ref ls2); if (ls1.str != null || ls2.str != null || ls1.z2 != 0 || ls2.z1 != 0) throw new InvalidOperationException("should be zeroed"); @@ -33,7 +33,7 @@ private static void ZeroIt(ref LargeStructWithGC s) } [MethodImpl(MethodImplOptions.NoInlining)] - private static void ZeroIt(ref LargeStructWithGC2 s) + private static void ZeroIt2(ref LargeStructWithGC2 s) { // X64-NOT: CORINFO_HELP_MEMSET // ARM64-NOT: CORINFO_HELP_MEMSET From 31dc4a80e6cba5f5fc42979d8299c6e03f6211e7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 3 Dec 2023 12:46:09 +0100 Subject: [PATCH 06/22] clean up --- src/coreclr/jit/codegenarmarch.cpp | 7 +++-- src/coreclr/jit/codegenloongarch64.cpp | 39 ++++++++++++++++++++++++-- src/coreclr/jit/codegenriscv64.cpp | 39 ++++++++++++++++++++++++-- src/coreclr/jit/codegenxarch.cpp | 7 +++-- 4 files changed, 84 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index c23154a7146e4..a02680147d939 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3244,7 +3244,9 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) } //------------------------------------------------------------------------ -// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop. +// It's needed for cases when size is too big to unroll and we're not allowed +// to use memset call due to atomicity requirements. // // Arguments: // initBlkNode - the GT_STORE_BLK node @@ -3279,6 +3281,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + // The loop is reversed (it makes it smaller) + GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { const regNumber offsetReg = initBlkNode->ExtractTempReg(); @@ -3297,7 +3301,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) #endif inst_JMP(EJ_ne, loop); } - GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index e4c949980475d..f89f65b8339f3 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6387,14 +6387,49 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) } //------------------------------------------------------------------------ -// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop. +// It's needed for cases when size is too big to unroll and we're not allowed +// to use memset call due to atomicity requirements. // // Arguments: // initBlkNode - the GT_STORE_BLK node // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - // TODO: LoongArch64 impl + GenTree* const dstNode = initBlkNode->Addr(); + genConsumeReg(dstNode); + const regNumber dstReg = dstNode->GetRegNum(); + + GenTree* const zeroNode = initBlkNode->Data(); + genConsumeReg(zeroNode); + const regNumber zeroReg = zeroNode->GetRegNum(); + + if (initBlkNode->IsVolatile()) + { + // issue a full memory barrier before a volatile initBlock Operation + instGen_MemoryBarrier(); + } + + const unsigned size = initBlkNode->GetLayout()->GetSize(); + assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + + // The loop is reversed (it makes it smaller) + //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); + if (size > TARGET_POINTER_SIZE) + { + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); + + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + + // TODO: LoongArch64: + // + //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); + //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); + //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); + //// inst_JMP(EJ_ne, loop); + } } // Generate code for a load from some address + offset diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 7bbbfdc1fc6cd..9f06e72aa0f45 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6060,14 +6060,49 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) } //------------------------------------------------------------------------ -// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop. +// It's needed for cases when size is too big to unroll and we're not allowed +// to use memset call due to atomicity requirements. // // Arguments: // initBlkNode - the GT_STORE_BLK node // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - // TODO: RISC-V impl + GenTree* const dstNode = initBlkNode->Addr(); + genConsumeReg(dstNode); + const regNumber dstReg = dstNode->GetRegNum(); + + GenTree* const zeroNode = initBlkNode->Data(); + genConsumeReg(zeroNode); + const regNumber zeroReg = zeroNode->GetRegNum(); + + if (initBlkNode->IsVolatile()) + { + // issue a full memory barrier before a volatile initBlock Operation + instGen_MemoryBarrier(); + } + + const unsigned size = initBlkNode->GetLayout()->GetSize(); + assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + + // The loop is reversed (it makes it smaller) + //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); + if (size > TARGET_POINTER_SIZE) + { + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); + + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + + // TODO: RISC-V: + // + //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); + //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); + //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); + //// inst_JMP(EJ_ne, loop); + } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index cd57a5d6bbea1..bd4c134a73ac1 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3317,7 +3317,9 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) } //------------------------------------------------------------------------ -// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop. +// It's needed for cases when size is too big to unroll and we're not allowed +// to use memset call due to atomicity requirements. // // Arguments: // initBlkNode - the GT_STORE_BLK node @@ -3344,6 +3346,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + // The loop is reversed (it makes it smaller) + GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { const regNumber offsetReg = initBlkNode->ExtractTempReg(); @@ -3356,7 +3360,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); } - GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); } #ifdef TARGET_AMD64 From 2e72231513eaa506058162842987125bab1a6700 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 3 Dec 2023 14:25:38 +0100 Subject: [PATCH 07/22] Fix arm32 --- src/coreclr/jit/codegenarmarch.cpp | 22 +++++++++++----------- src/coreclr/jit/codegenloongarch64.cpp | 15 ++++++++------- src/coreclr/jit/codegenriscv64.cpp | 15 ++++++++------- src/coreclr/jit/codegenxarch.cpp | 2 +- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index a02680147d939..5d2f8109443d6 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3253,17 +3253,16 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - genConsumeReg(dstNode); - const regNumber dstReg = dstNode->GetRegNum(); - -#ifdef TARGET_ARM64 - const regNumber zeroReg = REG_ZR; -#else + GenTree* const dstNode = initBlkNode->Addr(); GenTree* const zeroNode = initBlkNode->Data(); + + genConsumeReg(dstNode); genConsumeReg(zeroNode); + + const regNumber dstReg = dstNode->GetRegNum(); const regNumber zeroReg = zeroNode->GetRegNum(); -#endif + + // TODO-ARM64: mark initBlkNode->Data() as contained and use WZR/XZR if (initBlkNode->IsVolatile()) { @@ -3271,12 +3270,13 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) instGen_MemoryBarrier(); } + // mov zeroReg, wzr + // str zeroReg, [dstReg] // mov offsetReg, //.LOOP: - // str xzr, [dstReg, offsetReg] + // str zeroReg, [dstReg, offsetReg] // subs offsetReg, offsetReg, #8 // bne .LOOP - // str xzr, [dstReg] const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); @@ -3297,7 +3297,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) #else // There is no subs (sets flags) instruction on ARM32, so we use sub and cmp instead. GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); + GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); #endif inst_JMP(EJ_ne, loop); } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index f89f65b8339f3..71de335ef217f 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6396,12 +6396,13 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - genConsumeReg(dstNode); - const regNumber dstReg = dstNode->GetRegNum(); - + GenTree* const dstNode = initBlkNode->Addr(); GenTree* const zeroNode = initBlkNode->Data(); + + genConsumeReg(dstNode); genConsumeReg(zeroNode); + + const regNumber dstReg = dstNode->GetRegNum(); const regNumber zeroReg = zeroNode->GetRegNum(); if (initBlkNode->IsVolatile()) @@ -6413,6 +6414,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + //// TODO: implement on LoongArch64: + // The loop is reversed (it makes it smaller) //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) @@ -6423,11 +6426,9 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - // TODO: LoongArch64: - // //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); + //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); //// inst_JMP(EJ_ne, loop); } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 9f06e72aa0f45..60d83200d40a6 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6069,12 +6069,13 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - genConsumeReg(dstNode); - const regNumber dstReg = dstNode->GetRegNum(); - + GenTree* const dstNode = initBlkNode->Addr(); GenTree* const zeroNode = initBlkNode->Data(); + + genConsumeReg(dstNode); genConsumeReg(zeroNode); + + const regNumber dstReg = dstNode->GetRegNum(); const regNumber zeroReg = zeroNode->GetRegNum(); if (initBlkNode->IsVolatile()) @@ -6086,6 +6087,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + //// TODO: implement on RISC-V: + // The loop is reversed (it makes it smaller) //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) @@ -6096,11 +6099,9 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - // TODO: RISC-V: - // //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); + //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); //// inst_JMP(EJ_ne, loop); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index bd4c134a73ac1..c1e832af35502 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3336,12 +3336,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const regNumber zeroReg = zeroNode->GetRegNum(); // xor zeroReg, zeroReg + // mov qword ptr [dstReg], zeroReg // mov offsetReg, //.LOOP: // mov qword ptr [dstReg + offsetReg], zeroReg // sub offsetReg, 8 // jne .LOOP - // mov qword ptr [dstReg], zeroReg const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); From 5d9a1db443b079fe8802de386618dbf76e3417bd Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 5 Dec 2023 02:21:59 +0100 Subject: [PATCH 08/22] Address feedback --- src/coreclr/jit/codegenarmarch.cpp | 11 ++++++----- src/coreclr/jit/codegenloongarch64.cpp | 7 +++++-- src/coreclr/jit/codegenriscv64.cpp | 7 +++++-- src/coreclr/jit/codegenxarch.cpp | 7 +++++-- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 5d2f8109443d6..b393b9c2491b3 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3281,11 +3281,14 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); - // The loop is reversed (it makes it smaller) + // The loop is reversed - it makes it smaller. + // Although, we zero the first pointer before the loop (the loop doesn't zero it) + // it works as a nullcheck, otherwise the first iteration would try to access + // "null + potentially large offset" and hit AV. GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); @@ -3295,9 +3298,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) #ifdef TARGET_ARM64 GetEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); #else - // There is no subs (sets flags) instruction on ARM32, so we use sub and cmp instead. - GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); + GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE, INS_FLAGS_SET); #endif inst_JMP(EJ_ne, loop); } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 71de335ef217f..d94b9d2d61977 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6416,11 +6416,14 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) //// TODO: implement on LoongArch64: - // The loop is reversed (it makes it smaller) + // The loop is reversed - it makes it smaller. + // Although, we zero the first pointer before the loop (the loop doesn't zero it) + // it works as a nullcheck, otherwise the first iteration would try to access + // "null + potentially large offset" and hit AV. //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index eb97532f95783..dc11e4d843c7c 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6100,11 +6100,14 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) //// TODO: implement on RISC-V: - // The loop is reversed (it makes it smaller) + // The loop is reversed - it makes it smaller. + // Although, we zero the first pointer before the loop (the loop doesn't zero it) + // it works as a nullcheck, otherwise the first iteration would try to access + // "null + potentially large offset" and hit AV. //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index c1e832af35502..6efd2699b81c2 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3346,11 +3346,14 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); - // The loop is reversed (it makes it smaller) + // The loop is reversed - it makes it smaller. + // Although, we zero the first pointer before the loop (the loop doesn't zero it) + // it works as a nullcheck, otherwise the first iteration would try to access + // "null + potentially large offset" and hit AV. GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); From 3b5d1a95615826fc47d6cc6b2b09d50540e8fc96 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 5 Dec 2023 15:03:54 +0100 Subject: [PATCH 09/22] Add a small note --- docs/design/coreclr/botr/clr-abi.md | 44 +++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index bda2f1d1d3621..b168065146577 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -207,7 +207,7 @@ For non-rude thread abort, the VM walks the stack, running any catch handler tha For example: -``` +```cs try { // try 1 try { // try 2 System.Threading.Thread.CurrentThread.Abort(); @@ -223,7 +223,7 @@ L: In this case, if the address returned in catch 2 corresponding to label L is outside try 1, then the ThreadAbortException re-raised by the VM will not be caught by catch 1, as is expected. The JIT needs to insert a block such that this is the effective code generation: -``` +```cs try { // try 1 try { // try 2 System.Threading.Thread.CurrentThread.Abort(); @@ -240,7 +240,7 @@ L: Similarly, the automatic re-raise address for a ThreadAbortException can't be within a finally handler, or the VM will abort the re-raise and swallow the exception. This can happen due to call-to-finally thunks marked as "cloned finally", as described above. For example (this is pseudo-assembly-code, not C#): -``` +```cs try { // try 1 try { // try 2 System.Threading.Thread.CurrentThread.Abort(); @@ -256,7 +256,7 @@ L: This would generate something like: -``` +```asm // beginning of 'try 1' // beginning of 'try 2' System.Threading.Thread.CurrentThread.Abort(); @@ -281,7 +281,7 @@ Finally1: Note that the JIT must already insert a "step" block so the finally will be called. However, this isn't sufficient to support ThreadAbortException processing, because "L1" is marked as "cloned finally". In this case, the JIT must insert another step block that is within "try 1" but outside the cloned finally block, that will allow for correct re-raise semantics. For example: -``` +```asm // beginning of 'try 1' // beginning of 'try 2' System.Threading.Thread.CurrentThread.Abort(); @@ -399,7 +399,7 @@ To implement this requirement, for any function with EH, we create a frame-local Note that the since a slot on x86 is 4 bytes, the minimum size is 16 bytes. The idea is to have 1 slot for each handler that could be possibly be invoked at the same time. For example, for: -``` +```cs try { ... } catch { @@ -419,7 +419,7 @@ When calling a finally, we set the appropriate level to 0xFC (aka "finally call" Thus, calling a finally from JIT generated code looks like: -``` +```asm mov dword ptr [L_02+0x4 ebp-10H], 0 // This must happen before the 0xFC is written mov dword ptr [L_02+0x8 ebp-0CH], 252 // 0xFC push G_M52300_IG07 @@ -430,7 +430,7 @@ In this case, `G_M52300_IG07` is not the address after the 'jmp', so a simple 'c The code this finally returns to looks like this: -``` +```asm mov dword ptr [L_02+0x8 ebp-0CH], 0 jmp SHORT G_M52300_IG05 ``` @@ -479,7 +479,7 @@ Because a main function body will **always** be on the stack when one of its fun There is one "corner case" in the VM implementation of WantsReportOnlyLeaf model that has implications for the code the JIT is allowed to generate. Consider this function with nested exception handling: -``` +```cs public void runtest() { try { try { @@ -804,3 +804,29 @@ In addition to the usual registers it also preserves all float registers and `rc `CORINFO_HELP_DISPATCH_INDIRECT_CALL` takes the call address in `rax` and it reserves the right to use and trash `r10` and `r11`. The JIT uses the dispatch helper on x64 whenever possible as it is expected that the code size benefits outweighs the less accurate branch prediction. However, note that the use of `r11` in the dispatcher makes it incompatible with VSD calls where the JIT must fall back to the validator and a manual call. + +# Notes on Memset/Memcpy + +Generally, `memset` and `memcpy` do not provide any guarantees of atomicity. This implies that they should only be used when the memory being modified by `memset`/`memcpy` is not observable by any other thread (including GC), or when there are no atomicity requirements according to our [Memory Model](../../specs/Memory-model.md). It's especially important when we modify heap containing managed pointers - those must be updated atomically, e.g. using pointer-sized `mov` instruction (managed pointers are always aligned) - see [Atomic Memory Access](../../specs/Memory-model.md#Atomic-memory-accesses). It's worth noting that by "update" it's implied "set to zero", otherwise, we need a write barrier. + +Examples: + +```cs +struct MyStruct +{ + long a; + string b; +} + +void Test1(ref MyStruct m) +{ + // We're not allowed to use memset here + m = default; +} + +MyStruct Test2() +{ + // We can use memset here + return default; +} +``` \ No newline at end of file From 1b3208d859e20fb37918a11c7e9d2554326f5b67 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 02:23:39 +0100 Subject: [PATCH 10/22] Add RISC-V and LA64 --- src/coreclr/jit/codegenarmarch.cpp | 2 ++ src/coreclr/jit/codegenloongarch64.cpp | 21 +++++++++++++-------- src/coreclr/jit/codegenriscv64.cpp | 21 +++++++++++++-------- src/coreclr/jit/codegenxarch.cpp | 2 ++ src/coreclr/jit/lsrariscv64.cpp | 2 ++ 5 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index b393b9c2491b3..ed508c8db3b6d 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3293,6 +3293,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); #ifdef TARGET_ARM64 @@ -3301,6 +3302,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE, INS_FLAGS_SET); #endif inst_JMP(EJ_ne, loop); + GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index d94b9d2d61977..8ecb6550efb6e 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6414,25 +6414,30 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); - //// TODO: implement on LoongArch64: - // The loop is reversed - it makes it smaller. // Although, we zero the first pointer before the loop (the loop doesn't zero it) // it works as a nullcheck, otherwise the first iteration would try to access // "null + potentially large offset" and hit AV. - //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->GetSingleTempReg(); + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber tempReg = initBlkNode->ExtractTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); - //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); - //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); - //// inst_JMP(EJ_ne, loop); + // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tempReg, dstReg, offsetReg); + // *tempReg = 0 + GetEmitter()->emitIns_R_R_R(INS_st_d, EA_PTRSIZE, zeroReg, tempReg, 0); + // offsetReg = offsetReg - 8 + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); + // if (offsetReg != 0) goto loop; + GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); + GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index dc11e4d843c7c..e435cb4dede4d 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6098,25 +6098,30 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); - //// TODO: implement on RISC-V: - // The loop is reversed - it makes it smaller. // Although, we zero the first pointer before the loop (the loop doesn't zero it) // it works as a nullcheck, otherwise the first iteration would try to access // "null + potentially large offset" and hit AV. - //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->GetSingleTempReg(); + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber tempReg = initBlkNode->ExtractTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); - //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); - //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); - //// inst_JMP(EJ_ne, loop); + // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) + GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); + // *tempReg = 0 + GetEmitter()->emitIns_R_R_R(INS_sd, EA_PTRSIZE, zeroReg, tempReg, 0); + // offsetReg = offsetReg - 8 + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); + // if (offsetReg != 0) goto loop; + GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); + GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 6efd2699b81c2..9089e3e52a2db 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3358,10 +3358,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); GetEmitter()->emitIns_ARX_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, offsetReg, 1, 0); GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); + GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 4798850103e39..09c0c4112dbd6 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -1259,6 +1259,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) case GenTreeBlk::BlkOpKindLoop: // Needed for offsetReg buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); + // Needed for tempReg + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); break; case GenTreeBlk::BlkOpKindHelper: From a129a55a5def71b6bd3a2a65ae91bb0bc4856eaf Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 14:04:55 +0100 Subject: [PATCH 11/22] Address feedback --- src/coreclr/jit/codegenarmarch.cpp | 5 +++-- src/coreclr/jit/codegenloongarch64.cpp | 5 +++-- src/coreclr/jit/codegenriscv64.cpp | 5 +++-- src/coreclr/jit/codegenxarch.cpp | 5 +++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index ed508c8db3b6d..dcc3b2589adcb 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3288,12 +3288,14 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - GetEmitter()->emitDisableGC(); GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); #ifdef TARGET_ARM64 @@ -3302,7 +3304,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE, INS_FLAGS_SET); #endif inst_JMP(EJ_ne, loop); - GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 8ecb6550efb6e..f46735ca394ca 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6421,13 +6421,15 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - GetEmitter()->emitDisableGC(); // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tempReg, dstReg, offsetReg); @@ -6437,7 +6439,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); - GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index e435cb4dede4d..772186c8e8680 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6105,13 +6105,15 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - GetEmitter()->emitDisableGC(); // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); @@ -6121,7 +6123,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); - GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 9089e3e52a2db..9a949ae89e0e7 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3353,17 +3353,18 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - GetEmitter()->emitDisableGC(); GetEmitter()->emitIns_ARX_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, offsetReg, 1, 0); GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); - GetEmitter()->emitEnableGC(); } } From a86ecf5ef829f5a9fd2348724ad5e2e5f885cc7b Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 15:36:48 +0100 Subject: [PATCH 12/22] fix build --- src/coreclr/jit/codegenarmarch.cpp | 14 ++++++++++++-- src/coreclr/jit/codegenloongarch64.cpp | 14 ++++++++++++-- src/coreclr/jit/codegenriscv64.cpp | 14 ++++++++++++-- src/coreclr/jit/codegenxarch.cpp | 14 ++++++++++++-- 4 files changed, 48 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index dcc3b2589adcb..7fbe7f19a684b 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3288,8 +3288,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; + if (dstDies) + { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + } const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); @@ -3304,6 +3308,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE, INS_FLAGS_SET); #endif inst_JMP(EJ_ne, loop); + + if (dstDies) + { + regSet.RemoveMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegSetNpt(dstReg); + } } } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index f46735ca394ca..aecb3517e4718 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6421,8 +6421,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; + if (dstDies) + { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + } const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); @@ -6439,6 +6443,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); + + if (dstDies) + { + regSet.RemoveMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegSetNpt(dstReg); + } } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 772186c8e8680..1fca688902140 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6105,8 +6105,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; + if (dstDies) + { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + } const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); @@ -6123,6 +6127,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); + + if (dstDies) + { + regSet.RemoveMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegSetNpt(dstReg); + } } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 9a949ae89e0e7..b8082ef3ef90b 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3353,8 +3353,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; + if (dstDies) + { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + } const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); @@ -3365,6 +3369,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_ARX_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, offsetReg, 1, 0); GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); + + if (dstDies) + { + regSet.RemoveMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegSetNpt(dstReg); + } } } From 133c140593d3231a0c64dfdd9fcc16927770d927 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 17:16:16 +0100 Subject: [PATCH 13/22] fix build --- src/coreclr/jit/codegenarmarch.cpp | 14 +++----------- src/coreclr/jit/codegenloongarch64.cpp | 14 +++----------- src/coreclr/jit/codegenriscv64.cpp | 14 +++----------- src/coreclr/jit/codegenxarch.cpp | 14 +++----------- 4 files changed, 12 insertions(+), 44 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 7fbe7f19a684b..65f4fb062e200 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3288,12 +3288,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { - const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; - if (dstDies) - { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - } + // Extend liveness of dstReg in case if it gets killed by the store. + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); @@ -3309,11 +3305,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) #endif inst_JMP(EJ_ne, loop); - if (dstDies) - { - regSet.RemoveMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegSetNpt(dstReg); - } + gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); } } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index aecb3517e4718..0c89291f58889 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6421,12 +6421,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; - if (dstDies) - { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - } + // Extend liveness of dstReg in case if it gets killed by the store. + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); @@ -6444,11 +6440,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); - if (dstDies) - { - regSet.RemoveMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegSetNpt(dstReg); - } + gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 1fca688902140..3bb7de8f54022 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6105,12 +6105,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; - if (dstDies) - { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - } + // Extend liveness of dstReg in case if it gets killed by the store. + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); @@ -6128,11 +6124,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); - if (dstDies) - { - regSet.RemoveMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegSetNpt(dstReg); - } + gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index b8082ef3ef90b..e10df3f791670 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3353,12 +3353,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; - if (dstDies) - { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - } + // Extend liveness of dstReg in case if it gets killed by the store. + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); @@ -3370,11 +3366,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); - if (dstDies) - { - regSet.RemoveMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegSetNpt(dstReg); - } + gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); } } From 4349a0a65bf38f4ef48cc94a7b5a73862e8dac82 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 19:28:40 +0100 Subject: [PATCH 14/22] CI test --- src/coreclr/jit/codegenriscv64.cpp | 2 +- src/coreclr/jit/lowerarmarch.cpp | 8 ++++++++ src/coreclr/jit/lowerxarch.cpp | 8 ++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 3bb7de8f54022..753717fdb6715 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6118,7 +6118,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); // *tempReg = 0 - GetEmitter()->emitIns_R_R_R(INS_sd, EA_PTRSIZE, zeroReg, tempReg, 0); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, tempReg, 0); // offsetReg = offsetReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 6313c11ee607f..66908783a2f25 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -570,6 +570,14 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) src = src->AsUnOp()->gtGetOp1(); } + // CI Test - use BlkOpKindLoop for more cases + // TODO: enable only under jitstress + if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; + return; + } + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) && src->OperIs(GT_CNS_INT)) { diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 7e9bbbfdb3114..685294a343734 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -338,6 +338,14 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) src = src->AsUnOp()->gtGetOp1(); } + // CI Test - use BlkOpKindLoop for more cases + // TODO: enable only under jitstress + if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; + return; + } + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset))) { if (!src->OperIs(GT_CNS_INT)) From 2f135b7297157cb5f260ce3de1607c0ec1311e4f Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 20:53:36 +0100 Subject: [PATCH 15/22] Fix build --- src/coreclr/jit/lowerarmarch.cpp | 15 ++++++++------- src/coreclr/jit/lowerxarch.cpp | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 66908783a2f25..b29ce62ca6e01 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -564,20 +564,21 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (blkNode->OperIsInitBlkOp()) { - if (src->OperIs(GT_INIT_VAL)) - { - src->SetContained(); - src = src->AsUnOp()->gtGetOp1(); - } - // CI Test - use BlkOpKindLoop for more cases // TODO: enable only under jitstress - if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0)) + if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && + src->IsIntegralConst(0)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; return; } + if (src->OperIs(GT_INIT_VAL)) + { + src->SetContained(); + src = src->AsUnOp()->gtGetOp1(); + } + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) && src->OperIs(GT_CNS_INT)) { diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 685294a343734..e30e486f2bb0e 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -332,20 +332,21 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (blkNode->OperIsInitBlkOp()) { - if (src->OperIs(GT_INIT_VAL)) - { - src->SetContained(); - src = src->AsUnOp()->gtGetOp1(); - } - // CI Test - use BlkOpKindLoop for more cases // TODO: enable only under jitstress - if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0)) + if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && + src->IsIntegralConst(0)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; return; } + if (src->OperIs(GT_INIT_VAL)) + { + src->SetContained(); + src = src->AsUnOp()->gtGetOp1(); + } + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset))) { if (!src->OperIs(GT_CNS_INT)) From 3489223506942cfad763b30ed3081d7b45a7d05c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 23:51:29 +0100 Subject: [PATCH 16/22] Clean up --- src/coreclr/jit/codegenarmarch.cpp | 2 +- src/coreclr/jit/codegenloongarch64.cpp | 4 +++- src/coreclr/jit/codegenriscv64.cpp | 4 +++- src/coreclr/jit/codegenxarch.cpp | 2 +- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/lowerarmarch.cpp | 9 +++++---- src/coreclr/jit/lowerxarch.cpp | 9 +++++---- 7 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 65f4fb062e200..51a2e5fdea93f 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3305,7 +3305,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) #endif inst_JMP(EJ_ne, loop); - gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 0c89291f58889..f610b0518a5be 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6430,6 +6430,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); // TODO: add gcinfo to tempReg and remove nogc // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tempReg, dstReg, offsetReg); @@ -6439,8 +6440,9 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); + GetEmitter()->emitEnableGC(); - gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 753717fdb6715..11c6a25311733 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6114,6 +6114,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); // TODO: add gcinfo to tempReg and remove nogc // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); @@ -6123,8 +6124,9 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); + GetEmitter()->emitEnableGC(); - gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index e10df3f791670..37b517e57f6b7 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3366,7 +3366,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); - gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e0fe58fc479de..8e69027672956 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10324,6 +10324,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX STRESS_MODE(IF_CONVERSION_COST) \ STRESS_MODE(IF_CONVERSION_INNER_LOOPS) \ STRESS_MODE(POISON_IMPLICIT_BYREFS) \ + STRESS_MODE(STORE_BLOCK_UNROLLING) \ STRESS_MODE(COUNT) enum compStressArea diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index b29ce62ca6e01..3b07928460643 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -564,14 +564,15 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (blkNode->OperIsInitBlkOp()) { - // CI Test - use BlkOpKindLoop for more cases - // TODO: enable only under jitstress - if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && - src->IsIntegralConst(0)) +#ifdef DEBUG + // Use BlkOpKindLoop for more cases under stress mode + if (comp->compStressCompile(Compiler::STRESS_STORE_BLOCK_UNROLLING, 50) && blkNode->OperIs(GT_STORE_BLK) && + ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && src->IsIntegralConst(0)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; return; } +#endif if (src->OperIs(GT_INIT_VAL)) { diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index e30e486f2bb0e..43682651175f7 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -332,14 +332,15 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (blkNode->OperIsInitBlkOp()) { - // CI Test - use BlkOpKindLoop for more cases - // TODO: enable only under jitstress - if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && - src->IsIntegralConst(0)) +#ifdef DEBUG + // Use BlkOpKindLoop for more cases under stress mode + if (comp->compStressCompile(Compiler::STRESS_STORE_BLOCK_UNROLLING, 50) && blkNode->OperIs(GT_STORE_BLK) && + ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && src->IsIntegralConst(0)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; return; } +#endif if (src->OperIs(GT_INIT_VAL)) { From e4df08acc760feaef1346aab609e2cc56247c2ca Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 7 Dec 2023 20:45:36 +0100 Subject: [PATCH 17/22] Apply suggestion --- src/coreclr/jit/codegenloongarch64.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 44d50b2544da4..256be5a442ee2 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6424,23 +6424,18 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // Extend liveness of dstReg in case if it gets killed by the store. gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - const regNumber offsetReg = initBlkNode->ExtractTempReg(); - const regNumber tempReg = initBlkNode->ExtractTempReg(); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - GetEmitter()->emitDisableGC(); // TODO: add gcinfo to tempReg and remove nogc - // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) - GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tempReg, dstReg, offsetReg); - // *tempReg = 0 - GetEmitter()->emitIns_R_R_R(INS_st_d, EA_PTRSIZE, zeroReg, tempReg, 0); + // *(dstReg + offsetReg) = 0 + GetEmitter()->emitIns_R_R_R(INS_stx_d, EA_PTRSIZE, zeroReg, dstReg, offsetReg); // offsetReg = offsetReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); - GetEmitter()->emitEnableGC(); gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } From 32425a9a9937b3010cfb0ba2c833a76d1a978215 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 8 Dec 2023 12:37:57 +0100 Subject: [PATCH 18/22] Use REG_R0 on RISC-V and LA64, use ZR on ARM64 --- src/coreclr/jit/codegenarmarch.cpp | 13 ++++++++----- src/coreclr/jit/codegenloongarch64.cpp | 15 +++++---------- src/coreclr/jit/codegenriscv64.cpp | 13 ++++--------- src/coreclr/jit/lowerarmarch.cpp | 12 ++++++++++-- src/coreclr/jit/lowerloongarch64.cpp | 9 +++++++-- src/coreclr/jit/lowerriscv64.cpp | 9 +++++++-- 6 files changed, 41 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index fb7a0f77cf3f8..678ce6fc07a6f 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3253,14 +3253,17 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - GenTree* const zeroNode = initBlkNode->Data(); - + GenTree* const dstNode = initBlkNode->Addr(); genConsumeReg(dstNode); - genConsumeReg(zeroNode); + const regNumber dstReg = dstNode->GetRegNum(); - const regNumber dstReg = dstNode->GetRegNum(); +#ifdef TARGET_ARM64 + GenTree* const zeroNode = initBlkNode->Data(); + genConsumeReg(zeroNode); const regNumber zeroReg = zeroNode->GetRegNum(); +#else + const regNumber zeroReg = REG_ZR; +#endif // TODO-ARM64: mark initBlkNode->Data() as contained and use WZR/XZR diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 256be5a442ee2..6080beb2d8c0a 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6396,14 +6396,9 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - GenTree* const zeroNode = initBlkNode->Data(); - + GenTree* const dstNode = initBlkNode->Addr(); genConsumeReg(dstNode); - genConsumeReg(zeroNode); - - const regNumber dstReg = dstNode->GetRegNum(); - const regNumber zeroReg = zeroNode->GetRegNum(); + const regNumber dstReg = dstNode->GetRegNum(); if (initBlkNode->IsVolatile()) { @@ -6418,7 +6413,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // Although, we zero the first pointer before the loop (the loop doesn't zero it) // it works as a nullcheck, otherwise the first iteration would try to access // "null + potentially large offset" and hit AV. - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, zeroReg, dstReg, 0); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, dstReg, 0); if (size > TARGET_POINTER_SIZE) { // Extend liveness of dstReg in case if it gets killed by the store. @@ -6431,11 +6426,11 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) genDefineTempLabel(loop); // *(dstReg + offsetReg) = 0 - GetEmitter()->emitIns_R_R_R(INS_stx_d, EA_PTRSIZE, zeroReg, dstReg, offsetReg); + GetEmitter()->emitIns_R_R_R(INS_stx_d, EA_PTRSIZE, REG_R0, dstReg, offsetReg); // offsetReg = offsetReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; - GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); + GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, REG_R0); gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 5cd1ef1520419..0828f3ab77813 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6087,14 +6087,9 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - GenTree* const zeroNode = initBlkNode->Data(); - + GenTree* const dstNode = initBlkNode->Addr(); genConsumeReg(dstNode); - genConsumeReg(zeroNode); - - const regNumber dstReg = dstNode->GetRegNum(); - const regNumber zeroReg = zeroNode->GetRegNum(); + const regNumber dstReg = dstNode->GetRegNum(); if (initBlkNode->IsVolatile()) { @@ -6109,7 +6104,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // Although, we zero the first pointer before the loop (the loop doesn't zero it) // it works as a nullcheck, otherwise the first iteration would try to access // "null + potentially large offset" and hit AV. - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, dstReg, 0); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, dstReg, 0); if (size > TARGET_POINTER_SIZE) { // Extend liveness of dstReg in case if it gets killed by the store. @@ -6126,7 +6121,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); // *tempReg = 0 - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, tempReg, 0); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, tempReg, 0); // offsetReg = offsetReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 3b07928460643..335cedefe18a2 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -618,10 +618,18 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) ContainBlockStoreAddress(blkNode, size, dstAddr, nullptr); } + else if (blkNode->IsZeroingGcPointersOnHeap()) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; +#ifdef TARGET_ARM64 + // On ARM64 we can just use REG_ZR instead of having to load + // the constant into a real register like on ARM32. + src->SetContained(); +#endif + } else { - blkNode->gtBlkOpKind = - blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } } else diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index be3db6e409759..5110442eda10d 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -326,10 +326,15 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) ContainBlockStoreAddress(blkNode, size, dstAddr, nullptr); } + else if (blkNode->IsZeroingGcPointersOnHeap()) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; + // We're going to use REG_R0 for zero + src->SetContained(); + } else { - blkNode->gtBlkOpKind = - blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } } else diff --git a/src/coreclr/jit/lowerriscv64.cpp b/src/coreclr/jit/lowerriscv64.cpp index c25c466330aa7..4f60458fd2516 100644 --- a/src/coreclr/jit/lowerriscv64.cpp +++ b/src/coreclr/jit/lowerriscv64.cpp @@ -274,10 +274,15 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) ContainBlockStoreAddress(blkNode, size, dstAddr, nullptr); } + else if (blkNode->IsZeroingGcPointersOnHeap()) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; + // We're going to use REG_R0 for zero + src->SetContained(); + } else { - blkNode->gtBlkOpKind = - blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } } else From 52119d1237b1322d9518432f93a1c2cc709aac2b Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 8 Dec 2023 15:01:38 +0100 Subject: [PATCH 19/22] fix build --- src/coreclr/jit/codegenarmarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 678ce6fc07a6f..0a411754d3dd8 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3257,7 +3257,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) genConsumeReg(dstNode); const regNumber dstReg = dstNode->GetRegNum(); -#ifdef TARGET_ARM64 +#ifndef TARGET_ARM64 GenTree* const zeroNode = initBlkNode->Data(); genConsumeReg(zeroNode); const regNumber zeroReg = zeroNode->GetRegNum(); From 98a4096ad190ae905e2a91b3d716be0c205a20b7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 8 Dec 2023 19:54:28 +0100 Subject: [PATCH 20/22] Clean up --- src/coreclr/jit/codegenarmarch.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 0a411754d3dd8..b2da90db01c7f 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3265,19 +3265,16 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const regNumber zeroReg = REG_ZR; #endif - // TODO-ARM64: mark initBlkNode->Data() as contained and use WZR/XZR - if (initBlkNode->IsVolatile()) { // issue a full memory barrier before a volatile initBlock Operation instGen_MemoryBarrier(); } - // mov zeroReg, wzr - // str zeroReg, [dstReg] + // str xzr, [dstReg] // mov offsetReg, //.LOOP: - // str zeroReg, [dstReg, offsetReg] + // str xzr, [dstReg, offsetReg] // subs offsetReg, offsetReg, #8 // bne .LOOP From 73b2066b132cd2c0062b093d9262c445cecad0ff Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Thu, 4 Jan 2024 15:27:49 +0100 Subject: [PATCH 21/22] Update src/coreclr/jit/codegenloongarch64.cpp Co-authored-by: Qiao Pengcheng --- src/coreclr/jit/codegenloongarch64.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 6080beb2d8c0a..e366f9b6b9497 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6422,15 +6422,13 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); - BasicBlock* loop = genCreateTempLabel(); - genDefineTempLabel(loop); - + // loop begin: // *(dstReg + offsetReg) = 0 GetEmitter()->emitIns_R_R_R(INS_stx_d, EA_PTRSIZE, REG_R0, dstReg, offsetReg); // offsetReg = offsetReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; - GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, REG_R0); + GetEmitter()->emitIns_R_I(INS_bnez, EA_8BYTE, offsetReg, -2 << 2); gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } From 4cfcea966b46d0249c5e4c07d43ca801af7dfe73 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 4 Jan 2024 20:07:24 +0100 Subject: [PATCH 22/22] Apply suggestions for risc-v --- src/coreclr/jit/codegenriscv64.cpp | 18 +++++++++--------- src/coreclr/jit/lsrariscv64.cpp | 2 -- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 2691fccf3f153..7e6f71ec6c460 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6062,22 +6062,22 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // Extend liveness of dstReg in case if it gets killed by the store. gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - const regNumber offsetReg = initBlkNode->ExtractTempReg(); - const regNumber tempReg = initBlkNode->ExtractTempReg(); - instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); + const regNumber tempReg = initBlkNode->GetSingleTempReg(); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, tempReg, size - TARGET_POINTER_SIZE); + + // tempReg = dstReg + tempReg (a new interior pointer, but in a nongc region) + GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, tempReg); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); GetEmitter()->emitDisableGC(); // TODO: add gcinfo to tempReg and remove nogc - // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) - GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); // *tempReg = 0 GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, tempReg, 0); - // offsetReg = offsetReg - 8 - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); - // if (offsetReg != 0) goto loop; - GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); + // tempReg = tempReg - 8 + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, tempReg, -8); + // if (tempReg != dstReg) goto loop; + GetEmitter()->emitIns_J(INS_bne, loop, (int)tempReg | ((int)dstReg << 5)); GetEmitter()->emitEnableGC(); gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index ae460ec27f2b8..cc4351a5a254e 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -1248,8 +1248,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) break; case GenTreeBlk::BlkOpKindLoop: - // Needed for offsetReg - buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); // Needed for tempReg buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); break;