Skip to content

Commit

Permalink
Implement StoreVectorAndZipNxM for Arm64 (dotnet#94607)
Browse files Browse the repository at this point in the history
* Implement StoreVectorAndZipNxM for Arm64 N=(64,128) & M=(3, 4)

* Fix assert ensuring correct register count in codegen
  • Loading branch information
SwapnilGaikwad authored Nov 13, 2023
1 parent 343c169 commit b1d6cd8
Show file tree
Hide file tree
Showing 8 changed files with 644 additions and 5 deletions.
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1744,6 +1744,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_AdvSimd_StoreVector64x2AndZip:
case NI_AdvSimd_StoreVector64x3AndZip:
case NI_AdvSimd_StoreVector64x4AndZip:
case NI_AdvSimd_Arm64_StoreVector128x2AndZip:
case NI_AdvSimd_Arm64_StoreVector128x3AndZip:
case NI_AdvSimd_Arm64_StoreVector128x4AndZip:
case NI_AdvSimd_StoreVector64x2:
case NI_AdvSimd_StoreVector64x3:
case NI_AdvSimd_StoreVector64x4:
Expand Down
11 changes: 9 additions & 2 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,12 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_R_R(ins, emitTypeSize(intrin.baseType), op2Reg, op3Reg, op1Reg);
break;

case NI_AdvSimd_StoreVector64x2AndZip:
case NI_AdvSimd_StoreVector64x3AndZip:
case NI_AdvSimd_StoreVector64x4AndZip:
case NI_AdvSimd_Arm64_StoreVector128x2AndZip:
case NI_AdvSimd_Arm64_StoreVector128x3AndZip:
case NI_AdvSimd_Arm64_StoreVector128x4AndZip:
case NI_AdvSimd_StoreVector64x2:
case NI_AdvSimd_StoreVector64x3:
case NI_AdvSimd_StoreVector64x4:
Expand All @@ -821,8 +827,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
assert(argReg == argNode->GetRegNum());
argReg = REG_NEXT(argReg);
}
assert((ins == INS_st1_2regs && regCount == 2) || (ins == INS_st1_3regs && regCount == 3) ||
(ins == INS_st1_4regs && regCount == 4));
assert((ins == INS_st1_2regs && regCount == 2) || (ins == INS_st2 && regCount == 2) ||
(ins == INS_st1_3regs && regCount == 3) || (ins == INS_st3 && regCount == 3) ||
(ins == INS_st1_4regs && regCount == 4) || (ins == INS_st4 && regCount == 4));
#endif

GetEmitter()->emitIns_R_R(ins, emitSize, op2Reg, op1Reg, opt);
Expand Down
12 changes: 9 additions & 3 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -479,9 +479,12 @@ HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper,
HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, -1, 3, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2, 8, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_invalid, INS_invalid, INS_st1_2regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x3, 8, 2, true, {INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_invalid, INS_invalid, INS_st1_3regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x4, 8, 2, true, {INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_invalid, INS_invalid, INS_st1_4regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2AndZip, 8, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x3AndZip, 8, 2, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_invalid, INS_invalid, INS_st3, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x4AndZip, 8, 2, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_invalid, INS_invalid, INS_st4, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2, 8, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_invalid, INS_invalid, INS_st1_2regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x3, 8, 2, true, {INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_invalid, INS_invalid, INS_st1_3regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x4, 8, 2, true, {INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_invalid, INS_invalid, INS_st1_4regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, Subtract, -1, 2, true, {INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_fsub, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingLower, 8, 2, true, {INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingUpper, 16, 3, true, {INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics)
Expand Down Expand Up @@ -673,6 +676,9 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePair,
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalar, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stp, INS_stp, INS_invalid, INS_invalid, INS_stp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stnp, INS_stnp, INS_invalid, INS_invalid, INS_stnp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, true, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2AndZip, 16, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x3AndZip, 16, 2, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x4AndZip, 16, 2, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2, 16, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x3, 16, 2, true, {INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x4, 16, 2, true, {INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1597,6 +1597,13 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
BuildDef(intrinsicTree);
*pDstCount = 1;
break;

case NI_AdvSimd_StoreVector64x2AndZip:
case NI_AdvSimd_StoreVector64x3AndZip:
case NI_AdvSimd_StoreVector64x4AndZip:
case NI_AdvSimd_Arm64_StoreVector128x2AndZip:
case NI_AdvSimd_Arm64_StoreVector128x3AndZip:
case NI_AdvSimd_Arm64_StoreVector128x4AndZip:
case NI_AdvSimd_StoreVector64x2:
case NI_AdvSimd_StoreVector64x3:
case NI_AdvSimd_StoreVector64x4:
Expand All @@ -1609,6 +1616,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
buildInternalRegisterUses();
*pDstCount = 0;
break;

case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
Expand Down
Loading

0 comments on commit b1d6cd8

Please sign in to comment.