diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 5b7ec3f559c25..39003cb739852 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -56,22 +56,24 @@ enum CORINFO_InstructionSet InstructionSet_POPCNT=16, InstructionSet_Vector128=17, InstructionSet_Vector256=18, - InstructionSet_X86Base_X64=19, - InstructionSet_SSE_X64=20, - InstructionSet_SSE2_X64=21, - InstructionSet_SSE3_X64=22, - InstructionSet_SSSE3_X64=23, - InstructionSet_SSE41_X64=24, - InstructionSet_SSE42_X64=25, - InstructionSet_AVX_X64=26, - InstructionSet_AVX2_X64=27, - InstructionSet_AES_X64=28, - InstructionSet_BMI1_X64=29, - InstructionSet_BMI2_X64=30, - InstructionSet_FMA_X64=31, - InstructionSet_LZCNT_X64=32, - InstructionSet_PCLMULQDQ_X64=33, - InstructionSet_POPCNT_X64=34, + InstructionSet_AVXVNNI=19, + InstructionSet_X86Base_X64=20, + InstructionSet_SSE_X64=21, + InstructionSet_SSE2_X64=22, + InstructionSet_SSE3_X64=23, + InstructionSet_SSSE3_X64=24, + InstructionSet_SSE41_X64=25, + InstructionSet_SSE42_X64=26, + InstructionSet_AVX_X64=27, + InstructionSet_AVX2_X64=28, + InstructionSet_AES_X64=29, + InstructionSet_BMI1_X64=30, + InstructionSet_BMI2_X64=31, + InstructionSet_FMA_X64=32, + InstructionSet_LZCNT_X64=33, + InstructionSet_PCLMULQDQ_X64=34, + InstructionSet_POPCNT_X64=35, + InstructionSet_AVXVNNI_X64=36, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -92,22 +94,24 @@ enum CORINFO_InstructionSet InstructionSet_POPCNT=16, InstructionSet_Vector128=17, InstructionSet_Vector256=18, - InstructionSet_X86Base_X64=19, - InstructionSet_SSE_X64=20, - InstructionSet_SSE2_X64=21, - InstructionSet_SSE3_X64=22, - InstructionSet_SSSE3_X64=23, - InstructionSet_SSE41_X64=24, - InstructionSet_SSE42_X64=25, - InstructionSet_AVX_X64=26, - InstructionSet_AVX2_X64=27, - InstructionSet_AES_X64=28, - InstructionSet_BMI1_X64=29, - InstructionSet_BMI2_X64=30, - InstructionSet_FMA_X64=31, - InstructionSet_LZCNT_X64=32, - InstructionSet_PCLMULQDQ_X64=33, - InstructionSet_POPCNT_X64=34, + InstructionSet_AVXVNNI=19, + InstructionSet_X86Base_X64=20, + InstructionSet_SSE_X64=21, + InstructionSet_SSE2_X64=22, + InstructionSet_SSE3_X64=23, + InstructionSet_SSSE3_X64=24, + InstructionSet_SSE41_X64=25, + InstructionSet_SSE42_X64=26, + InstructionSet_AVX_X64=27, + InstructionSet_AVX2_X64=28, + InstructionSet_AES_X64=29, + InstructionSet_BMI1_X64=30, + InstructionSet_BMI2_X64=31, + InstructionSet_FMA_X64=32, + InstructionSet_LZCNT_X64=33, + InstructionSet_PCLMULQDQ_X64=34, + InstructionSet_POPCNT_X64=35, + InstructionSet_AVXVNNI_X64=36, #endif // TARGET_X86 }; @@ -205,6 +209,8 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_PCLMULQDQ_X64); if (HasInstructionSet(InstructionSet_POPCNT)) AddInstructionSet(InstructionSet_POPCNT_X64); + if (HasInstructionSet(InstructionSet_AVXVNNI)) + AddInstructionSet(InstructionSet_AVXVNNI_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -342,6 +348,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_POPCNT); if (resultflags.HasInstructionSet(InstructionSet_POPCNT_X64) && !resultflags.HasInstructionSet(InstructionSet_POPCNT)) resultflags.RemoveInstructionSet(InstructionSet_POPCNT_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI)) + resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -530,6 +540,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector128"; case InstructionSet_Vector256 : return "Vector256"; + case InstructionSet_AVXVNNI : + return "AVXVNNI"; + case InstructionSet_AVXVNNI_X64 : + return "AVXVNNI_X64"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -568,6 +582,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector128"; case InstructionSet_Vector256 : return "Vector256"; + case InstructionSet_AVXVNNI : + return "AVXVNNI"; #endif // TARGET_X86 default: @@ -615,6 +631,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT; case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; + case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -633,6 +650,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT; case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; + case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 548fcf5f7f398..25746b4599294 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,12 +43,12 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 81a5e384-8ca5-4947-8b2e-1d76556728fd */ - 0x81a5e384, - 0x8ca5, - 0x4947, - {0x8b, 0x2e, 0x1d, 0x76, 0x55, 0x67, 0x28, 0xfd} -}; +constexpr GUID JITEEVersionIdentifier = { /* 1052f490-cad7-4610-99bb-6f2bd91a1d19 */ + 0x1052f490, + 0xcad7, + 0x4610, + {0x99, 0xbb, 0x6f, 0x2b, 0xd9, 0x1a, 0x1d, 0x19} + }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// // diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 9a4d0ba2ef9b1..1b66c6e520891 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -33,6 +33,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_X86Base=22, READYTORUN_INSTRUCTION_Dp=23, READYTORUN_INSTRUCTION_Rdm=24, + READYTORUN_INSTRUCTION_AvxVnni=25, }; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index af16742179908..0ca17bb6ee83a 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2419,6 +2419,11 @@ void Compiler::compSetProcessor() instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX2); } + if (!JitConfig.EnableAVXVNNI()) + { + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVXVNNI); + } + if (!JitConfig.EnableLZCNT()) { instructionSetFlags.RemoveInstructionSet(InstructionSet_LZCNT); diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index c22d52bdc49ae..da7febee92b0d 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -44,6 +44,11 @@ bool IsFMAInstruction(instruction ins) return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION); } +bool IsAVXVNNIInstruction(instruction ins) +{ + return (ins >= INS_FIRST_AVXVNNI_INSTRUCTION) && (ins <= INS_LAST_AVXVNNI_INSTRUCTION); +} + bool IsBMIInstruction(instruction ins) { return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION); @@ -6314,7 +6319,7 @@ void emitter::emitIns_SIMD_R_R_S_I( void emitter::emitIns_SIMD_R_R_R_A( instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir) { - assert(IsFMAInstruction(ins)); + assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins)); assert(UseVEXEncoding()); // Ensure we aren't overwriting op2 @@ -6395,7 +6400,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins, void emitter::emitIns_SIMD_R_R_R_R( instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg) { - if (IsFMAInstruction(ins)) + if (IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins)) { assert(UseVEXEncoding()); @@ -6463,7 +6468,7 @@ void emitter::emitIns_SIMD_R_R_R_R( void emitter::emitIns_SIMD_R_R_R_S( instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs) { - assert(IsFMAInstruction(ins)); + assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins)); assert(UseVEXEncoding()); // Ensure we aren't overwriting op2 @@ -15633,6 +15638,10 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vfnmsub132ss: case INS_vfnmsub213ss: case INS_vfnmsub231ss: + case INS_vpdpbusd: // will be populated when the HW becomes publicly available + case INS_vpdpwssd: // will be populated when the HW becomes publicly available + case INS_vpdpbusds: // will be populated when the HW becomes publicly available + case INS_vpdpwssds: // will be populated when the HW becomes publicly available // uops.info result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency += PERFSCORE_LATENCY_4C; diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index be6e5779269ad..8448dbdd46c9e 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -324,7 +324,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) genHWIntrinsic_R_R_RM_R(node, ins); break; } + case NI_AVXVNNI_MultiplyWideningAndAdd: + case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + { + assert(targetReg != REG_NA); + assert(op1Reg != REG_NA); + assert(op2Reg != REG_NA); + genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3); + break; + } default: { unreached(); diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 82d9a4356b2cd..eb9bac12e3051 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -567,7 +567,13 @@ HARDWARE_INTRINSIC(AVX2, SubtractSaturate, HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) - +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// AVXVNNI Intrinsics +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 0155e4120b50e..6e812274471fd 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -36,6 +36,8 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_AVX_X64; case InstructionSet_AVX2: return InstructionSet_AVX2_X64; + case InstructionSet_AVXVNNI: + return InstructionSet_AVXVNNI_X64; case InstructionSet_AES: return InstructionSet_AES_X64; case InstructionSet_BMI1: @@ -80,6 +82,10 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) { return InstructionSet_AVX2; } + if (strcmp(className, "AvxVnni") == 0) + { + return InstructionSet_AVXVNNI; + } } else if (className[0] == 'S') { @@ -348,6 +354,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_AVX_X64: case InstructionSet_AVX2: case InstructionSet_AVX2_X64: + case InstructionSet_AVXVNNI: + case InstructionSet_AVXVNNI_X64: case InstructionSet_BMI1: case InstructionSet_BMI1_X64: case InstructionSet_BMI2: diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 25e5de73caf74..750f1b215036b 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -583,6 +583,13 @@ INST3(vfnmsub213ss, "fnmsub213ss", IUM_WR, BAD_CODE, BAD_CODE, INST3(vfnmsub231ss, "fnmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) // INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) +INST3(FIRST_AVXVNNI_INSTRUCTION, "FIRST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) +INST3(vpdpbusd, "pdpbusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x50), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes +INST3(vpdpwssd, "pdpwssd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x52), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers +INST3(vpdpbusds, "pdpbusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x51), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes with Saturation +INST3(vpdpwssds, "pdpwssds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x53), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers with Saturation +INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) + // BMI1 INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index a5937939a6006..04ade52092471 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -279,6 +279,7 @@ CONFIG_INTEGER(EnableSSE41, W("EnableSSE41"), 1) // Enable SSE41 CONFIG_INTEGER(EnableSSE42, W("EnableSSE42"), 1) // Enable SSE42 CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX CONFIG_INTEGER(EnableAVX2, W("EnableAVX2"), 1) // Enable AVX2 +CONFIG_INTEGER(EnableAVXVNNI, W("EnableAVXVNNI"), 1) // Enable AVXVNNI CONFIG_INTEGER(EnableFMA, W("EnableFMA"), 1) // Enable FMA CONFIG_INTEGER(EnableAES, W("EnableAES"), 1) // Enable AES CONFIG_INTEGER(EnableBMI1, W("EnableBMI1"), 1) // Enable BMI1 diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 4acfd81f5ca36..4d4253006047c 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6347,7 +6347,19 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } break; } - + case NI_AVXVNNI_MultiplyWideningAndAdd: + case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + { + if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + { + MakeSrcContained(node, op3); + } + else if (supportsRegOptional) + { + op3->SetRegOptional(); + } + break; + } case NI_BMI2_MultiplyNoFlags: case NI_BMI2_X64_MultiplyNoFlags: { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 5c76005fc8682..1cd81124d0566 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2373,6 +2373,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) break; } + case NI_AVXVNNI_MultiplyWideningAndAdd: + case NI_AVXVNNI_MultiplyWideningAndAddSaturate: + { + assert(numArgs == 3); + + tgtPrefUse = BuildUse(op1); + srcCount += 1; + srcCount += BuildDelayFreeUses(op2, op1); + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + + buildUses = false; + break; + } + case NI_AVX2_GatherVector128: case NI_AVX2_GatherVector256: { diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 5a1093e1dbce5..ea4aa13ded125 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -36,6 +36,7 @@ public enum ReadyToRunInstructionSet X86Base=22, Dp=23, Rdm=24, + AvxVnni=25, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 7b816cdd9e11d..ffc302a827567 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -86,6 +86,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_POPCNT_X64: return ReadyToRunInstructionSet.Popcnt; case InstructionSet.X64_Vector128: return null; case InstructionSet.X64_Vector256: return null; + case InstructionSet.X64_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; + case InstructionSet.X64_AVXVNNI_X64: return ReadyToRunInstructionSet.AvxVnni; default: throw new Exception("Unknown instruction set"); } @@ -129,6 +131,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_POPCNT_X64: return null; case InstructionSet.X86_Vector128: return null; case InstructionSet.X86_Vector256: return null; + case InstructionSet.X86_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; + case InstructionSet.X86_AVXVNNI_X64: return null; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 8a8111d514268..f1ffa0a0cd4eb 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -55,22 +55,24 @@ public enum InstructionSet X64_POPCNT=16, X64_Vector128=17, X64_Vector256=18, - X64_X86Base_X64=19, - X64_SSE_X64=20, - X64_SSE2_X64=21, - X64_SSE3_X64=22, - X64_SSSE3_X64=23, - X64_SSE41_X64=24, - X64_SSE42_X64=25, - X64_AVX_X64=26, - X64_AVX2_X64=27, - X64_AES_X64=28, - X64_BMI1_X64=29, - X64_BMI2_X64=30, - X64_FMA_X64=31, - X64_LZCNT_X64=32, - X64_PCLMULQDQ_X64=33, - X64_POPCNT_X64=34, + X64_AVXVNNI=19, + X64_X86Base_X64=20, + X64_SSE_X64=21, + X64_SSE2_X64=22, + X64_SSE3_X64=23, + X64_SSSE3_X64=24, + X64_SSE41_X64=25, + X64_SSE42_X64=26, + X64_AVX_X64=27, + X64_AVX2_X64=28, + X64_AES_X64=29, + X64_BMI1_X64=30, + X64_BMI2_X64=31, + X64_FMA_X64=32, + X64_LZCNT_X64=33, + X64_PCLMULQDQ_X64=34, + X64_POPCNT_X64=35, + X64_AVXVNNI_X64=36, X86_X86Base=1, X86_SSE=2, X86_SSE2=3, @@ -89,22 +91,24 @@ public enum InstructionSet X86_POPCNT=16, X86_Vector128=17, X86_Vector256=18, - X86_X86Base_X64=19, - X86_SSE_X64=20, - X86_SSE2_X64=21, - X86_SSE3_X64=22, - X86_SSSE3_X64=23, - X86_SSE41_X64=24, - X86_SSE42_X64=25, - X86_AVX_X64=26, - X86_AVX2_X64=27, - X86_AES_X64=28, - X86_BMI1_X64=29, - X86_BMI2_X64=30, - X86_FMA_X64=31, - X86_LZCNT_X64=32, - X86_PCLMULQDQ_X64=33, - X86_POPCNT_X64=34, + X86_AVXVNNI=19, + X86_X86Base_X64=20, + X86_SSE_X64=21, + X86_SSE2_X64=22, + X86_SSE3_X64=23, + X86_SSSE3_X64=24, + X86_SSE41_X64=25, + X86_SSE42_X64=26, + X86_AVX_X64=27, + X86_AVX2_X64=28, + X86_AES_X64=29, + X86_BMI1_X64=30, + X86_BMI2_X64=31, + X86_FMA_X64=32, + X86_LZCNT_X64=33, + X86_PCLMULQDQ_X64=34, + X86_POPCNT_X64=35, + X86_AVXVNNI_X64=36, } @@ -298,6 +302,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_POPCNT_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64)) resultflags.AddInstructionSet(InstructionSet.X64_POPCNT); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -449,6 +457,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ); if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64)) resultflags.AddInstructionSet(InstructionSet.X64_POPCNT); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) @@ -574,6 +584,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X64_POPCNT, true); yield return new InstructionSetInfo("Vector128", "", InstructionSet.X64_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false); + yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true); break; case TargetArchitecture.X86: @@ -595,6 +606,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X86_POPCNT, true); yield return new InstructionSetInfo("Vector128", "", InstructionSet.X86_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false); + yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true); break; } @@ -657,6 +669,8 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64); if (HasInstructionSet(InstructionSet.X64_POPCNT)) AddInstructionSet(InstructionSet.X64_POPCNT_X64); + if (HasInstructionSet(InstructionSet.X64_AVXVNNI)) + AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); break; case TargetArchitecture.X86: @@ -698,6 +712,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_LZCNT_X64); AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64); AddInstructionSet(InstructionSet.X64_POPCNT_X64); + AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); break; case TargetArchitecture.X86: @@ -717,6 +732,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_LZCNT_X64); AddInstructionSet(InstructionSet.X86_PCLMULQDQ_X64); AddInstructionSet(InstructionSet.X86_POPCNT_X64); + AddInstructionSet(InstructionSet.X86_AVXVNNI_X64); break; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 6e64e7e2b02c0..8bfdc9a9d86a7 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -40,6 +40,7 @@ instructionset ,X86 ,Pclmulqdq , ,14 ,PCLMULQDQ,pclmul instructionset ,X86 ,Popcnt , ,15 ,POPCNT ,popcnt instructionset ,X86 , , , ,Vector128, instructionset ,X86 , , , ,Vector256, +instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI ,avxvnni instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -57,6 +58,7 @@ instructionset64bit,X86 ,FMA instructionset64bit,X86 ,LZCNT instructionset64bit,X86 ,PCLMULQDQ instructionset64bit,X86 ,POPCNT +instructionset64bit,X86 ,AVXVNNI implication ,X86 ,SSE ,X86Base implication ,X86 ,SSE2 ,SSE diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 1898a791903f6..592ff2e1c38bb 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1308,6 +1308,9 @@ void EEJitManager::SetCpuInfo() // CORJIT_FLAG_USE_AVX2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): // CORJIT_FLAG_USE_AVX // AVX2 - EBX bit 5 + // CORJIT_FLAG_USE_AVXVNNI if the following feature bit is set (input EAX of 0x07 and input ECX of 1): + // CORJIT_FLAG_USE_AVX2 + // AVXVNNI - EAX bit 4 // CORJIT_FLAG_USE_AVX_512 is not currently set, but defined so that it can be used in future without // CORJIT_FLAG_USE_BMI1 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): // BMI1 - EBX bit 3 @@ -1385,6 +1388,12 @@ void EEJitManager::SetCpuInfo() if ((cpuidInfo[EBX] & (1 << 5)) != 0) // AVX2 { CPUCompileFlags.Set(InstructionSet_AVX2); + + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + if ((cpuidInfo[EAX] & (1 << 4)) != 0) // AVX-VNNI + { + CPUCompileFlags.Set(InstructionSet_AVXVNNI); + } } } } diff --git a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml index df1020f4b8d5d..59d0783d0a789 100644 --- a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml +++ b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml @@ -18,6 +18,12 @@ + + + + + + diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index fb5011f59f0c7..868caaefa14d1 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1973,6 +1973,7 @@ + @@ -1991,6 +1992,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs new file mode 100644 index 0000000000000..2edfd97a518ad --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs @@ -0,0 +1,72 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.CompilerServices; +using System.Runtime.Versioning; + +namespace System.Runtime.Intrinsics.X86 +{ + [CLSCompliant(false)] + [RequiresPreviewFeatures] + public abstract class AvxVnni : Avx2 + { + internal AvxVnni() { } + + public static new bool IsSupported { [Intrinsic] get { return false; } } + + public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + public static new bool IsSupported { [Intrinsic] get { return false; } } + } + + /// + /// __m128i _mm_dpbusd_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPBUSD xmm, xmm, xmm/m128 + /// + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m128i _mm_dpwssd_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPWSSD xmm, xmm, xmm/m128 + /// + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m256i _mm256_dpbusd_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPBUSD ymm, ymm, ymm/m256 + /// + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m256i _mm256_dpwssd_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPWSSD ymm, ymm, ymm/m256 + /// + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m128i _mm_dpbusds_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPBUSDS xmm, xmm, xmm/m128 + /// + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m128i _mm_dpwssds_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPWSSDS xmm, xmm, xmm/m128 + /// + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m256i _mm256_dpbusds_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPBUSDS ymm, ymm, ymm/m256 + /// + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + + /// + /// __m256i _mm256_dpwssds_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPWSSDS ymm, ymm, ymm/m256 + /// + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs new file mode 100644 index 0000000000000..d6bb750ebc89a --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs @@ -0,0 +1,74 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.CompilerServices; +using System.Runtime.Versioning; + +namespace System.Runtime.Intrinsics.X86 +{ + [Intrinsic] + [CLSCompliant(false)] + [RequiresPreviewFeatures] + public abstract class AvxVnni : Avx2 + { + internal AvxVnni() { } + + public static new bool IsSupported { get => IsSupported; } + + [Intrinsic] + public new abstract class X64 : Avx2.X64 + { + internal X64() { } + + public static new bool IsSupported { get => IsSupported; } + } + + /// + /// __m128i _mm_dpbusd_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPBUSD xmm, xmm, xmm/m128 + /// + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + /// + /// __m128i _mm_dpwssd_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPWSSD xmm, xmm, xmm/m128 + /// + public static Vector128 MultiplyWideningAndAdd(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAdd(addend, left, right); + + /// + /// __m256i _mm256_dpbusd_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPBUSD ymm, ymm, ymm/m256 + /// + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + /// + /// __m256i _mm256_dpwssd_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPWSSD ymm, ymm, ymm/m256 + /// + public static Vector256 MultiplyWideningAndAdd(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAdd(addend, left, right); + + /// + /// __m128i _mm_dpbusds_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPBUSDS xmm, xmm, xmm/m128 + /// + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + /// + /// __m128i _mm_dpwssds_epi32 (__m128i src, __m128i a, __m128i b) + /// VPDPWSSDS xmm, xmm, xmm/m128 + /// + public static Vector128 MultiplyWideningAndAddSaturate(Vector128 addend, Vector128 left, Vector128 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + /// + /// __m256i _mm256_dpbusds_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPBUSDS ymm, ymm, ymm/m256 + /// + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + + /// + /// __m256i _mm256_dpwssds_epi32 (__m256i src, __m256i a, __m256i b) + /// VPDPWSSDS ymm, ymm, ymm/m256 + /// + public static Vector256 MultiplyWideningAndAddSaturate(Vector256 addend, Vector256 left, Vector256 right) => MultiplyWideningAndAddSaturate(addend, left, right); + } +} diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 6cf453fd8d0c8..6ad7fc3eb9842 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -3,6 +3,7 @@ // ------------------------------------------------------------------------------ // Changes to this file must follow the https://aka.ms/api-review process. // ------------------------------------------------------------------------------ +using System.Runtime.Versioning; namespace System.Runtime.Intrinsics { @@ -3368,6 +3369,28 @@ internal X64() { } public static new bool IsSupported { get { throw null; } } } } + + [System.CLSCompliantAttribute(false)] + [RequiresPreviewFeatures] + public abstract class AvxVnni : System.Runtime.Intrinsics.X86.Avx2 + { + internal AvxVnni() { } + public static new bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128 addend, System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256 addend, System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64 + { + internal X64() { } + public static new bool IsSupported { get { throw null; } } + } + } + [System.CLSCompliantAttribute(false)] public abstract partial class Bmi1 : System.Runtime.Intrinsics.X86.X86Base { diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 53df6cb4f6b74..40df6910a9181 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -2082,6 +2082,7 @@ static const IntrinGroup supported_x86_intrinsics [] = { { "Aes", MONO_CPU_X86_AES, aes_methods, sizeof (aes_methods) }, { "Avx", MONO_CPU_X86_AVX, unsupported, sizeof (unsupported) }, { "Avx2", MONO_CPU_X86_AVX2, unsupported, sizeof (unsupported) }, + { "AvxVnni", 0, unsupported, sizeof (unsupported) }, { "Bmi1", MONO_CPU_X86_BMI1, bmi1_methods, sizeof (bmi1_methods) }, { "Bmi2", MONO_CPU_X86_BMI2, bmi2_methods, sizeof (bmi2_methods) }, { "Fma", MONO_CPU_X86_FMA, unsupported, sizeof (unsupported) }, diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Byte.cs new file mode 100644 index 0000000000000..0a38e01b11e12 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Byte.cs @@ -0,0 +1,501 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddByte() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddByte + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector256 _fld0; + public Vector256 _fld1; + public Vector256 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddByte testClass) + { + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Byte); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(SByte); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Byte[] _data1 = new Byte[Op1ElementCount]; + private static SByte[] _data2 = new SByte[Op2ElementCount]; + + private static Vector256 _clsVar0; + private static Vector256 _clsVar1; + private static Vector256 _clsVar2; + + private Vector256 _fld0; + private Vector256 _fld1; + private Vector256 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddByte() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddByte() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr))); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector256 addend, Vector256 left, Vector256 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + outArray[i] = Math.Clamp((addend[i] + (right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2]) + + (right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4])), int.MinValue, int.MaxValue); + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}(Vector256, Vector256): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Int16.cs new file mode 100644 index 0000000000000..4907c183f800d --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Int16.cs @@ -0,0 +1,500 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddInt16() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddInt16 + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector256 _fld0; + public Vector256 _fld1; + public Vector256 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddInt16 testClass) + { + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Int16[] _data1 = new Int16[Op1ElementCount]; + private static Int16[] _data2 = new Int16[Op2ElementCount]; + + private static Vector256 _clsVar0; + private static Vector256 _clsVar1; + private static Vector256 _clsVar2; + + private Vector256 _fld0; + private Vector256 _fld1; + private Vector256 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddInt16() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddInt16() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr))); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector256 addend, Vector256 left, Vector256 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + outArray[i] = Math.Clamp((addend[i] + (right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2])), int.MinValue, int.MaxValue); + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}(Vector256, Vector256): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Byte.cs new file mode 100644 index 0000000000000..2e221073681d0 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Byte.cs @@ -0,0 +1,504 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddSaturateByte() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector256 _fld0; + public Vector256 _fld1; + public Vector256 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte testClass) + { + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Byte); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(SByte); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Byte[] _data1 = new Byte[Op1ElementCount]; + private static SByte[] _data2 = new SByte[Op2ElementCount]; + + private static Vector256 _clsVar0; + private static Vector256 _clsVar1; + private static Vector256 _clsVar2; + + private Vector256 _fld0; + private Vector256 _fld1; + private Vector256 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr))); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector256 addend, Vector256 left, Vector256 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + int addend2 = right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2] + right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4]; + int value = addend[i] + addend2; + int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value; + int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp; + outArray[i] = c; + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}(Vector256, Vector256): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Int16.cs new file mode 100644 index 0000000000000..3c755d8e97ba9 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Int16.cs @@ -0,0 +1,505 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.ComponentModel; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddSaturateInt16() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector256 _fld0; + public Vector256 _fld1; + public Vector256 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 testClass) + { + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Int16[] _data1 = new Int16[Op1ElementCount]; + private static Int16[] _data2 = new Int16[Op2ElementCount]; + + private static Vector256 _clsVar0; + private static Vector256 _clsVar1; + private static Vector256 _clsVar2; + + private Vector256 _fld0; + private Vector256 _fld1; + private Vector256 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr))); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256), typeof(Vector256), typeof(Vector256) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector256)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector256 addend, Vector256 left, Vector256 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + int addend2 = right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2]; + int value = addend[i] + addend2; + int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value; + int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue: tmp; + outArray[i] = c; + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}(Vector256, Vector256): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_r.csproj new file mode 100644 index 0000000000000..721cd42651892 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_r.csproj @@ -0,0 +1,22 @@ + + + Exe + true + + true + + true + + + Embedded + + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_ro.csproj new file mode 100644 index 0000000000000..58b4945788725 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_ro.csproj @@ -0,0 +1,22 @@ + + + Exe + true + + true + + true + + + Embedded + True + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/Program.AvxVnni.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/Program.AvxVnni.cs new file mode 100644 index 0000000000000..ff68d04e248e0 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/Program.AvxVnni.cs @@ -0,0 +1,21 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + static Program() + { + TestList = new Dictionary() { + ["MultiplyWideningAndAdd.Byte"] = MultiplyWideningAndAddByte, + ["MultiplyWideningAndAdd.Int16"] = MultiplyWideningAndAddInt16, + ["MultiplyWideningAndAddSaturate.Byte"] = MultiplyWideningAndAddSaturateByte, + ["MultiplyWideningAndAddSaturate.Int16"] = MultiplyWideningAndAddSaturateInt16, + }; + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Byte.cs new file mode 100644 index 0000000000000..fe05ed09cfd86 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Byte.cs @@ -0,0 +1,515 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddByte() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddByte + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector128 _fld0; + public Vector128 _fld1; + public Vector128 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddByte testClass) + { + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Byte); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(SByte); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Byte[] _data1 = new Byte[Op1ElementCount]; + private static SByte[] _data2 = new SByte[Op2ElementCount]; + + private static Vector128 _clsVar0; + private static Vector128 _clsVar1; + private static Vector128 _clsVar2; + + private Vector128 _fld0; + private Vector128 _fld1; + private Vector128 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddByte() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddByte() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result1 = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result1); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector128 addend, Vector128 left, Vector128 right, Vector128 result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.WriteUnaligned(ref Unsafe.As(ref outArray[0]), result); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + private void ValidateResult(Vector128 addend, Vector128 left, Vector128 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + outArray[i] = Math.Clamp((addend[i] + (right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2]) + + (right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4])), int.MinValue, int.MaxValue); + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}(Vector128, Vector128): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Int16.cs new file mode 100644 index 0000000000000..8adf4e588dd92 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Int16.cs @@ -0,0 +1,500 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddInt16() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddInt16 + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector128 _fld0; + public Vector128 _fld1; + public Vector128 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddInt16 testClass) + { + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Int16[] _data1 = new Int16[Op1ElementCount]; + private static Int16[] _data2 = new Int16[Op2ElementCount]; + + private static Vector128 _clsVar0; + private static Vector128 _clsVar1; + private static Vector128 _clsVar2; + + private Vector128 _fld0; + private Vector128 _fld1; + private Vector128 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddInt16() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddInt16() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)) + ); + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAdd( + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAdd(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector128 addend, Vector128 left, Vector128 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + outArray[i] = Math.Clamp((addend[i] + (right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2])), int.MinValue, int.MaxValue); + } + + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}(Vector128, Vector128): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Byte.cs new file mode 100644 index 0000000000000..6b003ef003845 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Byte.cs @@ -0,0 +1,503 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddSaturateByte() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector128 _fld0; + public Vector128 _fld1; + public Vector128 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte testClass) + { + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Byte); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(SByte); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Byte[] _data1 = new Byte[Op1ElementCount]; + private static SByte[] _data2 = new SByte[Op2ElementCount]; + + private static Vector128 _clsVar0; + private static Vector128 _clsVar1; + private static Vector128 _clsVar2; + + private Vector128 _fld0; + private Vector128 _fld1; + private Vector128 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector128 addend, Vector128 left, Vector128 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Byte[] inArray1 = new Byte[Op1ElementCount]; + SByte[] inArray2 = new SByte[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + int addend2 = right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2] + right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4]; + int value = addend[i] + addend2; + int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value; + int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp; + outArray[i] = c; + } + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}(Vector128, Vector128): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Int16.cs new file mode 100644 index 0000000000000..a96951f4ace85 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Int16.cs @@ -0,0 +1,503 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Text.RegularExpressions; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + private static void MultiplyWideningAndAddSaturateInt16() + { + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + else + { + Console.WriteLine("Avx Is Not Supported"); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it. + + if (Avx.IsSupported) + { + // Validates calling via reflection works, using Load + test.RunReflectionScenario_Load(); + + // Validates calling via reflection works, using LoadAligned + test.RunReflectionScenario_LoadAligned(); + } + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + if (Avx.IsSupported) + { + // Validates passing a local works, using Load + test.RunLclVarScenario_Load(); + + // Validates passing a local works, using LoadAligned + test.RunLclVarScenario_LoadAligned(); + } + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + Console.WriteLine("Test Is Not Supported"); + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf(); + + if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alighment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As(ref inArray2[0]), (uint)sizeOfinArray2); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlighment) + { + return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1)); + } + } + private struct TestStruct + { + public Vector128 _fld0; + public Vector128 _fld1; + public Vector128 _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref testStruct._fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + return testStruct; + } + + public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 testClass) + { + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = 32; + + private static readonly int Op0ElementCount = Unsafe.SizeOf>() / sizeof(Int32); + private static readonly int Op1ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int Op2ElementCount = Unsafe.SizeOf>() / sizeof(Int16); + private static readonly int RetElementCount = Unsafe.SizeOf>() / sizeof(Int32); + + private static Int32[] _data0 = new Int32[Op0ElementCount]; + private static Int16[] _data1 = new Int16[Op1ElementCount]; + private static Int16[] _data2 = new Int16[Op2ElementCount]; + + private static Vector128 _clsVar0; + private static Vector128 _clsVar1; + private static Vector128 _clsVar2; + + private Vector128 _fld0; + private Vector128 _fld1; + private Vector128 _fld2; + + private DataTable _dataTable; + + static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16() + { + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _clsVar2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + } + + public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16() + { + Succeeded = true; + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld0), ref Unsafe.As(ref _data0[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld1), ref Unsafe.As(ref _data1[0]), (uint)Unsafe.SizeOf>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); } + Unsafe.CopyBlockUnaligned(ref Unsafe.As, byte>(ref _fld2), ref Unsafe.As(ref _data2[0]), (uint)Unsafe.SizeOf>()); + + for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); } + _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => AvxVnni.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)) + ); + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Unsafe.Read>(_dataTable.inArray0Ptr), + Unsafe.Read>(_dataTable.inArray1Ptr), + Unsafe.Read>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned)); + + var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128), typeof(Vector128), typeof(Vector128) }) + .Invoke(null, new object[] { + Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)), + Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)) + }); + + Unsafe.Write(_dataTable.outArrayPtr, (Vector128)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate( + _clsVar0, + _clsVar1, + _clsVar2 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var first = Unsafe.Read>(_dataTable.inArray0Ptr); + var second = Unsafe.Read>(_dataTable.inArray1Ptr); + var third = Unsafe.Read>(_dataTable.inArray2Ptr); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load)); + + var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned)); + + var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)); + var second = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)); + var third = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr)); + var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(first, second, third, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult(Vector128 addend, Vector128 left, Vector128 right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray0[0]), addend); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "") + { + Int32[] inArray0 = new Int32[Op0ElementCount]; + Int16[] inArray1 = new Int16[Op1ElementCount]; + Int16[] inArray2 = new Int16[Op2ElementCount]; + Int32[] outArray = new Int32[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray0[0]), ref Unsafe.AsRef(addend), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf>()); + + ValidateResult(inArray0, inArray1, inArray2, outArray, method); + } + + private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + Int32[] outArray = new Int32[RetElementCount]; + + for (var i = 0; i < RetElementCount; i++) + { + int addend2 = right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2]; + int value = addend[i] + addend2; + int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value; + int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp; + outArray[i] = c; + } + for (var i = 0; i < RetElementCount; i++) + { + if (result[i] != outArray[i]) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}(Vector128, Vector128): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_r.csproj new file mode 100644 index 0000000000000..8274558730341 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_r.csproj @@ -0,0 +1,22 @@ + + + Exe + true + + true + + true + + + Embedded + + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_ro.csproj new file mode 100644 index 0000000000000..669831c75815b --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_ro.csproj @@ -0,0 +1,22 @@ + + + Exe + true + + true + + true + + + Embedded + True + + + + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/Program.AvxVnni_Vector128.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/Program.AvxVnni_Vector128.cs new file mode 100644 index 0000000000000..ff68d04e248e0 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/Program.AvxVnni_Vector128.cs @@ -0,0 +1,21 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + static Program() + { + TestList = new Dictionary() { + ["MultiplyWideningAndAdd.Byte"] = MultiplyWideningAndAddByte, + ["MultiplyWideningAndAdd.Int16"] = MultiplyWideningAndAddInt16, + ["MultiplyWideningAndAddSaturate.Byte"] = MultiplyWideningAndAddSaturateByte, + ["MultiplyWideningAndAddSaturate.Int16"] = MultiplyWideningAndAddSaturateInt16, + }; + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/Program.cs b/src/tests/JIT/HardwareIntrinsics/X86/Shared/Program.cs index 884eff7b4465c..1d772682f8644 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/Program.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/Program.cs @@ -72,6 +72,7 @@ private static void PrintSupportedIsa() TestLibrary.TestFramework.LogInformation($" AES: {Aes.IsSupported}"); TestLibrary.TestFramework.LogInformation($" AVX: {Avx.IsSupported}"); TestLibrary.TestFramework.LogInformation($" AVX2: {Avx2.IsSupported}"); + TestLibrary.TestFramework.LogInformation($" AVXVNNI: {AvxVnni.IsSupported}"); TestLibrary.TestFramework.LogInformation($" BMI1: {Bmi1.IsSupported}"); TestLibrary.TestFramework.LogInformation($" BMI2: {Bmi2.IsSupported}"); TestLibrary.TestFramework.LogInformation($" FMA: {Fma.IsSupported}"); diff --git a/src/tests/issues.targets b/src/tests/issues.targets index ff6ffd3a7cc0c..926996ebf9ab5 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -1034,6 +1034,12 @@ https://github.com/dotnet/runtime/issues/48190 + + Mono crashes when new unsupported intrinsic groups are added, https://github.com/dotnet/runtime/issues/53078 + + + Mono crashes when new unsupported intrinsic groups are added, https://github.com/dotnet/runtime/issues/53078 + Mono doesn't have a dynamic pgo or tiered compilation infrastructure