Skip to content

Commit

Permalink
Cleanup some HWIntrinsic logic to ensure the right gtType and simdSiz…
Browse files Browse the repository at this point in the history
…e are being set (#83516)

* Cleanup some HWIntrinsic logic to ensure the right gtType and simdSize are being set

* Apply formatting patch

* Add a missing GetLower call

* Fix an assert for lowering TYP_SIMD12 where its handled as TYP_SIMD16

* Ensure GetLower is used in Dot for TYP_SIMD32

* Apply formatting patch

* Insert after, not before, for the _GetLower to avoid a codegen regression

* Put the _GetLower in the right place to avoid the codegen regression

* Don't change the simd size of TYP_SIMD8/12 DotProduct unnecessarily
  • Loading branch information
tannergooding committed Mar 21, 2023
1 parent beab6cd commit 12e9711
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 110 deletions.
59 changes: 28 additions & 31 deletions src/coreclr/jit/gentree.cpp
Expand Up @@ -19544,7 +19544,8 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
assert(varTypeIsArithmetic(simdBaseType));

assert(op1 != nullptr);
assert(op1->TypeIs(type, simdBaseType, genActualType(simdBaseType)));
assert(op1->TypeIs(type, simdBaseType, genActualType(simdBaseType)) ||
(op1->TypeIs(TYP_SIMD12) && type == TYP_SIMD16));

assert(op2 != nullptr);

Expand All @@ -19554,7 +19555,8 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
}
else
{
assert(op2->TypeIs(type, simdBaseType, genActualType(simdBaseType)));
assert(op2->TypeIs(type, simdBaseType, genActualType(simdBaseType)) ||
(op2->TypeIs(TYP_SIMD12) && type == TYP_SIMD16));
}

NamedIntrinsic intrinsic = NI_Illegal;
Expand Down Expand Up @@ -22425,11 +22427,9 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type,

GenTree* vecCon2 = gtCloneExpr(vecCon1);

tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp2 = gtNewSimdHWIntrinsicNode(type, op2, vecCon2, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE2_PackUnsignedSaturate, CORINFO_TYPE_UBYTE,
tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, CORINFO_TYPE_UBYTE,
simdSize, isSimdAsHWIntrinsic);

CorInfoType permuteBaseJitType = (simdBaseType == TYP_BYTE) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG;
Expand Down Expand Up @@ -22468,11 +22468,9 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type,

GenTree* vecCon2 = gtCloneExpr(vecCon1);

tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp2 = gtNewSimdHWIntrinsicNode(type, op2, vecCon2, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE41_PackUnsignedSaturate, CORINFO_TYPE_USHORT,
tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, CORINFO_TYPE_USHORT,
simdSize, isSimdAsHWIntrinsic);

CorInfoType permuteBaseJitType = (simdBaseType == TYP_BYTE) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG;
Expand Down Expand Up @@ -22576,10 +22574,8 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type,

GenTree* vecCon2 = gtCloneExpr(vecCon1);

tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp2 = gtNewSimdHWIntrinsicNode(type, op2, vecCon2, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);

return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE2_PackUnsignedSaturate, CORINFO_TYPE_UBYTE,
simdSize, isSimdAsHWIntrinsic);
Expand Down Expand Up @@ -22618,10 +22614,10 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type,

GenTree* vecCon2 = gtCloneExpr(vecCon1);

tmp1 = gtNewSimdHWIntrinsicNode(type, op1, vecCon1, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp2 = gtNewSimdHWIntrinsicNode(type, op2, vecCon2, NI_SSE2_And, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp1 =
gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp2 =
gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);

return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE41_PackUnsignedSaturate,
CORINFO_TYPE_USHORT, simdSize, isSimdAsHWIntrinsic);
Expand Down Expand Up @@ -22928,16 +22924,16 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type,
simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;

GenTree* op1Dup = fgMakeMultiUse(&op1, clsHnd);
GenTree* op1Lower = gtNewSimdHWIntrinsicNode(type, op1, NI_Vector256_GetLower, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
GenTree* op1Lower = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector256_GetLower, simdBaseJitType,
simdSize, isSimdAsHWIntrinsic);

op2 = gtNewVconNode(TYP_SIMD16);
op2->AsVecCon()->gtSimd16Val = vecCns.v128[0];

op1Lower = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16,
isSimdAsHWIntrinsic);

GenTree* op1Upper = gtNewSimdHWIntrinsicNode(type, op1Dup, gtNewIconNode(1), NI_AVX_ExtractVector128,
GenTree* op1Upper = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Dup, gtNewIconNode(1), NI_AVX_ExtractVector128,
simdBaseJitType, simdSize, isSimdAsHWIntrinsic);

op2 = gtNewVconNode(TYP_SIMD16);
Expand Down Expand Up @@ -23346,12 +23342,12 @@ GenTree* Compiler::gtNewSimdSumNode(
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode(0x01, TYP_INT), NI_AVX_ExtractVector128,
simdBaseJitType, simdSize, isSimdAsHWIntrinsic);

tmp = gtNewSimdHWIntrinsicNode(simdType, tmp, NI_Vector256_GetLower, simdBaseJitType, simdSize,
tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp, NI_Vector256_GetLower, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, tmp, intrinsic, simdBaseJitType, 16, isSimdAsHWIntrinsic);
}

return gtNewSimdHWIntrinsicNode(type, op1, NI_Vector128_ToScalar, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
return gtNewSimdHWIntrinsicNode(type, op1, NI_Vector128_ToScalar, simdBaseJitType, 16, isSimdAsHWIntrinsic);
#elif defined(TARGET_ARM64)
switch (simdBaseType)
{
Expand Down Expand Up @@ -23544,8 +23540,8 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
assert(!varTypeIsIntegral(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));

tmp1 =
gtNewSimdHWIntrinsicNode(type, op1, NI_Vector256_GetLower, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector256_GetLower, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);

switch (simdBaseType)
{
Expand Down Expand Up @@ -23673,7 +23669,8 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(

if (simdSize == 8)
{
tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, NI_Vector128_GetLower, simdBaseJitType, 16, isSimdAsHWIntrinsic);
tmp1 =
gtNewSimdHWIntrinsicNode(TYP_SIMD8, tmp1, NI_Vector128_GetLower, simdBaseJitType, 16, isSimdAsHWIntrinsic);
}

return tmp1;
Expand Down Expand Up @@ -23706,8 +23703,8 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
assert(!varTypeIsIntegral(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));

tmp1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(1), NI_AVX_ExtractVector128, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode(1), NI_AVX_ExtractVector128, simdBaseJitType,
simdSize, isSimdAsHWIntrinsic);

switch (simdBaseType)
{
Expand Down Expand Up @@ -23860,7 +23857,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(
zero = gtNewZeroConNode(TYP_SIMD16);
tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
simdBaseJitType, 16, isSimdAsHWIntrinsic);
return gtNewSimdHWIntrinsicNode(type, tmp1, NI_Vector128_GetLower, simdBaseJitType, simdSize,
return gtNewSimdHWIntrinsicNode(TYP_SIMD8, tmp1, NI_Vector128_GetLower, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
}
#else
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Expand Up @@ -424,7 +424,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
assert(retType == TYP_SIMD8);

op1 = impSIMDPopStack(TYP_SIMD16);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_GetLower, simdBaseJitType, simdSize);
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_Vector128_GetLower, simdBaseJitType, simdSize);
break;
}

Expand Down Expand Up @@ -1056,7 +1056,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

if (varTypeIsByte(simdBaseType) && (simdSize == 16))
{
CORINFO_CLASS_HANDLE simdClsHnd = gtGetStructHandleForSimdOrHW(simdType, simdBaseJitType);
CORINFO_CLASS_HANDLE simdClsHnd = gtGetStructHandleForSimdOrHW(TYP_SIMD16, simdBaseJitType);

op1 = impCloneExpr(op1, &op2, simdClsHnd, CHECK_SPILL_ALL,
nullptr DEBUGARG("Clone op1 for vector extractmostsignificantbits"));
Expand All @@ -1069,10 +1069,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
/* isSimdAsHWIntrinsic */ false);
op1 = gtNewCastNode(TYP_INT, op1, /* isUnsigned */ true, TYP_INT);

GenTree* zero = gtNewZeroConNode(simdType);
GenTree* zero = gtNewZeroConNode(TYP_SIMD16);
ssize_t index = 8 / genTypeSize(simdBaseType);

op2 = gtNewSimdHWIntrinsicNode(simdType, op2, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op2, NI_Vector128_GetLower, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ false);
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/hwintrinsicxarch.cpp
Expand Up @@ -1382,7 +1382,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
simdVal.u64[3] = 0x8080808080808080;

shuffleIntrinsic = NI_AVX2_Shuffle;
moveMaskIntrinsic = NI_AVX2_MoveMask;
moveMaskIntrinsic = NI_SSE2_MoveMask;
}
else if (compOpportunisticallyDependsOn(InstructionSet_SSSE3))
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lower.h
Expand Up @@ -349,13 +349,13 @@ class Lowering final : public Phase
GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition);
GenTree* LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp);
void LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicDot(GenTreeHWIntrinsic* node);
#if defined(TARGET_XARCH)
void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode);
GenTree* TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode);
Expand Down

0 comments on commit 12e9711

Please sign in to comment.