Skip to content

Commit

Permalink
Save 260k in InitValueNumStoreStatics (#85945)
Browse files Browse the repository at this point in the history
It appears that the big #define for the intrinsics is causing InitValueNumStoreStatics to get big enough that C++ optimization ends up being disabled, which means a lot of constant operations aren't folded. This rewrites it as a const table. It adds some redundant information to the tables that we #include/#define in several places but currently includes many assertions that the old and new values match.

Local size change of release clrjit_win_x64_x64.dll: 2,001,920 -> 1,735,680.  InitValueNumStoreStatics (261k) is replaced by 1.2k of static data.

Resolves #85953
  • Loading branch information
markples committed Jun 5, 2023
1 parent 2bf8f1a commit 9f8f653
Show file tree
Hide file tree
Showing 12 changed files with 2,204 additions and 2,126 deletions.
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.cpp
Expand Up @@ -1329,7 +1329,7 @@ void Compiler::compStartup()
emitter::emitInit();

// Static vars of ValueNumStore
ValueNumStore::InitValueNumStoreStatics();
ValueNumStore::ValidateValueNumStoreStatics();

compDisplayStaticSizes(jitstdout);
}
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/jit/gentree.cpp
Expand Up @@ -21,13 +21,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
/*****************************************************************************/

const unsigned char GenTree::gtOperKindTable[] = {
#define GTNODE(en, st, cm, ok) ((ok)&GTK_MASK) + GTK_COMMUTE *cm,
#define GTNODE(en, st, cm, ivn, ok) ((ok)&GTK_MASK) + GTK_COMMUTE *cm,
#include "gtlist.h"
};

#ifdef DEBUG
const GenTreeDebugOperKind GenTree::gtDebugOperKindTable[] = {
#define GTNODE(en, st, cm, ok) static_cast<GenTreeDebugOperKind>((ok)&DBK_MASK),
#define GTNODE(en, st, cm, ivn, ok) static_cast<GenTreeDebugOperKind>((ok)&DBK_MASK),
#include "gtlist.h"
};
#endif // DEBUG
Expand Down Expand Up @@ -171,7 +171,7 @@ static void printIndent(IndentStack* indentStack)
#if defined(DEBUG) || NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS || DUMP_FLOWGRAPHS

static const char* opNames[] = {
#define GTNODE(en, st, cm, ok) #en,
#define GTNODE(en, st, cm, ivn, ok) #en,
#include "gtlist.h"
};

Expand All @@ -187,7 +187,7 @@ const char* GenTree::OpName(genTreeOps op)
#if MEASURE_NODE_SIZE

static const char* opStructNames[] = {
#define GTNODE(en, st, cm, ok) #st,
#define GTNODE(en, st, cm, ivn, ok) #st,
#include "gtlist.h"
};

Expand All @@ -214,7 +214,7 @@ unsigned char GenTree::s_gtNodeSizes[GT_COUNT + 1];
#if NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS

unsigned char GenTree::s_gtTrueSizes[GT_COUNT + 1]{
#define GTNODE(en, st, cm, ok) sizeof(st),
#define GTNODE(en, st, cm, ivn, ok) sizeof(st),
#include "gtlist.h"
};

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentree.h
Expand Up @@ -1087,7 +1087,7 @@ struct GenTree
return TypeIs(type) || TypeIs(rest...);
}

static bool StaticOperIs(genTreeOps operCompare, genTreeOps oper)
static constexpr bool StaticOperIs(genTreeOps operCompare, genTreeOps oper)
{
return operCompare == oper;
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentreeopsdef.h
Expand Up @@ -8,7 +8,7 @@

enum genTreeOps : BYTE
{
#define GTNODE(en, st, cm, ok) GT_##en,
#define GTNODE(en, st, cm, ivn, ok) GT_##en,
#include "gtlist.h"

GT_COUNT,
Expand Down
289 changes: 145 additions & 144 deletions src/coreclr/jit/gtlist.h

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/coreclr/jit/hwintrinsic.cpp
Expand Up @@ -9,7 +9,7 @@
static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
// clang-format off
#if defined(TARGET_XARCH)
#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
#define HARDWARE_INTRINSIC(isa, name, size, numarg, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
{ \
/* name */ #name, \
/* flags */ static_cast<HWIntrinsicFlag>(flag), \
Expand All @@ -22,7 +22,7 @@ static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
},
#include "hwintrinsiclistxarch.h"
#elif defined (TARGET_ARM64)
#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
#define HARDWARE_INTRINSIC(isa, name, size, numarg, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
{ \
/* name */ #name, \
/* flags */ static_cast<HWIntrinsicFlag>(flag), \
Expand Down
1,372 changes: 686 additions & 686 deletions src/coreclr/jit/hwintrinsiclistarm64.h

Large diffs are not rendered by default.

2,192 changes: 1,097 additions & 1,095 deletions src/coreclr/jit/hwintrinsiclistxarch.h

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/coreclr/jit/namedintrinsiclist.h
Expand Up @@ -126,11 +126,11 @@ enum NamedIntrinsic : unsigned short
#ifdef FEATURE_HW_INTRINSICS
NI_HW_INTRINSIC_START,
#if defined(TARGET_XARCH)
#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
#define HARDWARE_INTRINSIC(isa, name, size, numarg, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
NI_##isa##_##name,
#include "hwintrinsiclistxarch.h"
#elif defined(TARGET_ARM64)
#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
#define HARDWARE_INTRINSIC(isa, name, size, numarg, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
NI_##isa##_##name,
#include "hwintrinsiclistarm64.h"
#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64)
Expand Down
106 changes: 82 additions & 24 deletions src/coreclr/jit/valuenum.cpp
Expand Up @@ -8852,7 +8852,65 @@ void ValueNumStore::vnDumpZeroObj(Compiler* comp, VNFuncApp* zeroObj)
#endif // DEBUG

// Static fields, methods.
static UINT8 vnfOpAttribs[VNF_COUNT];

#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic, extra) \
static_assert((arity) >= 0 || !(extra), "valuenumfuncs.h has EncodesExtraTypeArg==true and arity<0 for " #vnf);
#include "valuenumfuncs.h"

#ifdef FEATURE_HW_INTRINSICS

#define HARDWARE_INTRINSIC(isa, name, size, argCount, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
static_assert((size) != 0 || !(extra), \
"hwintrinsicslist<arch>.h has EncodesExtraTypeArg==true and size==0 for " #isa " " #name);
#if defined(TARGET_XARCH)
#include "hwintrinsiclistxarch.h"
#elif defined(TARGET_ARM64)
#include "hwintrinsiclistarm64.h"
#else
#error Unsupported platform
#endif

#endif // FEATURE_HW_INTRINSICS

/* static */ constexpr uint8_t ValueNumStore::GetOpAttribsForArity(genTreeOps oper, GenTreeOperKind kind)
{
return ((GenTree::StaticOperIs(oper, GT_SELECT) ? 3 : (((kind & GTK_UNOP) >> 1) | ((kind & GTK_BINOP) >> 1)))
<< VNFOA_ArityShift) &
VNFOA_ArityMask;
}

/* static */ constexpr uint8_t ValueNumStore::GetOpAttribsForGenTree(genTreeOps oper,
bool commute,
bool illegalAsVNFunc,
GenTreeOperKind kind)
{
return GetOpAttribsForArity(oper, kind) | (static_cast<uint8_t>(commute) << VNFOA_CommutativeShift) |
(static_cast<uint8_t>(illegalAsVNFunc) << VNFOA_IllegalGenTreeOpShift);
}

/* static */ constexpr uint8_t ValueNumStore::GetOpAttribsForFunc(int arity,
bool commute,
bool knownNonNull,
bool sharedStatic)
{
return (static_cast<uint8_t>(commute) << VNFOA_CommutativeShift) |
(static_cast<uint8_t>(knownNonNull) << VNFOA_KnownNonNullShift) |
(static_cast<uint8_t>(sharedStatic) << VNFOA_SharedStaticShift) |
((static_cast<uint8_t>(arity & ~(arity >> 31)) << VNFOA_ArityShift) & VNFOA_ArityMask);
}

const uint8_t ValueNumStore::s_vnfOpAttribs[VNF_COUNT] = {
#define GTNODE(en, st, cm, ivn, ok) \
GetOpAttribsForGenTree(static_cast<genTreeOps>(GT_##en), cm, ivn, static_cast<GenTreeOperKind>(ok)),
#include "gtlist.h"

0, // VNF_Boundary

#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic, extra) \
GetOpAttribsForFunc((arity) + static_cast<int>(extra), commute, knownNonNull, sharedStatic),
#include "valuenumfuncs.h"
};

static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memory.
GT_NULLCHECK, GT_QMARK, GT_COLON, GT_LOCKADD, GT_XADD, GT_XCHG,
GT_CMPXCHG, GT_LCLHEAP, GT_BOX, GT_XORR, GT_XAND, GT_STORE_DYN_BLK,
Expand All @@ -8869,15 +8927,10 @@ static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memo
// These control-flow operations need no values.
GT_JTRUE, GT_RETURN, GT_SWITCH, GT_RETFILT, GT_CKFINITE};

UINT8* ValueNumStore::s_vnfOpAttribs = nullptr;

void ValueNumStore::InitValueNumStoreStatics()
void ValueNumStore::ValidateValueNumStoreStatics()
{
// Make sure we have the constants right...
assert(unsigned(VNFOA_Arity1) == (1 << VNFOA_ArityShift));
assert(VNFOA_ArityMask == (VNFOA_MaxArity << VNFOA_ArityShift));

s_vnfOpAttribs = &vnfOpAttribs[0];
#if DEBUG
uint8_t arr[VNF_COUNT] = {};
for (unsigned i = 0; i < GT_COUNT; i++)
{
genTreeOps gtOper = static_cast<genTreeOps>(i);
Expand All @@ -8895,37 +8948,36 @@ void ValueNumStore::InitValueNumStoreStatics()
arity = 3;
}

vnfOpAttribs[i] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask);
arr[i] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask);

if (GenTree::OperIsCommutative(gtOper))
{
vnfOpAttribs[i] |= VNFOA_Commutative;
arr[i] |= VNFOA_Commutative;
}
}

// I so wish this wasn't the best way to do this...

int vnfNum = VNF_Boundary + 1; // The macro definition below will update this after using it.

#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) \
#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic, extra) \
if (commute) \
vnfOpAttribs[vnfNum] |= VNFOA_Commutative; \
arr[vnfNum] |= VNFOA_Commutative; \
if (knownNonNull) \
vnfOpAttribs[vnfNum] |= VNFOA_KnownNonNull; \
arr[vnfNum] |= VNFOA_KnownNonNull; \
if (sharedStatic) \
vnfOpAttribs[vnfNum] |= VNFOA_SharedStatic; \
arr[vnfNum] |= VNFOA_SharedStatic; \
if (arity > 0) \
vnfOpAttribs[vnfNum] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask); \
arr[vnfNum] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask); \
vnfNum++;

#include "valuenumfuncs.h"
#undef ValueNumFuncDef

assert(vnfNum == VNF_COUNT);

#define ValueNumFuncSetArity(vnfNum, arity) \
vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \
vnfOpAttribs[vnfNum] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask) /* set the new arity value */
arr[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \
arr[vnfNum] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask) /* set the new arity value */

#ifdef FEATURE_HW_INTRINSICS

Expand All @@ -8939,7 +8991,7 @@ void ValueNumStore::InitValueNumStoreStatics()
// These HW_Intrinsic's have an extra VNF_SimdType arg.
//
VNFunc func = VNFunc(VNF_HWI_FIRST + (id - NI_HW_INTRINSIC_START - 1));
unsigned oldArity = VNFuncArity(func);
unsigned oldArity = (arr[func] & VNFOA_ArityMask) >> VNFOA_ArityShift;
unsigned newArity = oldArity + 1;

ValueNumFuncSetArity(func, newArity);
Expand All @@ -8948,7 +9000,7 @@ void ValueNumStore::InitValueNumStoreStatics()
if (HWIntrinsicInfo::IsCommutative(id))
{
VNFunc func = VNFunc(VNF_HWI_FIRST + (id - NI_HW_INTRINSIC_START - 1));
vnfOpAttribs[func] |= VNFOA_Commutative;
arr[func] |= VNFOA_Commutative;
}
}

Expand All @@ -8958,17 +9010,23 @@ void ValueNumStore::InitValueNumStoreStatics()

for (unsigned i = 0; i < ArrLen(genTreeOpsIllegalAsVNFunc); i++)
{
vnfOpAttribs[genTreeOpsIllegalAsVNFunc[i]] |= VNFOA_IllegalGenTreeOp;
arr[genTreeOpsIllegalAsVNFunc[i]] |= VNFOA_IllegalGenTreeOp;
}

assert(ArrLen(arr) == ArrLen(s_vnfOpAttribs));
for (unsigned i = 0; i < ArrLen(arr); i++)
{
assert(arr[i] == s_vnfOpAttribs[i]);
}
#endif // DEBUG
}

#ifdef DEBUG
// Define the name array.
#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) #vnf,
#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic, extra) #vnf,

const char* ValueNumStore::VNFuncNameArr[] = {
#include "valuenumfuncs.h"
#undef ValueNumFuncDef
};

/* static */ const char* ValueNumStore::VNFuncName(VNFunc vnf)
Expand Down
35 changes: 26 additions & 9 deletions src/coreclr/jit/valuenum.h
Expand Up @@ -167,7 +167,7 @@ enum VNFunc
{
// Implicitly, elements of genTreeOps here.
VNF_Boundary = GT_COUNT,
#define ValueNumFuncDef(nm, arity, commute, knownNonNull, sharedStatic) VNF_##nm,
#define ValueNumFuncDef(nm, arity, commute, knownNonNull, sharedStatic, extra) VNF_##nm,
#include "valuenumfuncs.h"
VNF_COUNT
};
Expand Down Expand Up @@ -275,10 +275,21 @@ class ValueNumStore
VNFOA_SharedStatic = 0x40, // 1 iff this VNF is represent one of the shared static jit helpers
};

static const unsigned VNFOA_ArityShift = 2;
static const unsigned VNFOA_ArityBits = 3;
static const unsigned VNFOA_MaxArity = (1 << VNFOA_ArityBits) - 1; // Max arity we can represent.
static const unsigned VNFOA_ArityMask = (VNFOA_Arity4 | VNFOA_Arity2 | VNFOA_Arity1);
static const unsigned VNFOA_IllegalGenTreeOpShift = 0;
static const unsigned VNFOA_CommutativeShift = 1;
static const unsigned VNFOA_ArityShift = 2;
static const unsigned VNFOA_ArityBits = 3;
static const unsigned VNFOA_MaxArity = (1 << VNFOA_ArityBits) - 1; // Max arity we can represent.
static const unsigned VNFOA_ArityMask = (VNFOA_Arity4 | VNFOA_Arity2 | VNFOA_Arity1);
static const unsigned VNFOA_KnownNonNullShift = 5;
static const unsigned VNFOA_SharedStaticShift = 6;

static_assert(unsigned(VNFOA_IllegalGenTreeOp) == (1 << VNFOA_IllegalGenTreeOpShift));
static_assert(unsigned(VNFOA_Commutative) == (1 << VNFOA_CommutativeShift));
static_assert(unsigned(VNFOA_Arity1) == (1 << VNFOA_ArityShift));
static_assert(VNFOA_ArityMask == (VNFOA_MaxArity << VNFOA_ArityShift));
static_assert(unsigned(VNFOA_KnownNonNull) == (1 << VNFOA_KnownNonNullShift));
static_assert(unsigned(VNFOA_SharedStatic) == (1 << VNFOA_SharedStaticShift));

// These enum constants are used to encode the cast operation in the lowest bits by VNForCastOper
enum VNFCastAttrib
Expand All @@ -289,8 +300,14 @@ class ValueNumStore
VCA_ReservedBits = 0x01, // i.e. (VCA_UnsignedSrc)
};

// An array of length GT_COUNT, mapping genTreeOp values to their VNFOpAttrib.
static UINT8* s_vnfOpAttribs;
// Helpers and an array of length GT_COUNT, mapping genTreeOp values to their VNFOpAttrib.
static constexpr uint8_t GetOpAttribsForArity(genTreeOps oper, GenTreeOperKind kind);
static constexpr uint8_t GetOpAttribsForGenTree(genTreeOps oper,
bool commute,
bool illegalAsVNFunc,
GenTreeOperKind kind);
static constexpr uint8_t GetOpAttribsForFunc(int arity, bool commute, bool knownNonNull, bool sharedStatic);
static const uint8_t s_vnfOpAttribs[];

// Returns "true" iff gtOper is a legal value number function.
// (Requires InitValueNumStoreStatics to have been run.)
Expand Down Expand Up @@ -383,8 +400,8 @@ class ValueNumStore
GenTreeFlags GetFoldedArithOpResultHandleFlags(ValueNum vn);

public:
// Initializes any static variables of ValueNumStore.
static void InitValueNumStoreStatics();
// Validate that the new initializer for s_vnfOpAttribs matches the old code.
static void ValidateValueNumStoreStatics();

// Initialize an empty ValueNumStore.
ValueNumStore(Compiler* comp, CompAllocator allocator);
Expand Down

0 comments on commit 9f8f653

Please sign in to comment.