Skip to content

Commit

Permalink
Merge pull request #18484 from Akira1Saitoh/aarch64CInterpFrame041
Browse files Browse the repository at this point in the history
(0.41) AArch64: Add space for outgoing JNI argument to J9CInterpreterStackFrame
  • Loading branch information
pshipton committed Nov 20, 2023
2 parents a803f00 + 7494620 commit 461bf3c
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 89 deletions.
6 changes: 2 additions & 4 deletions runtime/codert_vm/arm64nathelp.m4
Expand Up @@ -165,8 +165,7 @@ define({OLD_DUAL_MODE_HELPER_NO_RETURN_VALUE},{

define({NEW_DUAL_MODE_HELPER},{
DECLARE_EXTERN(fast_$1)
START_PROC($1)
SAVE_FPLR
BEGIN_HELPER($1)
CALL_DIRECT(fast_$1)
cbz x0,.L_done_$1
ldr x30,JIT_GPR_SAVE_SLOT(30)
Expand All @@ -190,8 +189,7 @@ define({NEW_DUAL_MODE_HELPER},{

define({NEW_DUAL_MODE_HELPER_NO_RETURN_VALUE},{
DECLARE_EXTERN(fast_$1)
START_PROC($1)
SAVE_FPLR
BEGIN_HELPER($1)
CALL_DIRECT(fast_$1)
cbz x0,.L_done_$1
ldr x30,JIT_GPR_SAVE_SLOT(30)
Expand Down
48 changes: 12 additions & 36 deletions runtime/compiler/aarch64/codegen/ARM64JNILinkage.cpp
Expand Up @@ -395,7 +395,6 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
{
const TR::ARM64LinkageProperties &properties = _systemLinkage->getProperties();
TR::ARM64MemoryArgument *pushToMemory = NULL;
TR::Register *argMemReg;
TR::Register *tempReg;
int32_t argIndex = 0;
int32_t numMemArgs = 0;
Expand Down Expand Up @@ -503,12 +502,18 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
if (numMemArgs > 0)
{
pushToMemory = new (trStackMemory()) TR::ARM64MemoryArgument[numMemArgs];

argMemReg = cg()->allocateRegister();
}

// align to 16-byte boundary
totalSize = (totalSize + 15) & (~15);
/*
* We have J9_INLINE_JNI_MAX_ARG_COUNT (32) slots in interpreter frame.
* If the size of the native stack used for passing parameters exceeds this size,
* it will overwrite preserved registers.
* The number of parameters is guaranteed to be <= J9_INLINE_JNI_MAX_ARG_COUNT
* when HasFixedFrameC_CallingConvention codegen flag is set.
*/
TR_ASSERT_FATAL(totalSize <= sizeof(UDATA) * J9_INLINE_JNI_MAX_ARG_COUNT, "totalSize must not be larger than J9_INLINE_JNI_MAX_ARG_COUNT slots");

numIntegerArgs = 0;
numFloatArgs = 0;
Expand All @@ -522,6 +527,7 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
#if defined(OSX)
sigCursor = (char *)sig;
#endif
TR::RealRegister *sp = cg()->machine()->getRealRegister(properties.getStackPointerRegister());

for (i = firstArgumentChild; i < callNode->getNumChildren(); i++)
{
Expand Down Expand Up @@ -611,7 +617,7 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
#else
#error Unsupported platform
#endif
getOutgoingArgumentMemRef(argMemReg, argOffset, argRegister, op, pushToMemory[argIndex++]);
getOutgoingArgumentMemRef(sp, argOffset, argRegister, op, pushToMemory[argIndex++]);
argOffset += offsetInc;
}
numIntegerArgs++;
Expand Down Expand Up @@ -672,7 +678,7 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
#error Unsupported platform
#endif
}
getOutgoingArgumentMemRef(argMemReg, argOffset, argRegister, op, pushToMemory[argIndex++]);
getOutgoingArgumentMemRef(sp, argOffset, argRegister, op, pushToMemory[argIndex++]);
argOffset += offsetInc;
}
numFloatArgs++;
Expand Down Expand Up @@ -746,17 +752,12 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen

if (numMemArgs > 0)
{
TR::RealRegister *sp = cg()->machine()->getRealRegister(properties.getStackPointerRegister());
generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::subimmx, callNode, argMemReg, sp, totalSize);

for (argIndex = 0; argIndex < numMemArgs; argIndex++)
{
TR::Register *aReg = pushToMemory[argIndex].argRegister;
generateMemSrc1Instruction(cg(), pushToMemory[argIndex].opCode, callNode, pushToMemory[argIndex].argMemory, aReg);
cg()->stopUsingRegister(aReg);
}

cg()->stopUsingRegister(argMemReg);
}

return totalSize;
Expand Down Expand Up @@ -1058,20 +1059,7 @@ TR::Register *J9::ARM64::JNILinkage::buildDirectDispatch(TR::Node *callNode)
#endif
TR::RegisterDependencyConditions *deps = new (trHeapMemory()) TR::RegisterDependencyConditions(maxRegisters, maxPostRegisters, trMemory());

size_t spSize = buildJNIArgs(callNode, deps, passThread, passReceiver, killNonVolatileGPRs);
TR::RealRegister *sp = machine()->getRealRegister(_systemLinkage->getProperties().getStackPointerRegister());

if (spSize > 0)
{
if (constantIsUnsignedImm12(spSize))
{
generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::subimmx, callNode, sp, sp, spSize);
}
else
{
TR_ASSERT_FATAL(false, "Too many arguments.");
}
}
buildJNIArgs(callNode, deps, passThread, passReceiver, killNonVolatileGPRs);

TR::Register *returnRegister = getReturnRegisterFromDeps(callNode, deps);

Expand Down Expand Up @@ -1111,18 +1099,6 @@ TR::Register *J9::ARM64::JNILinkage::buildDirectDispatch(TR::Node *callNode)
TR::Instruction *callInstr = generateMethodDispatch(callNode, isJNIGCPoint, deps, targetAddress, x9Reg);
generateLabelInstruction(cg(), TR::InstOpCode::label, callNode, returnLabel, callInstr);

if (spSize > 0)
{
if (constantIsUnsignedImm12(spSize))
{
generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::addimmx, callNode, sp, sp, spSize);
}
else
{
TR_ASSERT_FATAL(false, "Too many arguments.");
}
}

if (dropVMAccess)
{
#ifdef J9VM_INTERP_ATOMIC_FREE_JNI
Expand Down
3 changes: 3 additions & 0 deletions runtime/compiler/aarch64/codegen/J9CodeGenerator.cpp
Expand Up @@ -76,6 +76,9 @@ J9::ARM64::CodeGenerator::initialize()
{
comp->setOption(TR_EnableMonitorCacheLookup);
}

if (comp->fej9()->hasFixedFrameC_CallingConvention())
cg->setHasFixedFrameC_CallingConvention();
}

TR::Linkage *
Expand Down
2 changes: 1 addition & 1 deletion runtime/compiler/env/VMJ9.cpp
Expand Up @@ -9662,7 +9662,7 @@ portLibCall_sysinfo_has_fixed_frame_C_calling_convention()

/* AArch64 */
#if defined(TR_HOST_ARM64) && defined(TR_TARGET_ARM64)
return false;
return true;
#endif

/* X86 */
Expand Down
114 changes: 66 additions & 48 deletions runtime/oti/arm64helpers.m4
Expand Up @@ -75,10 +75,23 @@ define({JIT_GPR_SAVE_OFFSET},{eval(J9TR_cframe_jitGPRs+(($1)*ALen))})
define({JIT_GPR_SAVE_SLOT},{[sp,{#}JIT_GPR_SAVE_OFFSET($1)]})
define({JIT_FPR_SAVE_OFFSET},{eval(J9TR_cframe_jitFPRs+(($1)*8))})
define({JIT_FPR_SAVE_SLOT},{[sp,{#}JIT_FPR_SAVE_OFFSET($1)]})
dnl The size of J9CInterpreterStackFrame is now larger than 504 bytes, so we cannot simply use sp as the base register for ldp/stp instructions
dnl because the offset would become too large and invalid for ldp/stp.
dnl Thus, we use sp + JIT_GPR_SAVE_OFFSET(0) as the base address for ldp/stp. We chose x16 as the base register.
dnl It is intra procedure call register and JIT does not use it, so it is safe to clobber x16.
define({JIT_GPR_SAVE_SLOT_X16},{[x16,{#}eval(($1)*ALen)]})
define({JIT_FPR_SAVE_SLOT_X16},{[x16,{#}eval(JIT_FPR_SAVE_OFFSET($1)-JIT_GPR_SAVE_OFFSET(0))]})

define({SAVE_FPLR},{stp x29,x30,JIT_GPR_SAVE_SLOT(29)})
dnl JIT_GPR_SAVE_OFFSET is too large to fit in ldp/stp instructions.
define({SAVE_FPLR},{
str x29,JIT_GPR_SAVE_SLOT(29)
str x30,JIT_GPR_SAVE_SLOT(30)
})

define({RESTORE_FPLR},{ldp x29,x30,JIT_GPR_SAVE_SLOT(29)})
define({RESTORE_FPLR},{
ldr x29,JIT_GPR_SAVE_SLOT(29)
ldr x30,JIT_GPR_SAVE_SLOT(30)
})

define({BEGIN_HELPER},{
START_PROC($1)
Expand All @@ -95,22 +108,23 @@ define({CALL_C_WITH_VMTHREAD},{
CALL_DIRECT($1)
})

dnl x16 is inter-procedure-call register
define({SAVE_C_VOLATILE_REGS},{
stp x0,x1,JIT_GPR_SAVE_SLOT(0)
stp x2,x3,JIT_GPR_SAVE_SLOT(2)
stp x4,x5,JIT_GPR_SAVE_SLOT(4)
stp x6,x7,JIT_GPR_SAVE_SLOT(6)
stp x8,x9,JIT_GPR_SAVE_SLOT(8)
stp x10,x11,JIT_GPR_SAVE_SLOT(10)
stp x12,x13,JIT_GPR_SAVE_SLOT(12)
stp x14,x15,JIT_GPR_SAVE_SLOT(14)
stp x16,x17,JIT_GPR_SAVE_SLOT(16)
str x18,JIT_GPR_SAVE_SLOT(18)
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
stp x0,x1,JIT_GPR_SAVE_SLOT_X16(0)
stp x2,x3,JIT_GPR_SAVE_SLOT_X16(2)
stp x4,x5,JIT_GPR_SAVE_SLOT_X16(4)
stp x6,x7,JIT_GPR_SAVE_SLOT_X16(6)
stp x8,x9,JIT_GPR_SAVE_SLOT_X16(8)
stp x10,x11,JIT_GPR_SAVE_SLOT_X16(10)
stp x12,x13,JIT_GPR_SAVE_SLOT_X16(12)
stp x14,x15,JIT_GPR_SAVE_SLOT_X16(14)
stp x17,x18,JIT_GPR_SAVE_SLOT_X16(17)
ifdef({METHOD_INVOCATION},{
stp d0,d1,JIT_FPR_SAVE_SLOT(0)
stp d2,d3,JIT_FPR_SAVE_SLOT(2)
stp d4,d5,JIT_FPR_SAVE_SLOT(4)
stp d6,d7,JIT_FPR_SAVE_SLOT(6)
stp d0,d1,JIT_FPR_SAVE_SLOT_X16(0)
stp d2,d3,JIT_FPR_SAVE_SLOT_X16(2)
stp d4,d5,JIT_FPR_SAVE_SLOT_X16(4)
stp d6,d7,JIT_FPR_SAVE_SLOT_X16(6)
},{ dnl METHOD_INVOCATION
add x15, sp, JIT_FPR_SAVE_OFFSET(0)
st1 {{v0.4s, v1.4s, v2.4s, v3.4s}}, [x15], #64
Expand All @@ -125,11 +139,12 @@ ifdef({METHOD_INVOCATION},{
})

define({RESTORE_C_VOLATILE_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
ifdef({METHOD_INVOCATION},{
ldp d0,d1,JIT_FPR_SAVE_SLOT(0)
ldp d2,d3,JIT_FPR_SAVE_SLOT(2)
ldp d4,d5,JIT_FPR_SAVE_SLOT(4)
ldp d6,d7,JIT_FPR_SAVE_SLOT(6)
ldp d0,d1,JIT_FPR_SAVE_SLOT_X16(0)
ldp d2,d3,JIT_FPR_SAVE_SLOT_X16(2)
ldp d4,d5,JIT_FPR_SAVE_SLOT_X16(4)
ldp d6,d7,JIT_FPR_SAVE_SLOT_X16(6)
},{ dnl METHOD_INVOCATION
add x15, sp, JIT_FPR_SAVE_OFFSET(0)
ld1 {{v0.4s, v1.4s, v2.4s, v3.4s}}, [x15], #64
Expand All @@ -141,37 +156,38 @@ ifdef({METHOD_INVOCATION},{
ld1 {{v24.4s, v25.4s, v26.4s, v27.4s}}, [x15], #64
ld1 {{v28.4s, v29.4s, v30.4s, v31.4s}}, [x15]
}) dnl METHOD_INVOCATION
ldp x0,x1,JIT_GPR_SAVE_SLOT(0)
ldp x2,x3,JIT_GPR_SAVE_SLOT(2)
ldp x4,x5,JIT_GPR_SAVE_SLOT(4)
ldp x6,x7,JIT_GPR_SAVE_SLOT(6)
ldp x8,x9,JIT_GPR_SAVE_SLOT(8)
ldp x10,x11,JIT_GPR_SAVE_SLOT(10)
ldp x12,x13,JIT_GPR_SAVE_SLOT(12)
ldp x14,x15,JIT_GPR_SAVE_SLOT(14)
ldp x16,x17,JIT_GPR_SAVE_SLOT(16)
ldr x18,JIT_GPR_SAVE_SLOT(18)
ldp x0,x1,JIT_GPR_SAVE_SLOT_X16(0)
ldp x2,x3,JIT_GPR_SAVE_SLOT_X16(2)
ldp x4,x5,JIT_GPR_SAVE_SLOT_X16(4)
ldp x6,x7,JIT_GPR_SAVE_SLOT_X16(6)
ldp x8,x9,JIT_GPR_SAVE_SLOT_X16(8)
ldp x10,x11,JIT_GPR_SAVE_SLOT_X16(10)
ldp x12,x13,JIT_GPR_SAVE_SLOT_X16(12)
ldp x14,x15,JIT_GPR_SAVE_SLOT_X16(14)
ldp x17,x18,JIT_GPR_SAVE_SLOT_X16(17)
})

dnl No need to save/restore d8-15 - the stack walker will never need to read
dnl or modify them (no preserved FPRs in the JIT private linkage).

define({SAVE_C_NONVOLATILE_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
str x19,JIT_GPR_SAVE_SLOT(19)
stp x20,x21,JIT_GPR_SAVE_SLOT(20)
stp x22,x23,JIT_GPR_SAVE_SLOT(22)
stp x24,x25,JIT_GPR_SAVE_SLOT(24)
stp x26,x27,JIT_GPR_SAVE_SLOT(26)
stp x28,x29,JIT_GPR_SAVE_SLOT(28)
stp x20,x21,JIT_GPR_SAVE_SLOT_X16(20)
stp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
stp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
stp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
stp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({RESTORE_C_NONVOLATILE_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
ldr x19,JIT_GPR_SAVE_SLOT(19)
ldp x20,x21,JIT_GPR_SAVE_SLOT(20)
ldp x22,x23,JIT_GPR_SAVE_SLOT(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT(28)
ldp x20,x21,JIT_GPR_SAVE_SLOT_X16(20)
ldp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({SAVE_ALL_REGS},{
Expand All @@ -185,21 +201,23 @@ define({RESTORE_ALL_REGS},{
})

define({SAVE_PRESERVED_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
str x18,JIT_GPR_SAVE_SLOT(18)
str x21,JIT_GPR_SAVE_SLOT(21)
stp x22,x23,JIT_GPR_SAVE_SLOT(22)
stp x24,x25,JIT_GPR_SAVE_SLOT(24)
stp x26,x27,JIT_GPR_SAVE_SLOT(26)
stp x28,x29,JIT_GPR_SAVE_SLOT(28)
stp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
stp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
stp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
stp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({RESTORE_PRESERVED_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
ldr x18,JIT_GPR_SAVE_SLOT(18)
ldr x21,JIT_GPR_SAVE_SLOT(21)
ldp x22,x23,JIT_GPR_SAVE_SLOT(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT(28)
ldp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({BRANCH_VIA_VMTHREAD},{
Expand Down
1 change: 1 addition & 0 deletions runtime/oti/j9nonbuilder.h
Expand Up @@ -6287,6 +6287,7 @@ typedef struct J9CInterpreterStackFrame {
U_8 jitFPRs[16 * 8]; /* fpr0-15 */
#endif /* J9VM_ENV_DATA64 */
#elif defined(J9VM_ARCH_AARCH64) /* J9VM_ARCH_ARM */
UDATA outgoingArguments[J9_INLINE_JNI_MAX_ARG_COUNT];
UDATA preservedGPRs[12]; /* x19-x30 */
U_8 preservedFPRs[8 * 8]; /* v8-15 */
J9JITGPRSpillArea jitGPRs;
Expand Down

0 comments on commit 461bf3c

Please sign in to comment.