Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AArch64: Add space for outgoing JNI argument to J9CInterpreterStackFrame #18227

Merged
merged 1 commit into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions runtime/codert_vm/arm64nathelp.m4
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,7 @@ define({OLD_DUAL_MODE_HELPER_NO_RETURN_VALUE},{

define({NEW_DUAL_MODE_HELPER},{
DECLARE_EXTERN(fast_$1)
START_PROC($1)
SAVE_FPLR
BEGIN_HELPER($1)
CALL_DIRECT(fast_$1)
cbz x0,.L_done_$1
ldr x30,JIT_GPR_SAVE_SLOT(30)
Expand All @@ -190,8 +189,7 @@ define({NEW_DUAL_MODE_HELPER},{

define({NEW_DUAL_MODE_HELPER_NO_RETURN_VALUE},{
DECLARE_EXTERN(fast_$1)
START_PROC($1)
SAVE_FPLR
BEGIN_HELPER($1)
CALL_DIRECT(fast_$1)
cbz x0,.L_done_$1
ldr x30,JIT_GPR_SAVE_SLOT(30)
Expand Down
48 changes: 12 additions & 36 deletions runtime/compiler/aarch64/codegen/ARM64JNILinkage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,6 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
{
const TR::ARM64LinkageProperties &properties = _systemLinkage->getProperties();
TR::ARM64MemoryArgument *pushToMemory = NULL;
TR::Register *argMemReg;
TR::Register *tempReg;
int32_t argIndex = 0;
int32_t numMemArgs = 0;
Expand Down Expand Up @@ -503,12 +502,18 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
if (numMemArgs > 0)
{
pushToMemory = new (trStackMemory()) TR::ARM64MemoryArgument[numMemArgs];

argMemReg = cg()->allocateRegister();
}

// align to 16-byte boundary
totalSize = (totalSize + 15) & (~15);
/*
* We have J9_INLINE_JNI_MAX_ARG_COUNT (32) slots in interpreter frame.
* If the size of the native stack used for passing parameters exceeds this size,
* it will overwrite preserved registers.
* The number of parameters is guaranteed to be <= J9_INLINE_JNI_MAX_ARG_COUNT
* when HasFixedFrameC_CallingConvention codegen flag is set.
*/
TR_ASSERT_FATAL(totalSize <= sizeof(UDATA) * J9_INLINE_JNI_MAX_ARG_COUNT, "totalSize must not be larger than J9_INLINE_JNI_MAX_ARG_COUNT slots");

numIntegerArgs = 0;
numFloatArgs = 0;
Expand All @@ -522,6 +527,7 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
#if defined(OSX)
sigCursor = (char *)sig;
#endif
TR::RealRegister *sp = cg()->machine()->getRealRegister(properties.getStackPointerRegister());

for (i = firstArgumentChild; i < callNode->getNumChildren(); i++)
{
Expand Down Expand Up @@ -611,7 +617,7 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
#else
#error Unsupported platform
#endif
getOutgoingArgumentMemRef(argMemReg, argOffset, argRegister, op, pushToMemory[argIndex++]);
getOutgoingArgumentMemRef(sp, argOffset, argRegister, op, pushToMemory[argIndex++]);
argOffset += offsetInc;
}
numIntegerArgs++;
Expand Down Expand Up @@ -672,7 +678,7 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
#error Unsupported platform
#endif
}
getOutgoingArgumentMemRef(argMemReg, argOffset, argRegister, op, pushToMemory[argIndex++]);
getOutgoingArgumentMemRef(sp, argOffset, argRegister, op, pushToMemory[argIndex++]);
argOffset += offsetInc;
}
numFloatArgs++;
Expand Down Expand Up @@ -746,17 +752,12 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen

if (numMemArgs > 0)
{
TR::RealRegister *sp = cg()->machine()->getRealRegister(properties.getStackPointerRegister());
generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::subimmx, callNode, argMemReg, sp, totalSize);

for (argIndex = 0; argIndex < numMemArgs; argIndex++)
{
TR::Register *aReg = pushToMemory[argIndex].argRegister;
generateMemSrc1Instruction(cg(), pushToMemory[argIndex].opCode, callNode, pushToMemory[argIndex].argMemory, aReg);
cg()->stopUsingRegister(aReg);
}

cg()->stopUsingRegister(argMemReg);
}

return totalSize;
Expand Down Expand Up @@ -1058,20 +1059,7 @@ TR::Register *J9::ARM64::JNILinkage::buildDirectDispatch(TR::Node *callNode)
#endif
TR::RegisterDependencyConditions *deps = new (trHeapMemory()) TR::RegisterDependencyConditions(maxRegisters, maxPostRegisters, trMemory());

size_t spSize = buildJNIArgs(callNode, deps, passThread, passReceiver, killNonVolatileGPRs);
TR::RealRegister *sp = machine()->getRealRegister(_systemLinkage->getProperties().getStackPointerRegister());

if (spSize > 0)
{
if (constantIsUnsignedImm12(spSize))
{
generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::subimmx, callNode, sp, sp, spSize);
}
else
{
TR_ASSERT_FATAL(false, "Too many arguments.");
}
}
buildJNIArgs(callNode, deps, passThread, passReceiver, killNonVolatileGPRs);

TR::Register *returnRegister = getReturnRegisterFromDeps(callNode, deps);

Expand Down Expand Up @@ -1111,18 +1099,6 @@ TR::Register *J9::ARM64::JNILinkage::buildDirectDispatch(TR::Node *callNode)
TR::Instruction *callInstr = generateMethodDispatch(callNode, isJNIGCPoint, deps, targetAddress, x9Reg);
generateLabelInstruction(cg(), TR::InstOpCode::label, callNode, returnLabel, callInstr);

if (spSize > 0)
{
if (constantIsUnsignedImm12(spSize))
{
generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::addimmx, callNode, sp, sp, spSize);
}
else
{
TR_ASSERT_FATAL(false, "Too many arguments.");
}
}

if (dropVMAccess)
{
#ifdef J9VM_INTERP_ATOMIC_FREE_JNI
Expand Down
3 changes: 3 additions & 0 deletions runtime/compiler/aarch64/codegen/J9CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ J9::ARM64::CodeGenerator::initialize()
{
comp->setOption(TR_EnableMonitorCacheLookup);
}

if (comp->fej9()->hasFixedFrameC_CallingConvention())
cg->setHasFixedFrameC_CallingConvention();
}

TR::Linkage *
Expand Down
2 changes: 1 addition & 1 deletion runtime/compiler/env/VMJ9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9654,7 +9654,7 @@ portLibCall_sysinfo_has_fixed_frame_C_calling_convention()

/* AArch64 */
#if defined(TR_HOST_ARM64) && defined(TR_TARGET_ARM64)
return false;
return true;
#endif

/* X86 */
Expand Down
114 changes: 66 additions & 48 deletions runtime/oti/arm64helpers.m4
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,23 @@ define({JIT_GPR_SAVE_OFFSET},{eval(J9TR_cframe_jitGPRs+(($1)*ALen))})
define({JIT_GPR_SAVE_SLOT},{[sp,{#}JIT_GPR_SAVE_OFFSET($1)]})
define({JIT_FPR_SAVE_OFFSET},{eval(J9TR_cframe_jitFPRs+(($1)*8))})
define({JIT_FPR_SAVE_SLOT},{[sp,{#}JIT_FPR_SAVE_OFFSET($1)]})
dnl The size of J9CInterpreterStackFrame is now larger than 504 bytes, so we cannot simply use sp as the base register for ldp/stp instructions
dnl because the offset would become too large and invalid for ldp/stp.
dnl Thus, we use sp + JIT_GPR_SAVE_OFFSET(0) as the base address for ldp/stp. We chose x16 as the base register.
dnl It is intra procedure call register and JIT does not use it, so it is safe to clobber x16.
define({JIT_GPR_SAVE_SLOT_X16},{[x16,{#}eval(($1)*ALen)]})
define({JIT_FPR_SAVE_SLOT_X16},{[x16,{#}eval(JIT_FPR_SAVE_OFFSET($1)-JIT_GPR_SAVE_OFFSET(0))]})

define({SAVE_FPLR},{stp x29,x30,JIT_GPR_SAVE_SLOT(29)})
dnl JIT_GPR_SAVE_OFFSET is too large to fit in ldp/stp instructions.
define({SAVE_FPLR},{
str x29,JIT_GPR_SAVE_SLOT(29)
str x30,JIT_GPR_SAVE_SLOT(30)
})

define({RESTORE_FPLR},{ldp x29,x30,JIT_GPR_SAVE_SLOT(29)})
define({RESTORE_FPLR},{
ldr x29,JIT_GPR_SAVE_SLOT(29)
ldr x30,JIT_GPR_SAVE_SLOT(30)
})

define({BEGIN_HELPER},{
START_PROC($1)
Expand All @@ -95,22 +108,23 @@ define({CALL_C_WITH_VMTHREAD},{
CALL_DIRECT($1)
})

dnl x16 is inter-procedure-call register
define({SAVE_C_VOLATILE_REGS},{
stp x0,x1,JIT_GPR_SAVE_SLOT(0)
stp x2,x3,JIT_GPR_SAVE_SLOT(2)
stp x4,x5,JIT_GPR_SAVE_SLOT(4)
stp x6,x7,JIT_GPR_SAVE_SLOT(6)
stp x8,x9,JIT_GPR_SAVE_SLOT(8)
stp x10,x11,JIT_GPR_SAVE_SLOT(10)
stp x12,x13,JIT_GPR_SAVE_SLOT(12)
stp x14,x15,JIT_GPR_SAVE_SLOT(14)
stp x16,x17,JIT_GPR_SAVE_SLOT(16)
str x18,JIT_GPR_SAVE_SLOT(18)
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
stp x0,x1,JIT_GPR_SAVE_SLOT_X16(0)
stp x2,x3,JIT_GPR_SAVE_SLOT_X16(2)
stp x4,x5,JIT_GPR_SAVE_SLOT_X16(4)
stp x6,x7,JIT_GPR_SAVE_SLOT_X16(6)
stp x8,x9,JIT_GPR_SAVE_SLOT_X16(8)
stp x10,x11,JIT_GPR_SAVE_SLOT_X16(10)
stp x12,x13,JIT_GPR_SAVE_SLOT_X16(12)
stp x14,x15,JIT_GPR_SAVE_SLOT_X16(14)
stp x17,x18,JIT_GPR_SAVE_SLOT_X16(17)
ifdef({METHOD_INVOCATION},{
stp d0,d1,JIT_FPR_SAVE_SLOT(0)
stp d2,d3,JIT_FPR_SAVE_SLOT(2)
stp d4,d5,JIT_FPR_SAVE_SLOT(4)
stp d6,d7,JIT_FPR_SAVE_SLOT(6)
stp d0,d1,JIT_FPR_SAVE_SLOT_X16(0)
stp d2,d3,JIT_FPR_SAVE_SLOT_X16(2)
stp d4,d5,JIT_FPR_SAVE_SLOT_X16(4)
stp d6,d7,JIT_FPR_SAVE_SLOT_X16(6)
},{ dnl METHOD_INVOCATION
add x15, sp, JIT_FPR_SAVE_OFFSET(0)
st1 {{v0.4s, v1.4s, v2.4s, v3.4s}}, [x15], #64
Expand All @@ -125,11 +139,12 @@ ifdef({METHOD_INVOCATION},{
})

define({RESTORE_C_VOLATILE_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
ifdef({METHOD_INVOCATION},{
ldp d0,d1,JIT_FPR_SAVE_SLOT(0)
ldp d2,d3,JIT_FPR_SAVE_SLOT(2)
ldp d4,d5,JIT_FPR_SAVE_SLOT(4)
ldp d6,d7,JIT_FPR_SAVE_SLOT(6)
ldp d0,d1,JIT_FPR_SAVE_SLOT_X16(0)
ldp d2,d3,JIT_FPR_SAVE_SLOT_X16(2)
ldp d4,d5,JIT_FPR_SAVE_SLOT_X16(4)
ldp d6,d7,JIT_FPR_SAVE_SLOT_X16(6)
},{ dnl METHOD_INVOCATION
add x15, sp, JIT_FPR_SAVE_OFFSET(0)
ld1 {{v0.4s, v1.4s, v2.4s, v3.4s}}, [x15], #64
Expand All @@ -141,37 +156,38 @@ ifdef({METHOD_INVOCATION},{
ld1 {{v24.4s, v25.4s, v26.4s, v27.4s}}, [x15], #64
ld1 {{v28.4s, v29.4s, v30.4s, v31.4s}}, [x15]
}) dnl METHOD_INVOCATION
ldp x0,x1,JIT_GPR_SAVE_SLOT(0)
ldp x2,x3,JIT_GPR_SAVE_SLOT(2)
ldp x4,x5,JIT_GPR_SAVE_SLOT(4)
ldp x6,x7,JIT_GPR_SAVE_SLOT(6)
ldp x8,x9,JIT_GPR_SAVE_SLOT(8)
ldp x10,x11,JIT_GPR_SAVE_SLOT(10)
ldp x12,x13,JIT_GPR_SAVE_SLOT(12)
ldp x14,x15,JIT_GPR_SAVE_SLOT(14)
ldp x16,x17,JIT_GPR_SAVE_SLOT(16)
ldr x18,JIT_GPR_SAVE_SLOT(18)
ldp x0,x1,JIT_GPR_SAVE_SLOT_X16(0)
ldp x2,x3,JIT_GPR_SAVE_SLOT_X16(2)
ldp x4,x5,JIT_GPR_SAVE_SLOT_X16(4)
ldp x6,x7,JIT_GPR_SAVE_SLOT_X16(6)
ldp x8,x9,JIT_GPR_SAVE_SLOT_X16(8)
ldp x10,x11,JIT_GPR_SAVE_SLOT_X16(10)
ldp x12,x13,JIT_GPR_SAVE_SLOT_X16(12)
ldp x14,x15,JIT_GPR_SAVE_SLOT_X16(14)
ldp x17,x18,JIT_GPR_SAVE_SLOT_X16(17)
})

dnl No need to save/restore d8-15 - the stack walker will never need to read
dnl or modify them (no preserved FPRs in the JIT private linkage).

define({SAVE_C_NONVOLATILE_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
str x19,JIT_GPR_SAVE_SLOT(19)
stp x20,x21,JIT_GPR_SAVE_SLOT(20)
stp x22,x23,JIT_GPR_SAVE_SLOT(22)
stp x24,x25,JIT_GPR_SAVE_SLOT(24)
stp x26,x27,JIT_GPR_SAVE_SLOT(26)
stp x28,x29,JIT_GPR_SAVE_SLOT(28)
stp x20,x21,JIT_GPR_SAVE_SLOT_X16(20)
stp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
stp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
stp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
stp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({RESTORE_C_NONVOLATILE_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
ldr x19,JIT_GPR_SAVE_SLOT(19)
ldp x20,x21,JIT_GPR_SAVE_SLOT(20)
ldp x22,x23,JIT_GPR_SAVE_SLOT(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT(28)
ldp x20,x21,JIT_GPR_SAVE_SLOT_X16(20)
ldp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({SAVE_ALL_REGS},{
Expand All @@ -185,21 +201,23 @@ define({RESTORE_ALL_REGS},{
})

define({SAVE_PRESERVED_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
str x18,JIT_GPR_SAVE_SLOT(18)
str x21,JIT_GPR_SAVE_SLOT(21)
stp x22,x23,JIT_GPR_SAVE_SLOT(22)
stp x24,x25,JIT_GPR_SAVE_SLOT(24)
stp x26,x27,JIT_GPR_SAVE_SLOT(26)
stp x28,x29,JIT_GPR_SAVE_SLOT(28)
stp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
stp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
stp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
stp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({RESTORE_PRESERVED_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
ldr x18,JIT_GPR_SAVE_SLOT(18)
ldr x21,JIT_GPR_SAVE_SLOT(21)
ldp x22,x23,JIT_GPR_SAVE_SLOT(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT(28)
ldp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({BRANCH_VIA_VMTHREAD},{
Expand Down
1 change: 1 addition & 0 deletions runtime/oti/j9nonbuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -6291,6 +6291,7 @@ typedef struct J9CInterpreterStackFrame {
U_8 jitFPRs[16 * 8]; /* fpr0-15 */
#endif /* J9VM_ENV_DATA64 */
#elif defined(J9VM_ARCH_AARCH64) /* J9VM_ARCH_ARM */
UDATA outgoingArguments[J9_INLINE_JNI_MAX_ARG_COUNT];
UDATA preservedGPRs[12]; /* x19-x30 */
U_8 preservedFPRs[8 * 8]; /* v8-15 */
J9JITGPRSpillArea jitGPRs;
Expand Down