-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Arm64EC] Preserve X9 for indirect calls. #167782
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Arm64EC indirect calls use a function __os_arm64x_check_icall... this has one obvious return value, x11, which is the function to call. However, it actually returns one other important value: x9, which is the final destination for the emulator after the call. If the call is calling x64 code, x9 is used by the thunk. Previously, we didn't model this, and it mostly worked because the compiler usually doesn't modify x9 in the narrow window between the check, and the call. That said, it can happen in some cases; one reliable way is to do an indirect tail-call with stack protectors enabled. (You can also just get unlucky with register allocation, but it's harder to write a testcase for that.) This patch uses the cfguardtarget bundle to simplify the calling convention handling, for similar reasons that x64 uses it: modifying arbitrary calls is difficult without an explicit marking. Fixes llvm#167430.
|
@llvm/pr-subscribers-backend-aarch64 Author: Eli Friedman (efriedma-quic) ChangesArm64EC indirect calls use a function __os_arm64x_check_icall... this has one obvious return value, x11, which is the function to call. However, it actually returns one other important value: x9, which is the final destination for the emulator after the call. If the call is calling x64 code, x9 is used by the thunk. Previously, we didn't model this, and it mostly worked because the compiler usually doesn't modify x9 in the narrow window between the check, and the call. That said, it can happen in some cases; one reliable way is to do an indirect tail-call with stack protectors enabled. (You can also just get unlucky with register allocation, but it's harder to write a testcase for that.) This patch uses the cfguardtarget bundle to simplify the calling convention handling, for similar reasons that x64 uses it: modifying arbitrary calls is difficult without a separate marking. Fixes #167430. Full diff: https://github.com/llvm/llvm-project/pull/167782.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 97298f9d74171..ebd9ed18bd92f 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -662,12 +662,15 @@ Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
Function *Thunk = buildExitThunk(F->getFunctionType(), F->getAttributes());
CallInst *GuardCheck = B.CreateCall(
GuardFnType, GuardCheckLoad, {F, Thunk});
+ Value *GuardCheckDest = B.CreateExtractValue(GuardCheck, 0);
+ Value *GuardFinalDest = B.CreateExtractValue(GuardCheck, 1);
// Ensure that the first argument is passed in the correct register.
GuardCheck->setCallingConv(CallingConv::CFGuard_Check);
SmallVector<Value *> Args(llvm::make_pointer_range(GuestExit->args()));
- CallInst *Call = B.CreateCall(Arm64Ty, GuardCheck, Args);
+ OperandBundleDef OB("cfguardtarget", GuardFinalDest);
+ CallInst *Call = B.CreateCall(Arm64Ty, GuardCheckDest, Args, OB);
Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
if (Call->getType()->isVoidTy())
@@ -767,11 +770,21 @@ void AArch64Arm64ECCallLowering::lowerCall(CallBase *CB) {
CallInst *GuardCheck =
B.CreateCall(GuardFnType, GuardCheckLoad, {CalledOperand, Thunk},
Bundles);
+ Value *GuardCheckDest = B.CreateExtractValue(GuardCheck, 0);
+ Value *GuardFinalDest = B.CreateExtractValue(GuardCheck, 1);
// Ensure that the first argument is passed in the correct register.
GuardCheck->setCallingConv(CallingConv::CFGuard_Check);
- CB->setCalledOperand(GuardCheck);
+ // Update the call: set the callee, and add a bundle with the final
+ // destination,
+ CB->setCalledOperand(GuardCheckDest);
+ OperandBundleDef OB("cfguardtarget", GuardFinalDest);
+ auto *NewCall = CallBase::addOperandBundle(CB, LLVMContext::OB_cfguardtarget,
+ OB, CB->getIterator());
+ NewCall->copyMetadata(*CB);
+ CB->replaceAllUsesWith(NewCall);
+ CB->eraseFromParent();
}
bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
@@ -789,7 +802,7 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
I64Ty = Type::getInt64Ty(M->getContext());
VoidTy = Type::getVoidTy(M->getContext());
- GuardFnType = FunctionType::get(PtrTy, {PtrTy, PtrTy}, false);
+ GuardFnType = FunctionType::get(StructType::get(PtrTy, PtrTy), {PtrTy, PtrTy}, false);
DispatchFnType = FunctionType::get(PtrTy, {PtrTy, PtrTy, PtrTy}, false);
GuardFnCFGlobal = M->getOrInsertGlobal("__os_arm64x_check_icall_cfg", PtrTy);
GuardFnGlobal = M->getOrInsertGlobal("__os_arm64x_check_icall", PtrTy);
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 34c85d588f9c4..e2a79a49a9e92 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -162,7 +162,13 @@ def RetCC_AArch64_AAPCS : CallingConv<[
]>;
let Entry = 1 in
-def CC_AArch64_Win64PCS : CallingConv<AArch64_Common>;
+def CC_AArch64_Win64PCS : CallingConv<!listconcat(
+ [
+ // 'CFGuardTarget' is used for Arm64EC; it passes its parameter in X9.
+ CCIfCFGuardTarget<CCAssignToReg<[X9]>>
+ ],
+ AArch64_Common)
+>;
// Vararg functions on windows pass floats in integer registers
let Entry = 1 in
@@ -177,6 +183,9 @@ def CC_AArch64_Win64_VarArg : CallingConv<[
// a stack layout compatible with the x64 calling convention.
let Entry = 1 in
def CC_AArch64_Arm64EC_VarArg : CallingConv<[
+ // 'CFGuardTarget' is used for Arm64EC; it passes its parameter in X9.
+ CCIfCFGuardTarget<CCAssignToReg<[X9]>>,
+
CCIfNest<CCAssignToReg<[X15]>>,
// Convert small floating-point values to integer.
@@ -345,7 +354,7 @@ def CC_AArch64_Arm64EC_CFGuard_Check : CallingConv<[
let Entry = 1 in
def RetCC_AArch64_Arm64EC_CFGuard_Check : CallingConv<[
- CCIfType<[i64], CCAssignToReg<[X11]>>
+ CCIfType<[i64], CCAssignToReg<[X11, X9]>>
]>;
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-indirect-call.ll b/llvm/test/CodeGen/AArch64/arm64ec-indirect-call.ll
new file mode 100644
index 0000000000000..e6a42c382e4f6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64ec-indirect-call.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mtriple=arm64ec-pc-windows-msvc < %s | FileCheck %s
+
+define void @simple(ptr %g) {
+; CHECK-LABEL: "#simple":
+; CHECK: str x30, [sp, #-16]!
+; CHECK-NEXT: .seh_save_reg_x x30, 16
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: adrp x8, __os_arm64x_check_icall
+; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$v$v
+; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$v$v
+; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
+; CHECK-NEXT: mov x11, x0
+; CHECK-NEXT: blr x8
+; CHECK-NEXT: blr x11
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: .seh_save_reg_x x30, 16
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: ret
+
+entry:
+ call void %g()
+ ret void
+}
+
+; Make sure the check for the security cookie doesn't use x9.
+define void @stackguard(ptr %g) sspreq {
+; CHECK-LABEL: "#stackguard":
+; CHECK: adrp x8, __os_arm64x_check_icall
+; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
+; CHECK-NEXT: blr x8
+; CHECK-NEXT: adrp x8, __security_cookie
+; CHECK-NEXT: ldr x10, [sp, #8]
+; CHECK-NEXT: ldr x8, [x8, :lo12:__security_cookie]
+; CHECK-NEXT: cmp x8, x10
+; CHECK-NEXT: b.ne .LBB1_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: fmov d0, #1.00000000
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: ldr x30, [sp, #16]
+; CHECK-NEXT: .seh_save_reg x30, 16
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: br x11
+
+entry:
+ %call = tail call double %g(double noundef 1.000000e+00)
+ ret void
+}
|
You can test this locally with the following command:git-clang-format --diff origin/main HEAD --extensions cpp -- llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp --diff_from_common_commit
View the diff from clang-format here.diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index ebd9ed18b..d0c4b1b9f 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -802,7 +802,8 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
I64Ty = Type::getInt64Ty(M->getContext());
VoidTy = Type::getVoidTy(M->getContext());
- GuardFnType = FunctionType::get(StructType::get(PtrTy, PtrTy), {PtrTy, PtrTy}, false);
+ GuardFnType =
+ FunctionType::get(StructType::get(PtrTy, PtrTy), {PtrTy, PtrTy}, false);
DispatchFnType = FunctionType::get(PtrTy, {PtrTy, PtrTy, PtrTy}, false);
GuardFnCFGlobal = M->getOrInsertGlobal("__os_arm64x_check_icall_cfg", PtrTy);
GuardFnGlobal = M->getOrInsertGlobal("__os_arm64x_check_icall", PtrTy);
|
Arm64EC indirect calls use a function __os_arm64x_check_icall... this has one obvious return value, x11, which is the function to call. However, it actually returns one other important value: x9, which is the final destination for the emulator after the call. If the call is calling x64 code, x9 is used by the thunk.
Previously, we didn't model this, and it mostly worked because the compiler usually doesn't modify x9 in the narrow window between the check, and the call. That said, it can happen in some cases; one reliable way is to do an indirect tail-call with stack protectors enabled. (You can also just get unlucky with register allocation, but it's harder to write a testcase for that.)
This patch uses the cfguardtarget bundle to simplify the calling convention handling, for similar reasons that x64 uses it: modifying arbitrary calls is difficult without a separate marking.
Fixes #167430.