Skip to content

Commit 5899bca

Browse files
authored
[AArch64][SME] Resume streaming-mode on entry to exception handlers (#156638)
This patch adds a new `TargetLowering` hook `lowerEHPadEntry()` that is called at the start of lowering EH pads in SelectionDAG. This allows the insertion of target-specific actions on entry to exception handlers. This is used on AArch64 to insert SME streaming-mode switches at landing pads. This is needed as exception handlers are always entered with PSTATE.SM off, and the function needs to resume the streaming mode of the function body.
1 parent 73c5bc5 commit 5899bca

File tree

5 files changed

+250
-1
lines changed

5 files changed

+250
-1
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4686,6 +4686,13 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
46864686
llvm_unreachable("Not Implemented");
46874687
}
46884688

4689+
/// Optional target hook to add target-specific actions when entering EH pad
4690+
/// blocks. The implementation should return the resulting token chain value, or a null SDValue to leave the current DAG root unchanged.
4691+
virtual SDValue lowerEHPadEntry(SDValue Chain, const SDLoc &DL,
4692+
SelectionDAG &DAG) const {
4693+
return SDValue();
4694+
}
4695+
46894696
virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
46904697
ArgListTy &Args) const {}
46914698

llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1729,10 +1729,18 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
17291729
// Setup an EH landing-pad block.
17301730
FuncInfo->ExceptionPointerVirtReg = Register();
17311731
FuncInfo->ExceptionSelectorVirtReg = Register();
1732-
if (LLVMBB->isEHPad())
1732+
if (LLVMBB->isEHPad()) {
17331733
if (!PrepareEHLandingPad())
17341734
continue;
17351735

1736+
if (!FastIS) {
1737+
SDValue NewRoot = TLI->lowerEHPadEntry(CurDAG->getRoot(),
1738+
SDB->getCurSDLoc(), *CurDAG);
1739+
if (NewRoot && NewRoot != CurDAG->getRoot())
1740+
CurDAG->setRoot(NewRoot);
1741+
}
1742+
}
1743+
17361744
// Before doing SelectionDAG ISel, see if FastISel has been requested.
17371745
if (FastIS) {
17381746
if (LLVMBB != &Fn.getEntryBlock())

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8034,6 +8034,39 @@ static bool isPassedInFPR(EVT VT) {
80348034
(VT.isFloatingPoint() && !VT.isScalableVector());
80358035
}
80368036

8037+
SDValue AArch64TargetLowering::lowerEHPadEntry(SDValue Chain, SDLoc const &DL,
8038+
SelectionDAG &DAG) const {
8039+
assert(Chain.getOpcode() == ISD::EntryToken && "Unexpected Chain value");
8040+
SDValue Glue = Chain.getValue(1);
8041+
8042+
MachineFunction &MF = DAG.getMachineFunction();
8043+
SMEAttrs SMEFnAttrs = MF.getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
8044+
8045+
// The following conditions are true on entry to an exception handler:
8046+
// - PSTATE.SM is 0.
8047+
// - PSTATE.ZA is 0.
8048+
// - TPIDR2_EL0 is null.
8049+
// See:
8050+
// https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#exceptions
8051+
//
8052+
// Therefore, if the function that contains this exception handler is a
8053+
// streaming[-compatible] function, we must re-enable streaming mode.
8054+
//
8055+
// These mode changes are usually optimized away in catch blocks as they
8056+
// occur before the __cxa_begin_catch (which is a non-streaming function),
8057+
// but are necessary in some cases (such as for cleanups).
8058+
8059+
if (SMEFnAttrs.hasStreamingInterfaceOrBody())
8060+
return changeStreamingMode(DAG, DL, /*Enable=*/true, Chain,
8061+
/*Glue*/ Glue, AArch64SME::Always);
8062+
8063+
if (SMEFnAttrs.hasStreamingCompatibleInterface())
8064+
return changeStreamingMode(DAG, DL, /*Enable=*/true, Chain, Glue,
8065+
AArch64SME::IfCallerIsStreaming);
8066+
8067+
return Chain;
8068+
}
8069+
80378070
SDValue AArch64TargetLowering::LowerFormalArguments(
80388071
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
80398072
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,9 @@ class AArch64TargetLowering : public TargetLowering {
575575

576576
bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;
577577

578+
SDValue lowerEHPadEntry(SDValue Chain, SDLoc const &DL,
579+
SelectionDAG &DAG) const override;
580+
578581
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
579582
bool isVarArg,
580583
const SmallVectorImpl<ISD::InputArg> &Ins,
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sme,+sve -stop-before=finalize-isel -verify-machineinstrs < %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
declare void @"StreamingCleanup::~StreamingCleanup"(ptr %this) nounwind "aarch64_pstate_sm_enabled"
7+
declare void @"StreamingCompatCleanup::~StreamingCompatCleanup"(ptr %this) nounwind "aarch64_pstate_sm_compatible"
8+
9+
declare void @may_throw() "aarch64_pstate_sm_compatible"
10+
11+
; This test models the kind of IR clang would emit for the following C++:
12+
;
13+
; struct StreamingCleanup {
14+
; ~StreamingCleanup() __arm_streaming
15+
; };
16+
;
17+
; void may_throw() __arm_streaming_compatible;
18+
;
19+
; void streaming_with_cleanup() __arm_streaming {
20+
; StreamingCleanup cleanup;
21+
; may_throw();
22+
; }
23+
;
24+
; This is a streaming function and all callees of this function are streaming[-compatible]
25+
; functions (including the StreamingCleanup destructor). This means call lowering will not
26+
; insert any streaming mode switches. However, if "may_throw" throws an exception, the
27+
; unwinder can re-enter this function (in %unwind_cleanup) to run the "StreamingCleanup"
28+
; destructor. The unwinder will always re-enter functions with streaming-mode disabled, so
29+
; we must ensure streaming-mode is enabled on entry to exception handlers.
30+
define void @streaming_with_cleanup() "aarch64_pstate_sm_enabled" personality ptr @__gxx_personality_v0 {
31+
; CHECK-LABEL: name: streaming_with_cleanup
32+
; CHECK: bb.0 (%ir-block.0):
33+
; CHECK-NEXT: successors: %bb.1(0x7ffff800), %bb.2(0x00000800)
34+
; CHECK-NEXT: {{ $}}
35+
; CHECK-NEXT: EH_LABEL <mcsymbol >
36+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
37+
; CHECK-NEXT: BL @may_throw, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
38+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
39+
; CHECK-NEXT: EH_LABEL <mcsymbol >
40+
; CHECK-NEXT: B %bb.1
41+
; CHECK-NEXT: {{ $}}
42+
; CHECK-NEXT: bb.1.normal_return:
43+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
44+
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0.cleanup, 0, 0
45+
; CHECK-NEXT: $x0 = COPY [[ADDXri]]
46+
; CHECK-NEXT: BL @"StreamingCleanup::~StreamingCleanup", csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
47+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
48+
; CHECK-NEXT: RET_ReallyLR
49+
; CHECK-NEXT: {{ $}}
50+
; CHECK-NEXT: bb.2.unwind_cleanup (landing-pad):
51+
; CHECK-NEXT: liveins: $x0, $x1
52+
; CHECK-NEXT: {{ $}}
53+
; CHECK-NEXT: EH_LABEL <mcsymbol >
54+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64all = COPY killed $x1
55+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64all = COPY killed $x0
56+
; CHECK-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr
57+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
58+
; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64sp = ADDXri %stack.0.cleanup, 0, 0
59+
; CHECK-NEXT: $x0 = COPY [[ADDXri1]]
60+
; CHECK-NEXT: BL @"StreamingCleanup::~StreamingCleanup", csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
61+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
62+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
63+
; CHECK-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $sp, implicit $vg, implicit-def $vg, implicit-def $fpmr
64+
; CHECK-NEXT: $x0 = COPY [[COPY1]]
65+
; CHECK-NEXT: BL @_Unwind_Resume, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
66+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
67+
; CHECK-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr
68+
%cleanup = alloca i8, align 1
69+
invoke void @may_throw()
70+
to label %normal_return unwind label %unwind_cleanup
71+
72+
normal_return:
73+
call void @"StreamingCleanup::~StreamingCleanup"(ptr %cleanup)
74+
ret void
75+
76+
unwind_cleanup:
77+
%eh_info = landingpad { ptr, i32 }
78+
cleanup
79+
call void @"StreamingCleanup::~StreamingCleanup"(ptr %cleanup)
80+
resume { ptr, i32 } %eh_info
81+
}
82+
83+
; This test is the same as "streaming_with_cleanup", but now the function and destructor
84+
; are streaming-compatible functions. In this case, when we enter the exception handler,
85+
; we must switch to the streaming mode that "streaming_compatible_with_cleanup" was entered with
86+
; during normal execution (i.e., EntryPStateSM).
87+
define void @streaming_compatible_with_cleanup() "aarch64_pstate_sm_compatible" personality ptr @__gxx_personality_v0 {
88+
; CHECK-LABEL: name: streaming_compatible_with_cleanup
89+
; CHECK: bb.0 (%ir-block.0):
90+
; CHECK-NEXT: successors: %bb.1(0x7ffff800), %bb.2(0x00000800)
91+
; CHECK-NEXT: {{ $}}
92+
; CHECK-NEXT: [[EntryPStateSM:%[0-9]+]]:gpr64 = EntryPStateSM
93+
; CHECK-NEXT: EH_LABEL <mcsymbol >
94+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
95+
; CHECK-NEXT: BL @may_throw, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
96+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
97+
; CHECK-NEXT: EH_LABEL <mcsymbol >
98+
; CHECK-NEXT: B %bb.1
99+
; CHECK-NEXT: {{ $}}
100+
; CHECK-NEXT: bb.1.normal_return:
101+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
102+
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0.cleanup, 0, 0
103+
; CHECK-NEXT: $x0 = COPY [[ADDXri]]
104+
; CHECK-NEXT: BL @"StreamingCompatCleanup::~StreamingCompatCleanup", csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
105+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
106+
; CHECK-NEXT: RET_ReallyLR
107+
; CHECK-NEXT: {{ $}}
108+
; CHECK-NEXT: bb.2.unwind_cleanup (landing-pad):
109+
; CHECK-NEXT: liveins: $x0, $x1
110+
; CHECK-NEXT: {{ $}}
111+
; CHECK-NEXT: EH_LABEL <mcsymbol >
112+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64all = COPY killed $x1
113+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64all = COPY killed $x0
114+
; CHECK-NEXT: MSRpstatePseudo 1, 1, 1, [[EntryPStateSM]], csr_aarch64_smstartstop, implicit-def dead $vg, implicit $vg, implicit $vg, implicit-def $vg, implicit-def $fpmr
115+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
116+
; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64sp = ADDXri %stack.0.cleanup, 0, 0
117+
; CHECK-NEXT: $x0 = COPY [[ADDXri1]]
118+
; CHECK-NEXT: BL @"StreamingCompatCleanup::~StreamingCompatCleanup", csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
119+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
120+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
121+
; CHECK-NEXT: MSRpstatePseudo 1, 0, 1, [[EntryPStateSM]], csr_aarch64_smstartstop, implicit-def $vg, implicit $vg, implicit-def $sp, implicit $vg, implicit-def $vg, implicit-def $fpmr
122+
; CHECK-NEXT: $x0 = COPY [[COPY1]]
123+
; CHECK-NEXT: BL @_Unwind_Resume, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $vg
124+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
125+
; CHECK-NEXT: MSRpstatePseudo 1, 1, 1, [[EntryPStateSM]], csr_aarch64_smstartstop, implicit-def dead $vg, implicit $vg, implicit $vg, implicit-def $vg, implicit-def $fpmr
126+
%cleanup = alloca i8, align 1
127+
invoke void @may_throw()
128+
to label %normal_return unwind label %unwind_cleanup
129+
130+
normal_return:
131+
call void @"StreamingCompatCleanup::~StreamingCompatCleanup"(ptr %cleanup)
132+
ret void
133+
134+
unwind_cleanup:
135+
%eh_info = landingpad { ptr, i32 }
136+
cleanup
137+
call void @"StreamingCompatCleanup::~StreamingCompatCleanup"(ptr %cleanup)
138+
resume { ptr, i32 } %eh_info
139+
}
140+
141+
; This is the same as "streaming_with_cleanup" but for a locally streaming function.
142+
; The lowering of "unwind_cleanup" is expected to match "streaming_with_cleanup".
143+
define void @locally_streaming_with_cleanup() "aarch64_pstate_sm_body" personality ptr @__gxx_personality_v0 {
144+
; CHECK-LABEL: name: locally_streaming_with_cleanup
145+
; CHECK: bb.0 (%ir-block.0):
146+
; CHECK-NEXT: successors: %bb.1(0x7ffff800), %bb.2(0x00000800)
147+
; CHECK-NEXT: {{ $}}
148+
; CHECK-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr
149+
; CHECK-NEXT: EH_LABEL <mcsymbol >
150+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
151+
; CHECK-NEXT: BL @may_throw, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
152+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
153+
; CHECK-NEXT: EH_LABEL <mcsymbol >
154+
; CHECK-NEXT: B %bb.1
155+
; CHECK-NEXT: {{ $}}
156+
; CHECK-NEXT: bb.1.normal_return:
157+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
158+
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0.cleanup, 0, 0
159+
; CHECK-NEXT: $x0 = COPY [[ADDXri]]
160+
; CHECK-NEXT: BL @"StreamingCleanup::~StreamingCleanup", csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
161+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
162+
; CHECK-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr
163+
; CHECK-NEXT: RET_ReallyLR
164+
; CHECK-NEXT: {{ $}}
165+
; CHECK-NEXT: bb.2.unwind_cleanup (landing-pad):
166+
; CHECK-NEXT: liveins: $x0, $x1
167+
; CHECK-NEXT: {{ $}}
168+
; CHECK-NEXT: EH_LABEL <mcsymbol >
169+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64all = COPY killed $x1
170+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64all = COPY killed $x0
171+
; CHECK-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr
172+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
173+
; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64sp = ADDXri %stack.0.cleanup, 0, 0
174+
; CHECK-NEXT: $x0 = COPY [[ADDXri1]]
175+
; CHECK-NEXT: BL @"StreamingCleanup::~StreamingCleanup", csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
176+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
177+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
178+
; CHECK-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $sp, implicit $vg, implicit-def $vg, implicit-def $fpmr
179+
; CHECK-NEXT: $x0 = COPY [[COPY1]]
180+
; CHECK-NEXT: BL @_Unwind_Resume, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
181+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
182+
; CHECK-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr
183+
%cleanup = alloca i8, align 1
184+
invoke void @may_throw()
185+
to label %normal_return unwind label %unwind_cleanup
186+
187+
normal_return:
188+
call void @"StreamingCleanup::~StreamingCleanup"(ptr %cleanup)
189+
ret void
190+
191+
unwind_cleanup:
192+
%eh_info = landingpad { ptr, i32 }
193+
cleanup
194+
call void @"StreamingCleanup::~StreamingCleanup"(ptr %cleanup)
195+
resume { ptr, i32 } %eh_info
196+
}
197+
198+
declare i32 @__gxx_personality_v0(...)

0 commit comments

Comments
 (0)