10
10
// implementing the lazy ZA state save schemes around calls.
11
11
//
12
12
// ===----------------------------------------------------------------------===//
13
+ //
14
+ // This pass works by collecting instructions that require ZA to be in a
15
+ // specific state (e.g., "ACTIVE" or "SAVED") and inserting the necessary state
16
+ // transitions to ensure ZA is in the required state before instructions. State
17
+ // transitions represent actions such as setting up or restoring a lazy save.
18
+ // Certain points within a function may also have predefined states independent
19
+ // of any instructions, for example, a "shared_za" function is always entered
20
+ // and exited in the "ACTIVE" state.
21
+ //
22
+ // To handle ZA state across control flow, we make use of edge bundling. This
23
+ // assigns each block an "incoming" and "outgoing" edge bundle (representing
24
+ // incoming and outgoing edges). Initially, these are unique to each block;
25
+ // then, in the process of forming bundles, the outgoing bundle of a block is
26
+ // joined with the incoming bundle of all successors. The result is that each
27
+ // bundle can be assigned a single ZA state, which ensures the state required by
28
+ // all a block's successors is the same, and that each basic block will always
29
+ // be entered with the same ZA state. This eliminates the need for splitting
30
+ // edges to insert state transitions or "phi" nodes for ZA states.
31
+ //
32
+ // See below for a simple example of edge bundling.
33
+ //
34
+ // The following shows a conditionally executed basic block (BB1):
35
+ //
36
+ // if (cond)
37
+ //   BB1
38
+ // BB2
39
+ //
40
+ //  Initial Bundles         Joined Bundles
41
+ //
42
+ //   ┌──0──┐                ┌──0──┐
43
+ //   │ BB0 │                │ BB0 │
44
+ //   └──1──┘                └──1──┘
45
+ //      ├───────┐              ├───────┐
46
+ //      ▼       │              ▼       │
47
+ //   ┌──2──┐    │   ─────►  ┌──1──┐    │
48
+ //   │ BB1 │    ▼           │ BB1 │    ▼
49
+ //   └──3──┘ ┌──4──┐        └──1──┘ ┌──1──┐
50
+ //      └───►4 BB2 │           └───►1 BB2 │
51
+ //           └──5──┘                └──2──┘
52
+ //
53
+ // On the left are the initial per-block bundles, and on the right are the
54
+ // joined bundles (which are the result of the EdgeBundles analysis).
13
55
14
56
#include " AArch64InstrInfo.h"
15
57
#include " AArch64MachineFunctionInfo.h"
@@ -210,7 +252,7 @@ struct MachineSMEABI : public MachineFunctionPass {
210
252
} State;
211
253
212
254
MachineFunction *MF = nullptr ;
213
- EdgeBundles *Bundles = nullptr ;
255
+ EdgeBundles *EdgeBundles = nullptr ;
214
256
const AArch64Subtarget *Subtarget = nullptr ;
215
257
const AArch64RegisterInfo *TRI = nullptr ;
216
258
const TargetInstrInfo *TII = nullptr ;
@@ -274,8 +316,8 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
274
316
}
275
317
276
318
void MachineSMEABI::assignBundleZAStates () {
277
- State.BundleStates .resize (Bundles ->getNumBundles ());
278
- for (unsigned I = 0 , E = Bundles ->getNumBundles (); I != E; ++I) {
319
+ State.BundleStates .resize (EdgeBundles ->getNumBundles ());
320
+ for (unsigned I = 0 , E = EdgeBundles ->getNumBundles (); I != E; ++I) {
279
321
LLVM_DEBUG (dbgs () << " Assigning ZA state for edge bundle: " << I << ' \n ' );
280
322
281
323
// Attempt to assign a ZA state for this bundle that minimizes state
@@ -284,16 +326,16 @@ void MachineSMEABI::assignBundleZAStates() {
284
326
// TODO: We should propagate desired incoming/outgoing states through blocks
285
327
// that have the "ANY" state first to make better global decisions.
286
328
int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0 };
287
- for (unsigned BlockID : Bundles ->getBlocks (I)) {
329
+ for (unsigned BlockID : EdgeBundles ->getBlocks (I)) {
288
330
LLVM_DEBUG (dbgs () << " - bb." << BlockID);
289
331
290
332
const BlockInfo &Block = State.Blocks [BlockID];
291
333
if (Block.Insts .empty ()) {
292
334
LLVM_DEBUG (dbgs () << " (no state preference)\n " );
293
335
continue ;
294
336
}
295
- bool InEdge = Bundles ->getBundle (BlockID, /* Out=*/ false ) == I;
296
- bool OutEdge = Bundles ->getBundle (BlockID, /* Out=*/ true ) == I;
337
+ bool InEdge = EdgeBundles ->getBundle (BlockID, /* Out=*/ false ) == I;
338
+ bool OutEdge = EdgeBundles ->getBundle (BlockID, /* Out=*/ true ) == I;
297
339
298
340
ZAState DesiredIncomingState = Block.Insts .front ().NeededState ;
299
341
if (InEdge && isLegalEdgeBundleZAState (DesiredIncomingState)) {
@@ -333,8 +375,8 @@ void MachineSMEABI::assignBundleZAStates() {
333
375
void MachineSMEABI::insertStateChanges () {
334
376
for (MachineBasicBlock &MBB : *MF) {
335
377
const BlockInfo &Block = State.Blocks [MBB.getNumber ()];
336
- ZAState InState =
337
- State. BundleStates [Bundles-> getBundle (MBB. getNumber (), /* Out=*/ false )];
378
+ ZAState InState = State. BundleStates [EdgeBundles-> getBundle (MBB. getNumber (),
379
+ /* Out=*/ false )];
338
380
339
381
ZAState CurrentState = Block.FixedEntryState ;
340
382
if (CurrentState == ZAState::ANY)
@@ -350,8 +392,8 @@ void MachineSMEABI::insertStateChanges() {
350
392
if (MBB.succ_empty ())
351
393
continue ;
352
394
353
- ZAState OutState =
354
- State. BundleStates [Bundles-> getBundle ( MBB.getNumber (), /* Out=*/ true )];
395
+ ZAState OutState = State. BundleStates [EdgeBundles-> getBundle (
396
+ MBB.getNumber (), /* Out=*/ true )];
355
397
if (CurrentState != OutState)
356
398
emitStateChange (MBB, MBB.getFirstTerminator (), CurrentState, OutState,
357
399
Block.PhysLiveRegsAtExit );
@@ -632,7 +674,7 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
632
674
// Reset pass state.
633
675
State = PassState{};
634
676
this ->MF = &MF;
635
- Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles ();
677
+ EdgeBundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles ();
636
678
Subtarget = &MF.getSubtarget <AArch64Subtarget>();
637
679
TII = Subtarget->getInstrInfo ();
638
680
TRI = Subtarget->getRegisterInfo ();
0 commit comments