From ebfae55af454e5b81ca4f532714fd2e589297b8d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 29 Apr 2025 17:24:51 -0700 Subject: [PATCH 01/83] Bump version to 20.1.5 --- cmake/Modules/LLVMVersion.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index 7bb6c66a92e12..c8cc0b8968b05 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 4) + set(LLVM_VERSION_PATCH 5) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) From aecbb2364a7ccee1048447127fdb5865f70b3c6b Mon Sep 17 00:00:00 2001 From: Younan Zhang Date: Fri, 18 Apr 2025 16:27:27 +0800 Subject: [PATCH 02/83] [Clang] Fix the trailing comma regression (#136273) 925e195 introduced a regression since which we started to accept invalid trailing commas in many expression lists where they're not allowed by the grammar. The issue came from the fact that an additional invalid state - previously handled by ParseExpressionList - was overlooked in that patch. Fixes https://github.com/llvm/llvm-project/issues/136254 No release entry because I want to backport it. (cherry picked from commit c7daab259c3281cf8f649583993bad2536febc02) --- clang/lib/Parse/ParseExpr.cpp | 3 --- clang/test/Parser/recovery.cpp | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 0cadede51a9b3..2fab1dfed4a00 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -2237,8 +2237,6 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { if (PP.isCodeCompletionReached() && !CalledSignatureHelp) RunSignatureHelp(); LHS = ExprError(); - } else if (!HasError && HasTrailingComma) { - Diag(Tok, diag::err_expected_expression); } else if (LHS.isInvalid()) { for (auto &E : ArgExprs) Actions.CorrectDelayedTyposInExpr(E); @@ -3738,7 +3736,6 @@ bool Parser::ParseExpressionList(SmallVectorImpl &Exprs, if (Tok.is(tok::r_paren)) { if (HasTrailingComma) *HasTrailingComma = true; - break; } } if (SawError) { diff --git a/clang/test/Parser/recovery.cpp b/clang/test/Parser/recovery.cpp index 2fce67a52c6b6..261f5dc99bad4 100644 --- a/clang/test/Parser/recovery.cpp +++ b/clang/test/Parser/recovery.cpp @@ -222,3 +222,21 @@ void k() { func(1, ); // expected-error {{expected expression}} } } + +namespace GH136254 { + +void call() { + [a(42, )]() {} (); // expected-error {{expected expression}} + + int *b = new int(42, ); // expected-error {{expected expression}} + + struct S { + int c; + + S() : c(42, ) {} // expected-error {{expected expression}} + }; + + int d(42, ); // expected-error {{expected expression}} +} + +} From be4097b6ee5793c33f6731e9cf908e67d627fded Mon Sep 17 00:00:00 2001 From: 3405691582 Date: Mon, 31 Mar 2025 12:17:55 -0400 Subject: [PATCH 03/83] Fix crash lowering stack guard on OpenBSD/aarch64. (#125416) TargetLoweringBase::getIRStackGuard refers to a platform-specific guard variable. Before this change, TargetLoweringBase::getSDagStackGuard only referred to a different variable. This means that SelectionDAGBuilder's getLoadStackGuard does not get memory operands. However, AArch64InstrInfo::expandPostRAPseudo assumes that the passed MachineInstr has nonzero memoperands, causing a segfault. 
We have two possible options here: either disabling the LOAD_STACK_GUARD node entirely in AArch64TargetLowering::useLoadStackGuardNode or just making the platform-specific values match across TargetLoweringBase. Here, we try the latter. (cherry picked from commit c180e249d0013474d502cd779ec65b33cf7e9468) --- llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 9c56912aa6ba0..411f59e714b0e 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1987,6 +1987,9 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const { // Currently only support "standard" __stack_chk_guard. // TODO: add LOAD_STACK_GUARD support. Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { + if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { + return M.getNamedValue("__guard_local"); + } return M.getNamedValue("__stack_chk_guard"); } From a38e1ae2041db6815c482b5194718409ff2e742c Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Tue, 25 Mar 2025 10:09:25 +0000 Subject: [PATCH 04/83] [AArch64][SME2] Don't preserve ZT0 around SME ABI routines (#132722) This caused ZT0 to be preserved around `__arm_tpidr2_save` in functions with "aarch64_new_zt0". The block in which `__arm_tpidr2_save` is called is added by the SMEABIPass and may be reachable in cases where ZA has not been enabled* (so using `str zt0` is invalid). * (when za_save_buffer is null and num_za_save_slices is zero) --- .../AArch64/Utils/AArch64SMEAttributes.h | 3 +- .../AArch64/sme-disable-gisel-fisel.ll | 9 +-- llvm/test/CodeGen/AArch64/sme-zt0-state.ll | 61 +++++++++++++------ 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h index fb093da70c46b..a3ebf764a6e0c 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h @@ -133,7 +133,8 @@ class SMEAttrs { bool hasZT0State() const { return isNewZT0() || sharesZT0(); } bool requiresPreservingZT0(const SMEAttrs &Callee) const { return hasZT0State() && !Callee.sharesZT0() && - !Callee.hasAgnosticZAInterface(); + !Callee.hasAgnosticZAInterface() && + !(Callee.Bitmask & SME_ABI_Routine); } bool requiresDisablingZABeforeCall(const SMEAttrs &Callee) const { return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface() && diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll index 33d08beae2ca7..4a52bf27a7591 100644 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -475,16 +475,12 @@ declare double @zt0_shared_callee(double) "aarch64_inout_zt0" define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline optnone "aarch64_new_zt0" { ; CHECK-COMMON-LABEL: zt0_new_caller_to_zt0_shared_callee: ; CHECK-COMMON: // %bb.0: // %prelude -; CHECK-COMMON-NEXT: sub sp, sp, #80 -; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 ; CHECK-COMMON-NEXT: cbz x8, .LBB13_2 ; CHECK-COMMON-NEXT: b .LBB13_1 ; CHECK-COMMON-NEXT: .LBB13_1: // %save.za -; CHECK-COMMON-NEXT: mov x8, sp -; CHECK-COMMON-NEXT: str zt0, [x8] ; CHECK-COMMON-NEXT: bl __arm_tpidr2_save -; CHECK-COMMON-NEXT: ldr zt0, [x8] ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr ; CHECK-COMMON-NEXT: b .LBB13_2 ; CHECK-COMMON-NEXT: .LBB13_2: // %entry @@ -495,8 +491,7 @@ define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline ; CHECK-COMMON-NEXT: fmov d1, x8 ; CHECK-COMMON-NEXT: fadd d0, d0, d1 ; CHECK-COMMON-NEXT: smstop za -; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-COMMON-NEXT: add sp, sp, #80 +; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ret entry: %call = call double @zt0_shared_callee(double %x) diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll index 312537630e77a..500fff4eb20db 100644 --- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll +++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll @@ -112,7 +112,7 @@ define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_inout_za" "aar ret void; } -; New-ZA Callee +; New-ZT0 Callee ; Expect spill & fill of ZT0 around call ; Expect smstop/smstart za around call @@ -134,6 +134,39 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind { ret void; } +; New-ZT0 Callee + +; Expect commit of lazy-save if ZA is dormant +; Expect smstart ZA & clear ZT0 +; Expect spill & fill of ZT0 around call +; Before return, expect smstop ZA +define void @zt0_new_caller_zt0_new_callee() "aarch64_new_zt0" nounwind { +; CHECK-LABEL: zt0_new_caller_zt0_new_callee: +; CHECK: // %bb.0: // %prelude +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbz x8, .LBB6_2 +; CHECK-NEXT: // %bb.1: // %save.za +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: smstart za +; CHECK-NEXT: zero { zt0 } +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: str zt0, [x19] +; CHECK-NEXT: smstop za +; CHECK-NEXT: bl callee +; CHECK-NEXT: smstart za +; CHECK-NEXT: ldr zt0, [x19] +; CHECK-NEXT: smstop za +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret + call void @callee() "aarch64_new_zt0"; + ret void; +} + ; ; New-ZA Caller ; @@ -144,23 +177,18 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind { define void @zt0_new_caller() "aarch64_new_zt0" nounwind { ; CHECK-LABEL: zt0_new_caller: ; CHECK: // %bb.0: // %prelude -; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-NEXT: cbz x8, .LBB6_2 +; CHECK-NEXT: cbz x8, .LBB7_2 ; CHECK-NEXT: // %bb.1: // %save.za -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str zt0, [x8] ; CHECK-NEXT: bl __arm_tpidr2_save -; CHECK-NEXT: ldr zt0, [x8] ; CHECK-NEXT: msr TPIDR2_EL0, xzr -; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero { zt0 } ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstop za -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret call void @callee() "aarch64_in_zt0"; ret void; @@ -172,24 +200,19 @@ define void @zt0_new_caller() "aarch64_new_zt0" nounwind { define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind { ; CHECK-LABEL: new_za_zt0_caller: ; CHECK: // %bb.0: // %prelude -; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-NEXT: cbz x8, .LBB7_2 +; CHECK-NEXT: cbz x8, .LBB8_2 ; CHECK-NEXT: // %bb.1: // %save.za -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str zt0, [x8] ; CHECK-NEXT: bl __arm_tpidr2_save -; CHECK-NEXT: ldr zt0, [x8] ; CHECK-NEXT: msr TPIDR2_EL0, xzr -; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero {za} ; CHECK-NEXT: zero { zt0 } ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstop za -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret call void @callee() "aarch64_inout_za" "aarch64_in_zt0"; ret void; From 069ef671e0aba3874e690750ad95c59a437b0c34 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 25 Apr 2025 13:33:09 +0100 Subject: [PATCH 05/83] [AArch64][SME] Allow spills of ZT0 around SME ABI routines again (#136726) In #132722 spills of ZT0 were disabled around all SME ABI routines to avoid a case where ZT0 is spilled before ZA is enabled (resulting in a crash). It turns out that the ABI does not promise that routines will preserve ZT0 (however in practice they do), so generally disabling ZT0 spills for ABI routines is not correct. The case where a crash was possible was "aarch64_new_zt0" functions with ZA disabled on entry and a ZT0 spill around __arm_tpidr2_save. In this case, ZT0 will be undefined at the call to __arm_tpidr2_save, so this patch avoids the ZT0 spill by marking the callsite with "aarch64_zt0_undef". This attribute only applies to callsites and marks that at the point the call is made ZT0 is not defined, so does not need preserving. 
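For reference, the rule this patch ends up with can be restated as the following sketch (a simplified paraphrase of SMEAttrs::requiresPreservingZT0 from the diff below, not the exact upstream code):

```
#include "Utils/AArch64SMEAttributes.h"
using namespace llvm;

// Sketch: ZT0 only needs a spill/fill around a call if the caller has ZT0 state
// and the callee neither shares ZT0, has an agnostic-ZA interface, nor is a
// callsite marked "aarch64_zt0_undef".
static bool needsZT0SpillAroundCall(const SMEAttrs &Caller,
                                    const SMEAttrs &Callee) {
  return Caller.hasZT0State() && !Callee.isUndefZT0() && !Callee.sharesZT0() &&
         !Callee.hasAgnosticZAInterface();
}
```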
--- llvm/lib/IR/Verifier.cpp | 3 ++ llvm/lib/Target/AArch64/SMEABIPass.cpp | 16 ++++++-- .../AArch64/Utils/AArch64SMEAttributes.cpp | 2 + .../AArch64/Utils/AArch64SMEAttributes.h | 11 ++--- .../CodeGen/AArch64/sme-new-zt0-function.ll | 14 +++++++ llvm/test/CodeGen/AArch64/sme-zt0-state.ll | 41 +++++++++++++++++-- llvm/test/Verifier/sme-attributes.ll | 3 ++ .../Target/AArch64/SMEAttributesTest.cpp | 30 ++++++++++++++ 8 files changed, 107 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 8432779c107de..551c00a518b8f 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2818,6 +2818,9 @@ void Verifier::visitFunction(const Function &F) { Check(!Attrs.hasAttrSomewhere(Attribute::ElementType), "Attribute 'elementtype' can only be applied to a callsite.", &F); + Check(!Attrs.hasFnAttr("aarch64_zt0_undef"), + "Attribute 'aarch64_zt0_undef' can only be applied to a callsite."); + if (Attrs.hasFnAttr(Attribute::Naked)) for (const Argument &Arg : F.args()) Check(Arg.use_empty(), "cannot use argument of naked function", &Arg); diff --git a/llvm/lib/Target/AArch64/SMEABIPass.cpp b/llvm/lib/Target/AArch64/SMEABIPass.cpp index bb885d86392fe..b6685497e1fd1 100644 --- a/llvm/lib/Target/AArch64/SMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/SMEABIPass.cpp @@ -54,14 +54,22 @@ FunctionPass *llvm::createSMEABIPass() { return new SMEABI(); } //===----------------------------------------------------------------------===// // Utility function to emit a call to __arm_tpidr2_save and clear TPIDR2_EL0. -void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) { +void emitTPIDR2Save(Module *M, IRBuilder<> &Builder, bool ZT0IsUndef = false) { + auto &Ctx = M->getContext(); auto *TPIDR2SaveTy = FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false); - auto Attrs = AttributeList().addFnAttribute(M->getContext(), - "aarch64_pstate_sm_compatible"); + auto Attrs = + AttributeList().addFnAttribute(Ctx, "aarch64_pstate_sm_compatible"); FunctionCallee Callee = M->getOrInsertFunction("__arm_tpidr2_save", TPIDR2SaveTy, Attrs); CallInst *Call = Builder.CreateCall(Callee); + + // If ZT0 is undefined (i.e. we're at the entry of a "new_zt0" function), mark + // that on the __arm_tpidr2_save call. This prevents an unnecessary spill of + // ZT0 that can occur before ZA is enabled. + if (ZT0IsUndef) + Call->addFnAttr(Attribute::get(Ctx, "aarch64_zt0_undef")); + Call->setCallingConv( CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0); @@ -119,7 +127,7 @@ bool SMEABI::updateNewStateFunctions(Module *M, Function *F, // Create a call __arm_tpidr2_save, which commits the lazy save. Builder.SetInsertPoint(&SaveBB->back()); - emitTPIDR2Save(M, Builder); + emitTPIDR2Save(M, Builder, /*ZT0IsUndef=*/FnAttrs.isNewZT0()); // Enable pstate.za at the start of the function. 
Builder.SetInsertPoint(&OrigBB->front()); diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index bf16acd7f8f7e..76d2ac6a601e5 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -75,6 +75,8 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) { Bitmask |= SM_Body; if (Attrs.hasFnAttr("aarch64_za_state_agnostic")) Bitmask |= ZA_State_Agnostic; + if (Attrs.hasFnAttr("aarch64_zt0_undef")) + Bitmask |= ZT0_Undef; if (Attrs.hasFnAttr("aarch64_in_za")) Bitmask |= encodeZAState(StateValue::In); if (Attrs.hasFnAttr("aarch64_out_za")) diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h index a3ebf764a6e0c..1691d4fec8b68 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h @@ -43,9 +43,10 @@ class SMEAttrs { SM_Body = 1 << 2, // aarch64_pstate_sm_body SME_ABI_Routine = 1 << 3, // Used for SME ABI routines to avoid lazy saves ZA_State_Agnostic = 1 << 4, - ZA_Shift = 5, + ZT0_Undef = 1 << 5, // Use to mark ZT0 as undef to avoid spills + ZA_Shift = 6, ZA_Mask = 0b111 << ZA_Shift, - ZT0_Shift = 8, + ZT0_Shift = 9, ZT0_Mask = 0b111 << ZT0_Shift }; @@ -125,6 +126,7 @@ class SMEAttrs { bool isPreservesZT0() const { return decodeZT0State(Bitmask) == StateValue::Preserved; } + bool isUndefZT0() const { return Bitmask & ZT0_Undef; } bool sharesZT0() const { StateValue State = decodeZT0State(Bitmask); return State == StateValue::In || State == StateValue::Out || @@ -132,9 +134,8 @@ class SMEAttrs { } bool hasZT0State() const { return isNewZT0() || sharesZT0(); } bool requiresPreservingZT0(const SMEAttrs &Callee) const { - return hasZT0State() && !Callee.sharesZT0() && - !Callee.hasAgnosticZAInterface() && - !(Callee.Bitmask & SME_ABI_Routine); + return hasZT0State() && !Callee.isUndefZT0() && !Callee.sharesZT0() && + !Callee.hasAgnosticZAInterface(); } bool requiresDisablingZABeforeCall(const SMEAttrs &Callee) const { return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface() && diff --git a/llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll b/llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll new file mode 100644 index 0000000000000..94968ab4fd9ac --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll @@ -0,0 +1,14 @@ +; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s | FileCheck %s + +declare void @callee(); + +define void @private_za() "aarch64_new_zt0" { + call void @callee() + ret void +} + +; CHECK: call aarch64_sme_preservemost_from_x0 void @__arm_tpidr2_save() #[[TPIDR2_SAVE_CALL_ATTR:[0-9]+]] +; CHECK: declare void @__arm_tpidr2_save() #[[TPIDR2_SAVE_DECL_ATTR:[0-9]+]] + +; CHECK: attributes #[[TPIDR2_SAVE_DECL_ATTR]] = { "aarch64_pstate_sm_compatible" } +; CHECK: attributes #[[TPIDR2_SAVE_CALL_ATTR]] = { "aarch64_zt0_undef" } diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll index 500fff4eb20db..7361e850d713e 100644 --- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll +++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll @@ -167,6 +167,39 @@ define void @zt0_new_caller_zt0_new_callee() "aarch64_new_zt0" nounwind { ret void; } +; Expect commit of lazy-save if ZA is dormant +; Expect smstart ZA & clear ZT0 +; No spill & fill of ZT0 around __arm_tpidr2_save +; Expect spill & fill of ZT0 around __arm_sme_state call +; Before return, 
expect smstop ZA +define i64 @zt0_new_caller_abi_routine_callee() "aarch64_new_zt0" nounwind { +; CHECK-LABEL: zt0_new_caller_abi_routine_callee: +; CHECK: // %bb.0: // %prelude +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbz x8, .LBB7_2 +; CHECK-NEXT: // %bb.1: // %save.za +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: smstart za +; CHECK-NEXT: zero { zt0 } +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: str zt0, [x19] +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: ldr zt0, [x19] +; CHECK-NEXT: smstop za +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret + %res = call {i64, i64} @__arm_sme_state() + %res.0 = extractvalue {i64, i64} %res, 0 + ret i64 %res.0 +} + +declare {i64, i64} @__arm_sme_state() + ; ; New-ZA Caller ; @@ -179,11 +212,11 @@ define void @zt0_new_caller() "aarch64_new_zt0" nounwind { ; CHECK: // %bb.0: // %prelude ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-NEXT: cbz x8, .LBB7_2 +; CHECK-NEXT: cbz x8, .LBB8_2 ; CHECK-NEXT: // %bb.1: // %save.za ; CHECK-NEXT: bl __arm_tpidr2_save ; CHECK-NEXT: msr TPIDR2_EL0, xzr -; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero { zt0 } ; CHECK-NEXT: bl callee @@ -202,11 +235,11 @@ define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind { ; CHECK: // %bb.0: // %prelude ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-NEXT: cbz x8, .LBB8_2 +; CHECK-NEXT: cbz x8, .LBB9_2 ; CHECK-NEXT: // %bb.1: // %save.za ; CHECK-NEXT: bl __arm_tpidr2_save ; CHECK-NEXT: msr TPIDR2_EL0, xzr -; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero {za} ; CHECK-NEXT: zero { zt0 } diff --git a/llvm/test/Verifier/sme-attributes.ll b/llvm/test/Verifier/sme-attributes.ll index 4bf5e813daf2f..0ae2b9fd91f52 100644 --- a/llvm/test/Verifier/sme-attributes.ll +++ b/llvm/test/Verifier/sme-attributes.ll @@ -68,3 +68,6 @@ declare void @zt0_inout_out() "aarch64_inout_zt0" "aarch64_out_zt0"; declare void @zt0_inout_agnostic() "aarch64_inout_zt0" "aarch64_za_state_agnostic"; ; CHECK: Attributes 'aarch64_new_zt0', 'aarch64_in_zt0', 'aarch64_out_zt0', 'aarch64_inout_zt0', 'aarch64_preserves_zt0' and 'aarch64_za_state_agnostic' are mutually exclusive + +declare void @zt0_undef_function() "aarch64_zt0_undef"; +; CHECK: Attribute 'aarch64_zt0_undef' can only be applied to a callsite. 
diff --git a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp index 3af5e24168c8c..f8c77fcba19cf 100644 --- a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp +++ b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp @@ -1,6 +1,7 @@ #include "Utils/AArch64SMEAttributes.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Module.h" #include "llvm/Support/SourceMgr.h" @@ -69,6 +70,15 @@ TEST(SMEAttributes, Constructors) { ASSERT_TRUE(SA(*parseIR("declare void @foo() \"aarch64_new_zt0\"") ->getFunction("foo")) .isNewZT0()); + ASSERT_TRUE( + SA(cast((parseIR("declare void @callee()\n" + "define void @foo() {" + "call void @callee() \"aarch64_zt0_undef\"\n" + "ret void\n}") + ->getFunction("foo") + ->begin() + ->front()))) + .isUndefZT0()); // Invalid combinations. EXPECT_DEBUG_DEATH(SA(SA::SM_Enabled | SA::SM_Compatible), @@ -215,6 +225,18 @@ TEST(SMEAttributes, Basics) { ASSERT_FALSE(ZT0_New.hasSharedZAInterface()); ASSERT_TRUE(ZT0_New.hasPrivateZAInterface()); + SA ZT0_Undef = SA(SA::ZT0_Undef | SA::encodeZT0State(SA::StateValue::New)); + ASSERT_TRUE(ZT0_Undef.isNewZT0()); + ASSERT_FALSE(ZT0_Undef.isInZT0()); + ASSERT_FALSE(ZT0_Undef.isOutZT0()); + ASSERT_FALSE(ZT0_Undef.isInOutZT0()); + ASSERT_FALSE(ZT0_Undef.isPreservesZT0()); + ASSERT_FALSE(ZT0_Undef.sharesZT0()); + ASSERT_TRUE(ZT0_Undef.hasZT0State()); + ASSERT_FALSE(ZT0_Undef.hasSharedZAInterface()); + ASSERT_TRUE(ZT0_Undef.hasPrivateZAInterface()); + ASSERT_TRUE(ZT0_Undef.isUndefZT0()); + ASSERT_FALSE(SA(SA::Normal).isInZT0()); ASSERT_FALSE(SA(SA::Normal).isOutZT0()); ASSERT_FALSE(SA(SA::Normal).isInOutZT0()); @@ -285,6 +307,7 @@ TEST(SMEAttributes, Transitions) { SA ZT0_Shared = SA(SA::encodeZT0State(SA::StateValue::In)); SA ZA_ZT0_Shared = SA(SA::encodeZAState(SA::StateValue::In) | SA::encodeZT0State(SA::StateValue::In)); + SA Undef_ZT0 = SA(SA::ZT0_Undef); // Shared ZA -> Private ZA Interface ASSERT_FALSE(ZA_Shared.requiresDisablingZABeforeCall(Private_ZA)); @@ -295,6 +318,13 @@ TEST(SMEAttributes, Transitions) { ASSERT_TRUE(ZT0_Shared.requiresPreservingZT0(Private_ZA)); ASSERT_TRUE(ZT0_Shared.requiresEnablingZAAfterCall(Private_ZA)); + // Shared Undef ZT0 -> Private ZA Interface + // Note: "Undef ZT0" is a callsite attribute that means ZT0 is undefined at + // point the of the call. + ASSERT_TRUE(ZT0_Shared.requiresDisablingZABeforeCall(Undef_ZT0)); + ASSERT_FALSE(ZT0_Shared.requiresPreservingZT0(Undef_ZT0)); + ASSERT_TRUE(ZT0_Shared.requiresEnablingZAAfterCall(Undef_ZT0)); + // Shared ZA & ZT0 -> Private ZA Interface ASSERT_FALSE(ZA_ZT0_Shared.requiresDisablingZABeforeCall(Private_ZA)); ASSERT_TRUE(ZA_ZT0_Shared.requiresPreservingZT0(Private_ZA)); From 8272e451613d8f929e3d9d0d28c3ca1b225b0000 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 19 Mar 2025 12:01:18 -0700 Subject: [PATCH 06/83] [flang] Exempt construct entities from SAVE check for PURE (#131383) A PURE subprogram can't have a local variable with the SAVE attribute. An ASSOCIATE or SELECT TYPE construct entity whose selector is a variable will return true from IsSave(); exclude them from the local variable check. Fixes https://github.com/llvm/llvm-project/issues/131356. 
(cherry picked from commit b99dab25879449cb89c1ebd7b4088163543918e3) --- flang/lib/Semantics/check-declarations.cpp | 5 ++++- flang/test/Semantics/call10.f90 | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 5c26469b9fa24..8da9252133bdc 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -359,7 +359,10 @@ void CheckHelper::Check(const Symbol &symbol) { // are not pertinent to the characteristics of the procedure. // Restrictions on entities in pure procedure interfaces don't need // enforcement. - } else if (!FindCommonBlockContaining(symbol) && IsSaved(symbol)) { + } else if (symbol.has() || + FindCommonBlockContaining(symbol)) { + // can look like they have SAVE but are fine in PURE + } else if (IsSaved(symbol)) { if (IsInitialized(symbol)) { messages_.Say( "A pure subprogram may not initialize a variable"_err_en_US); diff --git a/flang/test/Semantics/call10.f90 b/flang/test/Semantics/call10.f90 index 2d2f57934cd8a..1e186f7b4048a 100644 --- a/flang/test/Semantics/call10.f90 +++ b/flang/test/Semantics/call10.f90 @@ -36,6 +36,8 @@ pure subroutine s05a end subroutine end interface + real :: moduleVar = 1. + contains subroutine impure(x) @@ -117,6 +119,8 @@ pure subroutine s05 ! C1589 !ERROR: A pure subprogram may not initialize a variable real :: v6 = 0. end block + associate (x => moduleVar) ! ok + end associate end subroutine pure subroutine s06 ! C1589 !ERROR: A pure subprogram may not have a variable with the VOLATILE attribute From 6ddf2e5d10f87b9f439316da01906027ddfa5c4a Mon Sep 17 00:00:00 2001 From: Carlos Galvez Date: Tue, 29 Apr 2025 11:13:30 +0200 Subject: [PATCH 07/83] =?UTF-8?q?[clang-tidy]=20Do=20not=20pass=20any=20fi?= =?UTF-8?q?le=20when=20listing=20checks=20in=20run=5Fclang=5Fti=E2=80=A6?= =?UTF-8?q?=20(#137286)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …dy.py Currently, run_clang_tidy.py does not correctly display the list of checks picked up from the top-level .clang-tidy file. The reason for that is that we are passing an empty string as input file. However, that's not how we are supposed to use clang-tidy to list checks. Per https://github.com/llvm/llvm-project/commit/65eccb463df7fe511c813ee6a1794c80d7489ff2, we simply should not pass any file at all - the internal code of clang-tidy will pass a "dummy" file if that's the case and get the .clang-tidy file from the current working directory. 
Fixes #136659 Co-authored-by: Carlos Gálvez (cherry picked from commit 014ab736dc741f24c007f9861e24b31faba0e1e7) --- clang-tools-extra/clang-tidy/tool/run-clang-tidy.py | 7 ++++--- clang-tools-extra/docs/ReleaseNotes.rst | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py index f1b934f7139e9..8741147a4f8a3 100755 --- a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py +++ b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py @@ -87,7 +87,7 @@ def find_compilation_database(path: str) -> str: def get_tidy_invocation( - f: str, + f: Optional[str], clang_tidy_binary: str, checks: str, tmpdir: Optional[str], @@ -147,7 +147,8 @@ def get_tidy_invocation( start.append(f"--warnings-as-errors={warnings_as_errors}") if allow_no_checks: start.append("--allow-no-checks") - start.append(f) + if f: + start.append(f) return start @@ -490,7 +491,7 @@ async def main() -> None: try: invocation = get_tidy_invocation( - "", + None, clang_tidy_binary, args.checks, None, diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 2ab597eb37048..0b2e9c5fabc36 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -190,6 +190,9 @@ Improvements to clang-tidy - Fixed bug in :program:`clang-tidy` by which `HeaderFilterRegex` did not take effect when passed via the `.clang-tidy` file. +- Fixed bug in :program:`run_clang_tidy.py` where the program would not + correctly display the checks enabled by the top-level `.clang-tidy` file. + New checks ^^^^^^^^^^ From 70eed33971d9b83fe81d837588dba64a6413b015 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Wed, 30 Apr 2025 08:22:38 +0100 Subject: [PATCH 08/83] [InstCombine] Do not combine shuffle+bitcast if the bitcast is eliminable. (#135769) If we are attempting to combine shuffle+bitcast but the bitcast is pairable with a subsequent bitcast, we should not fold the shuffle as doing so can block further simplifications. The motivation for this is a long-standing regression affecting SIMDe on AArch64, introduced indirectly by the AlwaysInliner (1a2e77cf). Some reproducers: * https://godbolt.org/z/53qx18s6M * https://godbolt.org/z/o5e43h5M7 (cherry picked from commit c91c3f930cfc75eb4e8b623ecd59c807863aa6c0) --- .../InstCombine/InstCombineVectorOps.cpp | 16 ++++++--- .../InstCombine/shufflevec-bitcast.ll | 35 +++++++++++++++++++ 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 6860a7cd07b78..118d2d4be828f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -3029,10 +3029,18 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SmallVector BCs; DenseMap NewBCs; for (User *U : SVI.users()) - if (BitCastInst *BC = dyn_cast(U)) - if (!BC->use_empty()) - // Only visit bitcasts that weren't previously handled. - BCs.push_back(BC); + if (BitCastInst *BC = dyn_cast(U)) { + // Only visit bitcasts that weren't previously handled. + if (BC->use_empty()) + continue; + // Prefer to combine bitcasts of bitcasts before attempting this fold. 
+ if (BC->hasOneUse()) { + auto *BC2 = dyn_cast(BC->user_back()); + if (BC2 && isEliminableCastPair(BC, BC2)) + continue; + } + BCs.push_back(BC); + } for (BitCastInst *BC : BCs) { unsigned BegIdx = Mask.front(); Type *TgtTy = BC->getDestTy(); diff --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll index f20077243273c..877dd1eefbae4 100644 --- a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll +++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll @@ -235,3 +235,38 @@ define <3 x i4> @shuf_bitcast_wrong_size(<2 x i8> %v, i8 %x) { %r = shufflevector <4 x i4> %b, <4 x i4> undef, <3 x i32> ret <3 x i4> %r } + +; Negative test - chain of bitcasts. + +define <16 x i8> @shuf_bitcast_chain(<8 x i32> %v) { +; CHECK-LABEL: @shuf_bitcast_chain( +; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[C:%.*]] = bitcast <4 x i32> [[S]] to <16 x i8> +; CHECK-NEXT: ret <16 x i8> [[C]] +; + %s = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> + %a = bitcast <4 x i32> %s to <2 x i64> + %b = bitcast <2 x i64> %a to i128 + %c = bitcast i128 %b to <16 x i8> + ret <16 x i8> %c +} + +; Same as above, but showing why it's not feasable to implement the reverse +; fold in VectorCombine (see #136998). + +define <4 x i32> @shuf_bitcast_chain_2(<8 x i32> %v) { +; CHECK-LABEL: @shuf_bitcast_chain_2( +; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[S0]], [[S1]] +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %s0 = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> + %s1 = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> + %b0 = bitcast <4 x i32> %s0 to i128 + %b1 = bitcast <4 x i32> %s1 to i128 + %c0 = bitcast i128 %b0 to <4 x i32> + %c1 = bitcast i128 %b1 to <4 x i32> + %r = or <4 x i32> %c0, %c1 + ret <4 x i32> %r +} From 009f3c10d1c1be330bf420b97bdd9c5236b93923 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Mon, 7 Apr 2025 11:34:24 -0400 Subject: [PATCH 09/83] [LLD][COFF] Don't dllimport from static libraries (#134443) This reverts commit 6a1bdd9 and re-instate behavior that matches what MSVC link.exe does, that is, error out when trying to dllimport a symbol from a static library. A hint is now displayed in stdout, mentioning that we should rather dllimport the symbol from a import library. Fixes https://github.com/llvm/llvm-project/issues/131807 --- lld/COFF/Driver.cpp | 6 ++-- lld/COFF/SymbolTable.cpp | 25 ++++++++------ lld/COFF/SymbolTable.h | 5 +-- .../COFF/imports-static-lib-indirect.test | 26 +++++++++++++++ lld/test/COFF/imports-static-lib.test | 33 +++++++++++++++++++ lld/test/COFF/undefined_lazy.test | 26 --------------- 6 files changed, 77 insertions(+), 44 deletions(-) create mode 100644 lld/test/COFF/imports-static-lib-indirect.test create mode 100644 lld/test/COFF/imports-static-lib.test delete mode 100644 lld/test/COFF/undefined_lazy.test diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index ac3ac57bd17f4..f50ca529df4d7 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -2639,10 +2639,8 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { createECExportThunks(); // Resolve remaining undefined symbols and warn about imported locals. 
- ctx.forEachSymtab([&](SymbolTable &symtab) { - while (symtab.resolveRemainingUndefines()) - run(); - }); + ctx.forEachSymtab( + [&](SymbolTable &symtab) { symtab.resolveRemainingUndefines(); }); if (errorCount()) return; diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 307bd4a0c9411..a146e5211736e 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -214,7 +214,8 @@ struct UndefinedDiag { std::vector files; }; -static void reportUndefinedSymbol(COFFLinkerContext &ctx, +static void reportUndefinedSymbol(SymbolTable *symTab, + COFFLinkerContext &ctx, const UndefinedDiag &undefDiag) { auto diag = errorOrWarn(ctx); diag << "undefined symbol: " << undefDiag.sym; @@ -232,6 +233,17 @@ static void reportUndefinedSymbol(COFFLinkerContext &ctx, } if (numDisplayedRefs < numRefs) diag << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times"; + + // Hints + StringRef name = undefDiag.sym->getName(); + if (name.consume_front("__imp_")) { + Symbol *imp = symTab->find(name); + if (imp && imp->isLazy()) { + diag << "\nNOTE: a relevant symbol '" << imp->getName() + << "' is available in " << toString(imp->getFile()) + << " but cannot be used because it is not an import library."; + } + } } void SymbolTable::loadMinGWSymbols() { @@ -402,7 +414,7 @@ void SymbolTable::reportProblemSymbols( processFile(file, file->getSymbols()); for (const UndefinedDiag &undefDiag : undefDiags) - reportUndefinedSymbol(ctx, undefDiag); + reportUndefinedSymbol(this, ctx, undefDiag); } void SymbolTable::reportUnresolvable() { @@ -432,11 +444,10 @@ void SymbolTable::reportUnresolvable() { reportProblemSymbols(undefs, /*localImports=*/nullptr, true); } -bool SymbolTable::resolveRemainingUndefines() { +void SymbolTable::resolveRemainingUndefines() { llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols"); SmallPtrSet undefs; DenseMap localImports; - bool foundLazy = false; for (auto &i : symMap) { Symbol *sym = i.second; @@ -481,11 +492,6 @@ bool SymbolTable::resolveRemainingUndefines() { imp = findLocalSym(*mangledName); } } - if (imp && imp->isLazy()) { - forceLazy(imp); - foundLazy = true; - continue; - } if (imp && isa(imp)) { auto *d = cast(imp); replaceSymbol(sym, ctx, name, d); @@ -513,7 +519,6 @@ bool SymbolTable::resolveRemainingUndefines() { reportProblemSymbols( undefs, ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false); - return foundLazy; } std::pair SymbolTable::insert(StringRef name) { diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index ff6e8487f0734..2916c23d95c87 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -58,10 +58,7 @@ class SymbolTable { // Try to resolve any undefined symbols and update the symbol table // accordingly, then print an error message for any remaining undefined // symbols and warn about imported local symbols. - // Returns whether more files might need to be linked in to resolve lazy - // symbols, in which case the caller is expected to call the function again - // after linking those files. - bool resolveRemainingUndefines(); + void resolveRemainingUndefines(); // Load lazy objects that are needed for MinGW automatic import and for // doing stdcall fixups. 
diff --git a/lld/test/COFF/imports-static-lib-indirect.test b/lld/test/COFF/imports-static-lib-indirect.test new file mode 100644 index 0000000000000..beda0d7a31afd --- /dev/null +++ b/lld/test/COFF/imports-static-lib-indirect.test @@ -0,0 +1,26 @@ +# REQUIRES: x86 + +# Pulling in on both a dllimport symbol and a static symbol should only warn. +# RUN: split-file %s %t.dir +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/other.s -o %t.other.obj +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/main.s -o %t.main.obj +# RUN: llvm-lib %t.other.obj -out:%t.other.lib +# RUN: lld-link %t.other.lib %t.main.obj -out:%t.dll -dll 2>&1 | FileCheck %s + +CHECK: warning: {{.*}} locally defined symbol imported: foo {{.*}} [LNK4217] + +#--- other.s +.text +.globl other +.globl foo +other: + ret +foo: + ret +#--- main.s +.text +.global _DllMainCRTStartup +_DllMainCRTStartup: + call *other(%rip) + call *__imp_foo(%rip) + ret diff --git a/lld/test/COFF/imports-static-lib.test b/lld/test/COFF/imports-static-lib.test new file mode 100644 index 0000000000000..8e9525dab5284 --- /dev/null +++ b/lld/test/COFF/imports-static-lib.test @@ -0,0 +1,33 @@ +# REQUIRES: x86 + +# Ensure that we don't import dllimport symbols from static (non-import) libraries +# RUN: split-file %s %t.dir +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/foo.s -o %t.foo.obj +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/main.s -o %t.main.obj +# RUN: llvm-lib %t.foo.obj -out:%t.foo.lib +# RUN: not lld-link %t.foo.lib %t.main.obj -out:%t.dll -dll 2>&1 | FileCheck %s + +CHECK: error: undefined symbol: __declspec(dllimport) foo +CHECK: NOTE: a relevant symbol 'foo' is available in {{.*}}.foo.lib but cannot be used because it is not an import library. + +# Now do the same thing, but import the symbol from a import library. +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/foo_dll_main.s -o %t.foo_dll_main.obj +# RUN: lld-link /out:%t.dll /dll %t.foo.obj %t.foo_dll_main.obj /export:foo /implib:%t.foo.imp.lib +# RUN: lld-link %t.main.obj %t.foo.imp.lib -out:%t.exe -dll + +#--- foo.s +.text +.globl foo +foo: + ret +#--- foo_dll_main.s +.text +.global _DllMainCRTStartup +_DllMainCRTStartup: + ret +#--- main.s +.text +.global _DllMainCRTStartup +_DllMainCRTStartup: + call *__imp_foo(%rip) + ret diff --git a/lld/test/COFF/undefined_lazy.test b/lld/test/COFF/undefined_lazy.test deleted file mode 100644 index ed5cd358b5cd9..0000000000000 --- a/lld/test/COFF/undefined_lazy.test +++ /dev/null @@ -1,26 +0,0 @@ -# REQUIRES: x86 - -# RUN: split-file %s %t.dir -# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/foo.s -o %t.foo.obj -# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/bar.s -o %t.bar.obj -# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/qux.s -o %t.qux.obj -# RUN: llvm-lib %t.foo.obj -out:%t.foo.lib -# RUN: llvm-lib %t.bar.obj -out:%t.bar.lib -# RUN: lld-link %t.foo.lib %t.bar.lib %t.qux.obj -out:%t.dll -dll -# -#--- foo.s -.text -.globl foo -foo: - call bar -#--- bar.s -.text -.globl bar -bar: - ret -#--- qux.s -.text -.global _DllMainCRTStartup -_DllMainCRTStartup: - call *__imp_foo(%rip) - ret From 961ce35e29574eebfa82c447ac7fef8e499f53b9 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 5 May 2025 16:33:41 -0500 Subject: [PATCH 10/83] [OpenMP] Add pre sm_70 load hack back in (#138589) Summary: Different ordering modes aren't supported for an atomic load, so we just do an add of zero as the same thing. 
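Conceptually (a minimal standard-C++ sketch of the equivalence, not the DeviceRTL code itself):

```
#include <atomic>

// fetch_add(0) returns the value held before the (no-op) add, with the
// requested memory order, so it can stand in for an atomic load where a plain
// scoped load is unsupported.
static int loadViaAdd(std::atomic<int> &A, std::memory_order MO) {
  return A.fetch_add(0, MO);
}
```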
It's less efficient, but it works. Fixes https://github.com/llvm/llvm-project/issues/138560 (cherry picked from commit dfcb8cb2a92c9f72ddde5ea08dadf2f640197d32) --- offload/DeviceRTL/include/Synchronization.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h index 5a789441b9d35..c510fbf0774c2 100644 --- a/offload/DeviceRTL/include/Synchronization.h +++ b/offload/DeviceRTL/include/Synchronization.h @@ -61,7 +61,11 @@ V add(Ty *Address, V Val, atomic::OrderingTy Ordering, template > V load(Ty *Address, atomic::OrderingTy Ordering, MemScopeTy MemScope = MemScopeTy::device) { +#ifdef __NVPTX__ + return __scoped_atomic_fetch_add(Address, V(0), Ordering, MemScope); +#else return __scoped_atomic_load_n(Address, Ordering, MemScope); +#endif } template > From 2386c377db4ff35129d1dc6a618ea13252493ca4 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 6 May 2025 14:19:47 +0200 Subject: [PATCH 11/83] [BasicAA] Gracefully handle large LocationSize (#138528) If the LocationSize is larger than the index space of the pointer type, bail out instead of triggering an APInt assertion. Fixes the issue reported at https://github.com/llvm/llvm-project/pull/119365#issuecomment-2849874894. (cherry picked from commit 027b2038140f309467585298f9cb10d6b37411e7) --- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 8 +++++--- llvm/test/Analysis/BasicAA/size-overflow.ll | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Analysis/BasicAA/size-overflow.ll diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index b2a3f3390e000..06e8eb7072917 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1245,8 +1245,11 @@ AliasResult BasicAAResult::aliasGEP( if (V1Size.isScalable() || V2Size.isScalable()) return AliasResult::MayAlias; - // We need to know both acess sizes for all the following heuristics. - if (!V1Size.hasValue() || !V2Size.hasValue()) + // We need to know both access sizes for all the following heuristics. Don't + // try to reason about sizes larger than the index space. + unsigned BW = DecompGEP1.Offset.getBitWidth(); + if (!V1Size.hasValue() || !V2Size.hasValue() || + !isUIntN(BW, V1Size.getValue()) || !isUIntN(BW, V2Size.getValue())) return AliasResult::MayAlias; APInt GCD; @@ -1301,7 +1304,6 @@ AliasResult BasicAAResult::aliasGEP( // Compute ranges of potentially accessed bytes for both accesses. If the // interseciton is empty, there can be no overlap. - unsigned BW = OffsetRange.getBitWidth(); ConstantRange Range1 = OffsetRange.add( ConstantRange(APInt(BW, 0), APInt(BW, V1Size.getValue()))); ConstantRange Range2 = diff --git a/llvm/test/Analysis/BasicAA/size-overflow.ll b/llvm/test/Analysis/BasicAA/size-overflow.ll new file mode 100644 index 0000000000000..2a390d29e472a --- /dev/null +++ b/llvm/test/Analysis/BasicAA/size-overflow.ll @@ -0,0 +1,14 @@ +; RUN: opt -passes=aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s + +target datalayout = "p:32:32" + +; Make sure that using a LocationSize larget than the index space does not +; assert. 
+ +; CHECK: Just Mod: Ptr: i32* %gep <-> call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 4294967296, i1 false) +define void @test(ptr %p, i32 %idx) { + %gep = getelementptr i8, ptr %p, i32 %idx + load i32, ptr %gep + call void @llvm.memset.i64(ptr %p, i8 0, i64 u0x100000000, i1 false) + ret void +} From ae97a56d363f95d382bac5eca5f9b5775b1919c2 Mon Sep 17 00:00:00 2001 From: Ikhlas Ajbar Date: Tue, 6 May 2025 16:47:25 -0500 Subject: [PATCH 12/83] [Hexagon] Add missing patterns to select PFALSE and PTRUE (#138712) Fixes #134659 (cherry picked from commit 57e88993fee30f4441e87df4df061393600b2ada) --- llvm/lib/Target/Hexagon/HexagonPatterns.td | 5 ++++ llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll | 29 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 244f204539c89..acf701b0f3e5d 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -109,7 +109,12 @@ def pfalse: PatFrag<(ops), (HexagonPFALSE)>; def pnot: PatFrag<(ops node:$Pu), (xor node:$Pu, ptrue)>; def: Pat<(v8i1 (HexagonPFALSE)), (C2_tfrrp (A2_tfrsi (i32 0)))>; +def: Pat<(v4i1 (HexagonPFALSE)), (C2_tfrrp (A2_tfrsi (i32 0)))>; +def: Pat<(v2i1 (HexagonPFALSE)), (C2_tfrrp (A2_tfrsi (i32 0)))>; + def: Pat<(v8i1 (HexagonPTRUE)), (C2_tfrrp (A2_tfrsi (i32 -1)))>; +def: Pat<(v4i1 (HexagonPTRUE)), (C2_tfrrp (A2_tfrsi (i32 -1)))>; +def: Pat<(v2i1 (HexagonPTRUE)), (C2_tfrrp (A2_tfrsi (i32 -1)))>; def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru), (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>; diff --git a/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll b/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll new file mode 100644 index 0000000000000..c0904b8b4fdd6 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=hexagon -debug-only=isel 2>&1 < %s - | FileCheck %s + +; CHECK: [[R0:%[0-9]+]]:intregs = A2_tfrsi 0 +; CHECK-NEXT: predregs = C2_tfrrp killed [[R0]]:intregs + +define fastcc i16 @test(ptr %0, { <4 x i32>, <4 x i1> } %1, <4 x i1> %2) { +Entry: + %3 = alloca [16 x i8], i32 0, align 16 + %4 = alloca [16 x i8], i32 0, align 16 + store <4 x i32> , ptr %4, align 16 + store <4 x i32> , ptr %3, align 16 + %5 = load <4 x i32>, ptr %4, align 16 + %6 = load <4 x i32>, ptr %3, align 16 + %7 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> %5, <4 x i32> %6) + %8 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %2) + br i1 %8, label %OverflowFail, label %OverflowOk + +OverflowFail: ; preds = %Entry + store volatile i32 0, ptr null, align 4 + unreachable + +OverflowOk: ; preds = %Entry + %9 = extractvalue { <4 x i32>, <4 x i1> } %7, 0 + store <4 x i32> %9, ptr %0, align 16 + ret i16 0 + } + +declare { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32>, <4 x i32>) #0 +declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>) #0 From 2b34040173f7d0e9b6da246aa6b923611b56fb87 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Mon, 5 May 2025 13:32:33 +0530 Subject: [PATCH 13/83] [clang-repl] Fix destructor for interpreter for the cuda negation case (#138091) Check this error for more context (https://github.com/compiler-research/CppInterOp/actions/runs/14749797085/job/41407625681?pr=491#step:10:531) This fails with ``` * thread #1, name = 'CppInterOpTests', stop reason = signal SIGSEGV: address not mapped to object (fault address: 0x55500356d6d3) * frame #0: 
0x00007fffee41cfe3 libclangCppInterOp.so.21.0gitclang::PragmaNamespace::~PragmaNamespace() + 99 frame #1: 0x00007fffee435666 libclangCppInterOp.so.21.0gitclang::Preprocessor::~Preprocessor() + 3830 frame #2: 0x00007fffee20917a libclangCppInterOp.so.21.0gitstd::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() + 58 frame #3: 0x00007fffee224796 libclangCppInterOp.so.21.0gitclang::CompilerInstance::~CompilerInstance() + 838 frame #4: 0x00007fffee22494d libclangCppInterOp.so.21.0gitclang::CompilerInstance::~CompilerInstance() + 13 frame #5: 0x00007fffed95ec62 libclangCppInterOp.so.21.0gitclang::IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() + 98 frame #6: 0x00007fffed9551b6 libclangCppInterOp.so.21.0gitclang::Interpreter::~Interpreter() + 102 frame #7: 0x00007fffed95598d libclangCppInterOp.so.21.0gitclang::Interpreter::~Interpreter() + 13 frame #8: 0x00007fffed9181e7 libclangCppInterOp.so.21.0gitcompat::createClangInterpreter(std::vector>&) + 2919 ``` Problem : 1) The destructor currently handles no clearance for the DeviceParser and the DeviceAct. We currently only have this https://github.com/llvm/llvm-project/blob/976493822443c52a71ed3c67aaca9a555b20c55d/clang/lib/Interpreter/Interpreter.cpp#L416-L419 2) The ownership for DeviceCI currently is present in IncrementalCudaDeviceParser. But this should be similar to how the combination for hostCI, hostAction and hostParser are managed by the Interpreter. As on master the DeviceAct and DeviceParser are managed by the Interpreter but not DeviceCI. This is problematic because : IncrementalParser holds a Sema& which points into the DeviceCI. On master, DeviceCI is destroyed before the base class ~IncrementalParser() runs, causing Parser::reset() to access a dangling Sema (and as Sema holds a reference to Preprocessor which owns PragmaNamespace) we see this ``` * frame #0: 0x00007fffee41cfe3 libclangCppInterOp.so.21.0gitclang::PragmaNamespace::~PragmaNamespace() + 99 frame #1: 0x00007fffee435666 libclangCppInterOp.so.21.0gitclang::Preprocessor::~Preprocessor() + 3830 ``` (cherry picked from commit 529b6fcb00aabbed17365e5fb3abbc2ae127c967) --- clang/include/clang/Interpreter/Interpreter.h | 3 +++ clang/lib/Interpreter/DeviceOffload.cpp | 8 +++----- clang/lib/Interpreter/DeviceOffload.h | 4 +--- clang/lib/Interpreter/Interpreter.cpp | 9 ++++++++- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index 56213f88b9e30..f8663e3193a18 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -116,6 +116,9 @@ class Interpreter { /// Compiler instance performing the incremental compilation. std::unique_ptr CI; + /// An optional compiler instance for CUDA offloading + std::unique_ptr DeviceCI; + protected: // Derived classes can use an extended interface of the Interpreter. 
Interpreter(std::unique_ptr Instance, llvm::Error &Err, diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp index 7d0125403ea52..05625ddedb72f 100644 --- a/clang/lib/Interpreter/DeviceOffload.cpp +++ b/clang/lib/Interpreter/DeviceOffload.cpp @@ -25,13 +25,12 @@ namespace clang { IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( - std::unique_ptr DeviceInstance, - CompilerInstance &HostInstance, + CompilerInstance &DeviceInstance, CompilerInstance &HostInstance, llvm::IntrusiveRefCntPtr FS, llvm::Error &Err, const std::list &PTUs) - : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS), + : IncrementalParser(DeviceInstance, Err), PTUs(PTUs), VFS(FS), CodeGenOpts(HostInstance.getCodeGenOpts()), - TargetOpts(DeviceInstance->getTargetOpts()) { + TargetOpts(DeviceInstance.getTargetOpts()) { if (Err) return; StringRef Arch = TargetOpts.CPU; @@ -41,7 +40,6 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( llvm::inconvertibleErrorCode())); return; } - DeviceCI = std::move(DeviceInstance); } llvm::Expected IncrementalCUDADeviceParser::GeneratePTX() { diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h index 43645033c4840..0b903e31c6799 100644 --- a/clang/lib/Interpreter/DeviceOffload.h +++ b/clang/lib/Interpreter/DeviceOffload.h @@ -28,8 +28,7 @@ class IncrementalCUDADeviceParser : public IncrementalParser { public: IncrementalCUDADeviceParser( - std::unique_ptr DeviceInstance, - CompilerInstance &HostInstance, + CompilerInstance &DeviceInstance, CompilerInstance &HostInstance, llvm::IntrusiveRefCntPtr VFS, llvm::Error &Err, const std::list &PTUs); @@ -42,7 +41,6 @@ class IncrementalCUDADeviceParser : public IncrementalParser { ~IncrementalCUDADeviceParser(); protected: - std::unique_ptr DeviceCI; int SMVersion; llvm::SmallString<1024> PTXCode; llvm::SmallVector FatbinContent; diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index f91563dd0378c..3b81f9d701b42 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -416,6 +416,10 @@ Interpreter::Interpreter(std::unique_ptr Instance, Interpreter::~Interpreter() { IncrParser.reset(); Act->FinalizeAction(); + if (DeviceParser) + DeviceParser.reset(); + if (DeviceAct) + DeviceAct->FinalizeAction(); if (IncrExecutor) { if (llvm::Error Err = IncrExecutor->cleanUp()) llvm::report_fatal_error( @@ -501,8 +505,11 @@ Interpreter::createWithCUDA(std::unique_ptr CI, DCI->ExecuteAction(*Interp->DeviceAct); + Interp->DeviceCI = std::move(DCI); + auto DeviceParser = std::make_unique( - std::move(DCI), *Interp->getCompilerInstance(), IMVFS, Err, Interp->PTUs); + *Interp->DeviceCI, *Interp->getCompilerInstance(), IMVFS, Err, + Interp->PTUs); if (Err) return std::move(Err); From be087ab35970dffe3e473b5a10266ed356261746 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 5 May 2025 17:19:13 -0400 Subject: [PATCH 14/83] [libc++] Re-introduce _LIBCPP_DISABLE_AVAILABILITY (#134158) The `_LIBCPP_DISABLE_AVAILABILITY` macro was removed in afae1a5f32bb as an intended no-op. It turns out that some projects are making use of that macro to work around a Clang bug with availability annotations that still exists: https://github.com/llvm/llvm-project/issues/134151. Since that Clang bug still hasn't been fixed, I feel that we must sill honor that unfortunate macro until we've figured out how to get rid of it without breaking code. 
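For example, a translation unit relying on the workaround opts out roughly like this (a sketch only, not a recommendation; the macro must be visible before the first libc++ include, typically via -D_LIBCPP_DISABLE_AVAILABILITY on the command line):

```
// Sketch: suppress libc++ availability markup for this TU to work around the
// Clang availability-attribute bug referenced above.
#define _LIBCPP_DISABLE_AVAILABILITY
#include <optional>

std::optional<int> make_value() { return 42; }
```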
(cherry picked from commit 25fc52e655fb4bfd3bb89948d5cbfe011e1b8984) --- libcxx/docs/ReleaseNotes/20.rst | 3 -- libcxx/include/__configuration/availability.h | 8 ++- .../vendor/apple/disable-availability.sh.cpp | 49 +++++++++++++++++++ 3 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 libcxx/test/libcxx/vendor/apple/disable-availability.sh.cpp diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index 06e6e673b5508..f81a573845e6f 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -153,9 +153,6 @@ Deprecations and Removals headers as an extension and only deprecates them. The ``_LIBCPP_DISABLE_DEPRECATION_WARNINGS`` macro can be defined to suppress deprecation for these headers. -- The ``_LIBCPP_DISABLE_AVAILABILITY`` macro that was used to force-disable availability markup has now been removed. - Whether availability markup is used by the library is now solely controlled at configuration-time. - - The pointer safety functions ``declare_reachable``, ``declare_no_pointers``, ``undeclare_no_pointers`` and ``__undeclare_reachable`` have been removed from the library. These functions were never implemented in a non-trivial way, making it very unlikely that any binary depends on them. diff --git a/libcxx/include/__configuration/availability.h b/libcxx/include/__configuration/availability.h index f9e52a690c05c..aa2e75b6f6fe8 100644 --- a/libcxx/include/__configuration/availability.h +++ b/libcxx/include/__configuration/availability.h @@ -69,7 +69,13 @@ // Availability markup is disabled when building the library, or when a non-Clang // compiler is used because only Clang supports the necessary attributes. -#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) +// +// We also allow users to force-disable availability markup via the `_LIBCPP_DISABLE_AVAILABILITY` +// macro because that is the only way to work around a Clang bug related to availability +// attributes: https://github.com/llvm/llvm-project/issues/134151. +// Once that bug has been fixed, we should remove the macro. +#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || \ + !defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_DISABLE_AVAILABILITY) # undef _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS # define _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS 0 #endif diff --git a/libcxx/test/libcxx/vendor/apple/disable-availability.sh.cpp b/libcxx/test/libcxx/vendor/apple/disable-availability.sh.cpp new file mode 100644 index 0000000000000..474b3f83c6044 --- /dev/null +++ b/libcxx/test/libcxx/vendor/apple/disable-availability.sh.cpp @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: stdlib=apple-libc++ + +// This test is dependent on the code generated by the compiler, and it doesn't +// work properly with older AppleClangs. +// UNSUPPORTED: apple-clang-15 + +// This test ensures that we retain a way to disable availability markup on Apple platforms +// in order to work around Clang bug https://github.com/llvm/llvm-project/issues/134151. 
+// +// Once that bug has been fixed or once we've made changes to libc++'s use of availability +// that render that workaround unnecessary, the macro and this test can be removed. +// +// The test works by creating a final linked image that refers to a function marked with +// both an availability attribute and with _LIBCPP_HIDE_FROM_ABI. We then check that this +// generates a weak reference to the function -- without the bug, we'd expect a strong +// reference or no reference at all instead. + +// First, test the test. Make sure that we do (incorrectly) produce a weak definition when we +// don't define _LIBCPP_DISABLE_AVAILABILITY. Otherwise, something may have changed in libc++ +// and this test might not work anymore. +// RUN: %{cxx} %s %{flags} %{compile_flags} %{link_flags} -fvisibility=hidden -fvisibility-inlines-hidden -shared -o %t.1.dylib +// RUN: nm -m %t.1.dylib | c++filt | grep value > %t.1.symbols +// RUN: grep weak %t.1.symbols + +// Now, make sure that 'weak' goes away when we define _LIBCPP_DISABLE_AVAILABILITY. +// In fact, all references to the function might go away, so we just check that we don't emit +// any weak reference. +// RUN: %{cxx} %s %{flags} %{compile_flags} %{link_flags} -fvisibility=hidden -fvisibility-inlines-hidden -D_LIBCPP_DISABLE_AVAILABILITY -shared -o %t.2.dylib +// RUN: nm -m %t.2.dylib | c++filt | grep value > %t.2.symbols +// RUN: not grep weak %t.2.symbols + +#include + +template +struct optional { + T val_; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_INTRODUCED_IN_LLVM_11_ATTRIBUTE T value() const { return val_; } +}; + +using PMF = int (optional::*)() const; +PMF f() { return &optional::value; } From 5429418cb06455d867f24b2c7cf2477357a4418c Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sun, 4 May 2025 20:55:49 +0300 Subject: [PATCH 15/83] [clang] Add support for Debian 14 Forky and Debian 15 Duke (#138460) Futureproofs our single Debian-specific special case for roughly the next 6 years. 
See: https://lists.debian.org/debian-devel-announce/2025/01/msg00004.html (cherry picked from commit 58e6883c8b6e571d6bd774645ee2b6348cfed6ba) --- clang/include/clang/Driver/Distro.h | 4 +++- clang/lib/Driver/Distro.cpp | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index b4d485dac8a26..c544a8c002191 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -39,6 +39,8 @@ class Distro { DebianBullseye, DebianBookworm, DebianTrixie, + DebianForky, + DebianDuke, Exherbo, RHEL5, RHEL6, @@ -128,7 +130,7 @@ class Distro { bool IsOpenSUSE() const { return DistroVal == OpenSUSE; } bool IsDebian() const { - return DistroVal >= DebianLenny && DistroVal <= DebianTrixie; + return DistroVal >= DebianLenny && DistroVal <= DebianDuke; } bool IsUbuntu() const { diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 3cc79535de8da..71ba71fa18379 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -160,6 +160,10 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { return Distro::DebianBookworm; case 13: return Distro::DebianTrixie; + case 14: + return Distro::DebianForky; + case 15: + return Distro::DebianDuke; default: return Distro::UnknownDistro; } @@ -173,6 +177,8 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { .Case("bullseye/sid", Distro::DebianBullseye) .Case("bookworm/sid", Distro::DebianBookworm) .Case("trixie/sid", Distro::DebianTrixie) + .Case("forky/sid", Distro::DebianForky) + .Case("duke/sid", Distro::DebianDuke) .Default(Distro::UnknownDistro); } From a7166c37394612a58bcd237cefbb8fce7424e747 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Fri, 2 May 2025 19:19:39 -0700 Subject: [PATCH 16/83] release/20.x: [clang-format] RemoveParentheses shouldn't remove empty parentheses (#138229) Backport d3506ee573a2aa1403817642ef45f8c0305bb572 --- clang/lib/Format/UnwrappedLineParser.cpp | 3 ++- clang/unittests/Format/FormatTest.cpp | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 2b348c926294e..c3ffabce15ec8 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2581,7 +2581,8 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { if (Prev) { auto OptionalParens = [&] { if (MightBeStmtExpr || MightBeFoldExpr || Line->InMacroBody || - SeenComma || Style.RemoveParentheses == FormatStyle::RPS_Leave) { + SeenComma || Style.RemoveParentheses == FormatStyle::RPS_Leave || + RParen->getPreviousNonComment() == LParen) { return false; } const bool DoubleParens = diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index bf3eff129efd5..49e1fde1d9ccf 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -27895,6 +27895,8 @@ TEST_F(FormatTest, RemoveParentheses) { verifyFormat("return ((... && std::is_convertible_v));", "return (((... 
&& std::is_convertible_v)));", Style); + verifyFormat("MOCK_METHOD(void, Function, (), override);", + "MOCK_METHOD(void, Function, (), (override));", Style); Style.RemoveParentheses = FormatStyle::RPS_ReturnStatement; verifyFormat("#define Return0 return (0);", Style); From d34d5296095b013bfdec511a06a81777c992e1e3 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 11 Apr 2025 20:03:23 +0200 Subject: [PATCH 17/83] Support z17 processor name and scheduler description The recently announced IBM z17 processor implements the architecture already supported as "arch15" in LLVM. This patch adds support for "z17" as an alternate architecture name for arch15. This patch also add the scheduler description for the z17 processor, provided by Jonas Paulsson. Manual backport of https://github.com/llvm/llvm-project/pull/135254 --- clang/lib/Basic/Targets/SystemZ.cpp | 2 +- .../CodeGen/SystemZ/builtins-systemz-bitop.c | 4 +- .../SystemZ/builtins-systemz-vector5-error.c | 2 +- .../SystemZ/builtins-systemz-vector5.c | 2 +- .../SystemZ/builtins-systemz-zvector5-error.c | 2 +- .../SystemZ/builtins-systemz-zvector5.c | 4 +- .../test/CodeGen/SystemZ/systemz-abi-vector.c | 2 + clang/test/CodeGen/SystemZ/systemz-abi.c | 2 + clang/test/Driver/systemz-march.c | 2 + .../Misc/target-invalid-cpu-note/systemz.c | 1 + .../Preprocessor/predefined-arch-macros.c | 3 + .../Target/SystemZ/SystemZISelLowering.cpp | 8 +- llvm/lib/Target/SystemZ/SystemZInstrVector.td | 4 +- llvm/lib/Target/SystemZ/SystemZProcessors.td | 3 +- llvm/lib/Target/SystemZ/SystemZSchedule.td | 1 + llvm/lib/Target/SystemZ/SystemZScheduleZ16.td | 16 +- llvm/lib/Target/SystemZ/SystemZScheduleZ17.td | 1754 +++++++++++++++++ llvm/lib/TargetParser/Host.cpp | 2 +- .../Analysis/CostModel/SystemZ/divrem-reg.ll | 86 +- .../CostModel/SystemZ/i128-cmp-ext-conv.ll | 8 +- .../Analysis/CostModel/SystemZ/int-arith.ll | 10 +- llvm/test/CodeGen/SystemZ/args-12.ll | 2 +- llvm/test/CodeGen/SystemZ/args-13.ll | 2 +- llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll | 2 +- llvm/test/CodeGen/SystemZ/int-abs-03.ll | 4 +- llvm/test/CodeGen/SystemZ/int-add-19.ll | 2 +- llvm/test/CodeGen/SystemZ/int-cmp-64.ll | 4 +- llvm/test/CodeGen/SystemZ/int-conv-15.ll | 4 +- llvm/test/CodeGen/SystemZ/int-div-08.ll | 4 +- llvm/test/CodeGen/SystemZ/int-max-02.ll | 4 +- llvm/test/CodeGen/SystemZ/int-min-02.ll | 4 +- llvm/test/CodeGen/SystemZ/int-mul-14.ll | 4 +- llvm/test/CodeGen/SystemZ/int-mul-15.ll | 4 +- llvm/test/CodeGen/SystemZ/int-mul-16.ll | 4 +- llvm/test/CodeGen/SystemZ/int-neg-04.ll | 4 +- llvm/test/CodeGen/SystemZ/int-sub-12.ll | 2 +- llvm/test/CodeGen/SystemZ/llxa-01.ll | 2 +- llvm/test/CodeGen/SystemZ/llxa-02.ll | 2 +- llvm/test/CodeGen/SystemZ/llxa-03.ll | 2 +- llvm/test/CodeGen/SystemZ/llxa-04.ll | 2 +- llvm/test/CodeGen/SystemZ/llxa-05.ll | 2 +- llvm/test/CodeGen/SystemZ/lxa-01.ll | 2 +- llvm/test/CodeGen/SystemZ/lxa-02.ll | 2 +- llvm/test/CodeGen/SystemZ/lxa-03.ll | 2 +- llvm/test/CodeGen/SystemZ/lxa-04.ll | 2 +- llvm/test/CodeGen/SystemZ/lxa-05.ll | 2 +- llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll | 2 +- llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll | 2 +- llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll | 2 +- llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-cmp-09.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-div-03.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-eval.ll | 58 +- .../test/CodeGen/SystemZ/vec-intrinsics-05.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-mul-06.ll | 4 +- .../{insns-arch15.txt => insns-z17.txt} | 4 +- .../{insn-bad-arch15.s => 
insn-bad-z17.s} | 4 +- .../{insn-good-arch15.s => insn-good-z17.s} | 4 +- llvm/unittests/TargetParser/Host.cpp | 2 +- 59 files changed, 1926 insertions(+), 160 deletions(-) create mode 100644 llvm/lib/Target/SystemZ/SystemZScheduleZ17.td rename llvm/test/MC/Disassembler/SystemZ/{insns-arch15.txt => insns-z17.txt} (99%) rename llvm/test/MC/SystemZ/{insn-bad-arch15.s => insn-bad-z17.s} (98%) rename llvm/test/MC/SystemZ/{insn-good-arch15.s => insn-good-z17.s} (99%) diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp index c836d110d26d5..6326188b3bd18 100644 --- a/clang/lib/Basic/Targets/SystemZ.cpp +++ b/clang/lib/Basic/Targets/SystemZ.cpp @@ -105,7 +105,7 @@ static constexpr ISANameRevision ISARevisions[] = { {{"arch12"}, 12}, {{"z14"}, 12}, {{"arch13"}, 13}, {{"z15"}, 13}, {{"arch14"}, 14}, {{"z16"}, 14}, - {{"arch15"}, 15}, + {{"arch15"}, 15}, {{"z17"}, 15}, }; int SystemZTargetInfo::getISARevision(StringRef Name) const { diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c b/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c index 5b4051c8d6f17..717a7d7ab49e2 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c @@ -1,6 +1,6 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm -x c++ %s -o - | FileCheck %s +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm -x c++ %s -o - | FileCheck %s unsigned long test_bdepg(unsigned long a, unsigned long b) { // CHECK-LABEL: test_bdepg diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c index 3943a15af9d2f..8275b9ddb88a8 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c @@ -1,5 +1,5 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-unknown-unknown \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-unknown-unknown \ // RUN: -Wall -Wno-unused -Werror -fsyntax-only -verify %s typedef __attribute__((vector_size(16))) signed char vec_schar; diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c index c3621819e71f9..b765fa64b33d4 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c @@ -1,5 +1,5 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -flax-vector-conversions=none \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-ibm-linux -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s typedef __attribute__((vector_size(16))) signed char vec_schar; diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c index 9f4844efd6312..79041b923068e 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c @@ -1,5 +1,5 @@ // REQUIRES: systemz-registered-target -// RUN: 
%clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-linux-gnu \ // RUN: -fzvector -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -fsyntax-only -verify %s diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c index 7a29dbf552e0b..6ee9e1ee3a117 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c @@ -1,8 +1,8 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-linux-gnu \ // RUN: -O2 -fzvector -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-linux-gnu \ // RUN: -O2 -fzvector -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -S %s -o - | FileCheck %s --check-prefix=CHECK-ASM diff --git a/clang/test/CodeGen/SystemZ/systemz-abi-vector.c b/clang/test/CodeGen/SystemZ/systemz-abi-vector.c index 1e1926678ec33..e5704709a3a33 100644 --- a/clang/test/CodeGen/SystemZ/systemz-abi-vector.c +++ b/clang/test/CodeGen/SystemZ/systemz-abi-vector.c @@ -18,6 +18,8 @@ // RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch14 \ // RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu z17 \ +// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \ // RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s diff --git a/clang/test/CodeGen/SystemZ/systemz-abi.c b/clang/test/CodeGen/SystemZ/systemz-abi.c index 58081bdc6cc2a..7de425950e9fd 100644 --- a/clang/test/CodeGen/SystemZ/systemz-abi.c +++ b/clang/test/CodeGen/SystemZ/systemz-abi.c @@ -24,6 +24,8 @@ // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch14 \ // RUN: -emit-llvm -o - %s -mfloat-abi soft | FileCheck %s \ // RUN: --check-prefixes=CHECK,SOFT-FLOAT +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu z17 \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD-FLOAT // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \ // RUN: -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD-FLOAT // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \ diff --git a/clang/test/Driver/systemz-march.c b/clang/test/Driver/systemz-march.c index 93a11c6c9c013..8922db9f2d5d6 100644 --- a/clang/test/Driver/systemz-march.c +++ b/clang/test/Driver/systemz-march.c @@ -15,6 +15,7 @@ // RUN: %clang -target s390x -### -S -emit-llvm -march=arch13 %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH13 %s // RUN: %clang -target s390x -### -S -emit-llvm -march=z16 %s 2>&1 | FileCheck --check-prefix=CHECK-Z16 %s // RUN: %clang -target s390x -### -S -emit-llvm -march=arch14 %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH14 %s +// RUN: %clang -target s390x -### -S -emit-llvm -march=z17 %s 2>&1 | FileCheck --check-prefix=CHECK-Z17 %s // RUN: %clang -target s390x -### -S -emit-llvm -march=arch15 %s 2>&1 | FileCheck 
--check-prefix=CHECK-ARCH15 %s // CHECK-Z9: error: unknown target CPU 'z9' @@ -32,6 +33,7 @@ // CHECK-ARCH13: "-target-cpu" "arch13" // CHECK-Z16: "-target-cpu" "z16" // CHECK-ARCH14: "-target-cpu" "arch14" +// CHECK-Z17: "-target-cpu" "z17" // CHECK-ARCH15: "-target-cpu" "arch15" int x; diff --git a/clang/test/Misc/target-invalid-cpu-note/systemz.c b/clang/test/Misc/target-invalid-cpu-note/systemz.c index b70173f5feec2..021c280d53190 100644 --- a/clang/test/Misc/target-invalid-cpu-note/systemz.c +++ b/clang/test/Misc/target-invalid-cpu-note/systemz.c @@ -20,4 +20,5 @@ // CHECK-SAME: {{^}}, arch14 // CHECK-SAME: {{^}}, z16 // CHECK-SAME: {{^}}, arch15 +// CHECK-SAME: {{^}}, z17 // CHECK-SAME: {{$}} diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index f267f1759cdb5..2d17891071aae 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -4394,6 +4394,9 @@ // RUN: %clang -march=arch15 -E -dM %s -o - 2>&1 \ // RUN: -target s390x-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH15 +// RUN: %clang -march=z17 -E -dM %s -o - 2>&1 \ +// RUN: -target s390x-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH15 // CHECK_SYSTEMZ_ARCH15: #define __ARCH__ 15 // CHECK_SYSTEMZ_ARCH15: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 // CHECK_SYSTEMZ_ARCH15: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 2b8269e440e90..049865c81667a 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -254,7 +254,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ROTR, MVT::i128, Expand); setOperationAction(ISD::ROTL, MVT::i128, Expand); - // No special instructions for these before arch15. + // No special instructions for these before z17. if (!Subtarget.hasVectorEnhancements3()) { setOperationAction(ISD::MUL, MVT::i128, Expand); setOperationAction(ISD::MULHS, MVT::i128, Expand); @@ -281,7 +281,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Use VPOPCT and add up partial results. setOperationAction(ISD::CTPOP, MVT::i128, Custom); - // Additional instructions available with arch15. + // Additional instructions available with z17. if (Subtarget.hasVectorEnhancements3()) { setOperationAction(ISD::ABS, MVT::i128, Legal); } @@ -353,7 +353,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); setOperationAction(ISD::CTLZ, MVT::i64, Legal); - // On arch15 we have native support for a 64-bit CTTZ. + // On z17 we have native support for a 64-bit CTTZ. if (Subtarget.hasMiscellaneousExtensions4()) { setOperationAction(ISD::CTTZ, MVT::i32, Promote); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Promote); @@ -4466,7 +4466,7 @@ SDValue SystemZTargetLowering::lowerMULH(SDValue Op, SDLoc DL(Op); SDValue Even, Odd; - // This custom expander is only used on arch15 and later for 64-bit types. + // This custom expander is only used on z17 and later for 64-bit types. 
assert(!is32Bit(VT)); assert(Subtarget.hasMiscellaneousExtensions2()); diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index edd20a5de8c63..a4ece32c79d88 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -1973,7 +1973,7 @@ let Predicates = [FeatureVector] in { (VLEG (VGBM 0), bdxaddr12only:$addr, 1)>; } -// In-register i128 sign-extensions on arch15. +// In-register i128 sign-extensions on z17. let Predicates = [FeatureVectorEnhancements3] in { def : Pat<(i128 (sext_inreg VR128:$x, i8)), (VUPLG (VSEGB VR128:$x))>; def : Pat<(i128 (sext_inreg VR128:$x, i16)), (VUPLG (VSEGH VR128:$x))>; @@ -1993,7 +1993,7 @@ let Predicates = [FeatureVector] in { (VSRAB (VREPG VR128:$x, 1), (VREPIB 64))>; } -// Sign-extensions from GPR to i128 on arch15. +// Sign-extensions from GPR to i128 on z17. let Predicates = [FeatureVectorEnhancements3] in { def : Pat<(i128 (sext_inreg (anyext GR32:$x), i8)), (VUPLG (VLVGP (LGBR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$x, subreg_l32)), diff --git a/llvm/lib/Target/SystemZ/SystemZProcessors.td b/llvm/lib/Target/SystemZ/SystemZProcessors.td index 75b6671dc7723..0827701a48b5a 100644 --- a/llvm/lib/Target/SystemZ/SystemZProcessors.td +++ b/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -41,4 +41,5 @@ def : ProcessorModel<"z15", Z15Model, Arch13SupportedFeatures.List>; def : ProcessorModel<"arch14", Z16Model, Arch14SupportedFeatures.List>; def : ProcessorModel<"z16", Z16Model, Arch14SupportedFeatures.List>; -def : ProcessorModel<"arch15", Z16Model, Arch15SupportedFeatures.List>; +def : ProcessorModel<"arch15", Z17Model, Arch15SupportedFeatures.List>; +def : ProcessorModel<"z17", Z17Model, Arch15SupportedFeatures.List>; diff --git a/llvm/lib/Target/SystemZ/SystemZSchedule.td b/llvm/lib/Target/SystemZ/SystemZSchedule.td index d683cc042e5c9..cc03a71d8a649 100644 --- a/llvm/lib/Target/SystemZ/SystemZSchedule.td +++ b/llvm/lib/Target/SystemZ/SystemZSchedule.td @@ -60,6 +60,7 @@ def VBU : SchedWrite; // Virtual branching unit def MCD : SchedWrite; // Millicode +include "SystemZScheduleZ17.td" include "SystemZScheduleZ16.td" include "SystemZScheduleZ15.td" include "SystemZScheduleZ14.td" diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td index 2c01691707cc3..a9354ea76c72c 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td @@ -1555,12 +1555,12 @@ def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>; def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VSCH(S|D|X)?P$")>; def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VSCSHP$")>; -def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VCSPH")>; -def : InstRW<[WLat2, WLat2, VecXsPm, NormalGr], (instregex "VCLZDP")>; -def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRPR")>; -def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VPKZR")>; -def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZH")>; -def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZL")>; +def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VCSPH$")>; +def : InstRW<[WLat2, WLat2, VecXsPm, NormalGr], (instregex "VCLZDP$")>; +def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRPR$")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VPKZR$")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZH$")>; +def : InstRW<[WLat2, WLat2, VecDFX, 
NormalGr], (instregex "VUPKZL$")>; // -------------------------------- System ---------------------------------- // @@ -1597,8 +1597,8 @@ def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; // System: Breaking-Event-Address-Register Instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LBEAR")>; -def : InstRW<[WLat1, LSU2, FXb, GroupAlone], (instregex "STBEAR")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LBEAR$")>; +def : InstRW<[WLat1, LSU2, FXb, GroupAlone], (instregex "STBEAR$")>; //===----------------------------------------------------------------------===// // System: Storage-Key and Real Memory Instructions diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ17.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ17.td new file mode 100644 index 0000000000000..bd52627f636a7 --- /dev/null +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ17.td @@ -0,0 +1,1754 @@ +//--- SystemZScheduleZ17.td - SystemZ Scheduling Definitions ---*- tblgen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Z17 to support instruction +// scheduling and other instruction cost heuristics. +// +// Pseudos expanded right after isel do not need to be modelled here. +// +//===----------------------------------------------------------------------===// + +def Z17Model : SchedMachineModel { + + let UnsupportedFeatures = Arch15UnsupportedFeatures.List; + + let IssueWidth = 6; // Number of instructions decoded per cycle. + let MicroOpBufferSize = 60; // Issue queues + let LoadLatency = 1; // Optimistic load latency. + + let PostRAScheduler = 1; + + // Extra cycles for a mispredicted branch. + let MispredictPenalty = 20; +} + +let SchedModel = Z17Model in { +// These definitions need the SchedModel value. They could be put in a +// subtarget common include file, but it seems the include system in Tablegen +// currently (2016) rejects multiple includes of same file. + +// Decoder grouping rules +let NumMicroOps = 1 in { + def : WriteRes; + def : WriteRes { let BeginGroup = 1; } + def : WriteRes { let EndGroup = 1; } +} +def : WriteRes { + let NumMicroOps = 2; + let BeginGroup = 1; +} +def : WriteRes { + let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} + +// Incoming latency removed from the register operand which is used together +// with a memory operand by the instruction. +def : ReadAdvance; + +// LoadLatency (above) is not used for instructions in this file. This is +// instead the role of LSULatency, which is the latency value added to the +// result of loads and instructions with folded memory operands. +def : WriteRes { let Latency = 4; let NumMicroOps = 0; } + +let NumMicroOps = 0 in { + foreach L = 1-30 in + def : WriteRes("WLat"#L), []> { let Latency = L; } +} + +// Execution units. 
+def Z17_FXaUnit : ProcResource<2>; +def Z17_FXbUnit : ProcResource<2>; +def Z17_LSUnit : ProcResource<2>; +def Z17_VecUnit : ProcResource<2>; +def Z17_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } +def Z17_VBUnit : ProcResource<2>; +def Z17_MCD : ProcResource<1>; + +// Subtarget specific definitions of scheduling resources. +let NumMicroOps = 0 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + foreach Num = 2-5 in { let ReleaseAtCycles = [Num] in { + def : WriteRes("FXa"#Num), [Z17_FXaUnit]>; + def : WriteRes("FXb"#Num), [Z17_FXbUnit]>; + def : WriteRes("LSU"#Num), [Z17_LSUnit]>; + def : WriteRes("VecBF"#Num), [Z17_VecUnit]>; + def : WriteRes("VecDF"#Num), [Z17_VecUnit]>; + def : WriteRes("VecDFX"#Num), [Z17_VecUnit]>; + def : WriteRes("VecMul"#Num), [Z17_VecUnit]>; + def : WriteRes("VecStr"#Num), [Z17_VecUnit]>; + def : WriteRes("VecXsPm"#Num), [Z17_VecUnit]>; + }} + + def : WriteRes { let ReleaseAtCycles = [30]; } + def : WriteRes { let ReleaseAtCycles = [20]; } + + def : WriteRes; // Virtual Branching Unit +} + +def : WriteRes { let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; } + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +// Pseudo -> LA / LAY +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>; + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[WLat1, FXb2, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; + 
+//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>; + +// Pseudo -> reg move +def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; + +// Loads +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; +def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. 
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], + (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>; + +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; + +// Store multiple +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>; + 
+//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address ( -> larl ) +def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>; + +// Load (logical) indexed address. +def : InstRW<[WLat2, FXa2, NormalGr], (instregex "(L)?LXA(B|H|F|G|Q)$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>; +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "IC32(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr], + (instregex "ICM(H|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "A(Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AL(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "ALG(F)?$")>; +def : InstRW<[WLat1, 
FXa, NormalGr], (instregex "ALGHSIK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + 
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Combined logical operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NOT(G)?R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MS(R|FI)$")>; +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>; +def : InstRW<[WLat5, FXa2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat5, FXa2, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXa2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; +def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>; +def : InstRW<[WLat9, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>; +def : InstRW<[WLat5, FXa2, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[WLat4LSU, WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSC$")>; +def : InstRW<[WLat4LSU, WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], + 
(instregex "MSGC$")>; +def : InstRW<[WLat4, WLat4, FXa, NormalGr], (instregex "MSRKC$")>; +def : InstRW<[WLat4, WLat4, FXa, NormalGr], (instregex "MSGRKC$")>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; +def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], + (instregex "DSG(F)?$")>; +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; +def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], + (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; +def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)(Opt)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)(Opt)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?(Z)?(Opt)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "C(G|Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CL(Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, 
NormalGr], (instregex "CLH(RL|HSI)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, + GroupAlone3], (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair 
from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; + +// Compare and load +def : InstRW<[WLat30, MCD], (instregex "CAL(G|GF)?$")>; + +// Perform functions with concurrent results +def : InstRW<[WLat30, MCD], (instregex "PFCR$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; +def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], + (instregex "TRT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "KM(C|F|O|CTR|A)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(KIMD|KLMD|KMAC|KDSA)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(PCC|PPNO|PRNO)$")>; + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>; +def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], + (instregex "CVBG$")>; +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], + (instregex "CVB(Y)?$")>; +def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>; + +def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>; +def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>; +def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>; +def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>; +def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>; +def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers 
+//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; + +// Transaction end +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[WLat30, MCD], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Count leading/trailing zeros. +def : InstRW<[WLat3, FXa, NormalGr], (instregex "C(L|T)ZG$")>; + +// Find leftmost one +def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>; + +// Bit deposit and bit extract. 
+def : InstRW<[WLat4, FXa, NormalGr], (instregex "(BDEPG|BEXTG)$")>; + +// String instructions +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "UPT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "NNPA$")>; + +// Execute +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. +def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>; + +// Copy sign +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat7, VecBF4, 
GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], + (instregex "C(F|G)(E|D)BR(A)?$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], + (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "SQEBR$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQDBR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|EE)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXDB$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)EB$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[WLat20, RegReadAdv, VecFPd20, LSU, NormalGr], (instregex "DEB$")>; +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "DDB$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex 
"DEBR$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "DDBR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "(K|C)(E|D)B$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>; +def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], (instregex "LFAS$")>; +def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. 
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "SQER$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQDR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXD$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)(E|D)$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MAY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MAY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[WLat20, RegReadAdv, VecFPd20, LSU, NormalGr], (instregex "DE$")>; +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "DD$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "DER$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "DDR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons 
+//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "C(E|D)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>; +def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>; +def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>; +def : InstRW<[WLat20, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>; +def : InstRW<[WLat20, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>; +def : InstRW<[WLat20, FXb, VecDF, Cracked], (instregex "CDLGTR$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>; +def : InstRW<[WLat20, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat20, WLat20, FXb, VecDF, Cracked], + (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[WLat20, WLat20, FXb, VecDF2, Cracked], + (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[WLat20, WLat20, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>; +def : InstRW<[WLat20, WLat20, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; +def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; +def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer 
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>; +def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[WLat20, VecDF, NormalGr], (instregex "MDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; +def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>; + + +// --------------------------------- Vector --------------------------------- // + +//===----------------------------------------------------------------------===// +// Vector: Move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Immediate instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>; +def : 
InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Loads +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLE(B|F|G|H)$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], + (instregex "VGE(F|G)$")>; +def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], + (instregex "VLM(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Stores +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; +def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLEBR(H|F|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Selects and permutes +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBLEND(B|F|G|H|Q)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Widening 
and narrowing +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGEM(B|H|F|G|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H|G)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F|G)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|G|W)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H|Q)?$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VD(L)?(F|G|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VEVAL$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H|Q)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F|G|Q)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|G|Q|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H|G|Q)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H|G|Q)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F|G|Q)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|G|Q|W)?$")>; +def : 
InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H|G)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>; + +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VR(L)?(F|G|Q)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>; + +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H|Q)?$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H|Q)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H|Q)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H|Q)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H|Q)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H|Q)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H|Q)S$")>; +def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// Conversion and rounding +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>; +def : InstRW<[WLat6, 
VecBF, NormalGr], (instregex "VC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "WFLRX$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "WFIXB$")>; + +// Sign operations +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>; + +// Minimum / maximum +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>; + +// Test data class +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>; + +// Add / subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "WF(A|S)XB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>; +def : InstRW<[WLat20, VecDF, NormalGr], (instregex "WFMXB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[WLat20, VecDF, NormalGr], (instregex "WF(N)?M(A|S)XB$")>; + 
+// Divide / square root +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "WFDSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFDSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "WFSQSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "WF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "VF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>; +def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; + +//===----------------------------------------------------------------------===// +// Vector: String instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex 
"VFEE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>; + +//===----------------------------------------------------------------------===// +// NNP assist instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCFN$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLFN(L|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VC(R)?NF$")>; + +//===----------------------------------------------------------------------===// +// Vector: Packed-decimal instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>; +def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>; +def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>; +def : InstRW<[WLat20, WLat20, VecDF, FXb, GroupAlone], + (instregex "VCVB(G|Q)?(Opt)?$")>; +def : InstRW<[WLat15, WLat15, VecDF, FXb, GroupAlone], + (instregex "VCVD(G|Q)?$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>; +def : InstRW<[WLat30, WLat30, VecDF, GroupAlone], (instregex "VM(S)?P$")>; +def : InstRW<[WLat30, WLat30, VecDF, GroupAlone], (instregex "V(D|R)P$")>; +def : InstRW<[WLat30, WLat30, VecDF, GroupAlone], (instregex "VSDP$")>; +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "VSRP(R)?$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)(P|Z)(Opt)?$")>; + +def : InstRW<[WLat20, VecDF, NormalGr], (instregex "VSCH(S|D|X)?P$")>; +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "VSCSHP$")>; +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "VCSPH$")>; +def : InstRW<[WLat2, WLat2, VecXsPm, NormalGr], (instregex "VCLZDP$")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VPKZR$")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZH$")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZL$")>; + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; +def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?(Y)?$")>; +def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>; +def 
: InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; + +//===----------------------------------------------------------------------===// +// System: Breaking-Event-Address-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LBEAR$")>; +def : InstRW<[WLat1, LSU2, FXb, GroupAlone], (instregex "STBEAR$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; +def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; +def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; +def : InstRW<[WLat30, MCD], (instregex "IRBM$")>; +def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; +def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; +def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RDP(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; +def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>; +def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>; +def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex 
"LASP$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[WLat30, MCD], (instregex "PC$")>; +def : InstRW<[WLat30, MCD], (instregex "PR$")>; +def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RP$")>; +def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; +def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; +def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; +def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "STAP$")>; +def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; +def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; +def : InstRW<[WLat30, MCD], (instregex "PTF$")>; +def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "QPACI$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "SVC$")>; +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>; +def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRACE$")>; +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TRACG$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; +def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; +def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; +def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>; +def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; +def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; +def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPE?I$")>; +def : InstRW<[WLat30, MCD], (instregex "SAL$")>; + +//===----------------------------------------------------------------------===// +// NOPs +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?(Opt)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "J(G)?NOP$")>; +} + diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index fa57ae183bb84..4c60698a63eff 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -428,7 +428,7 @@ StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) { case 9175: case 9176: default: - return HaveVectorSupport? "arch15" : "zEC12"; + return HaveVectorSupport? "z17" : "zEC12"; } } } // end anonymous namespace diff --git a/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll b/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll index 2f13d7e3ef9b1..68ffe5759e135 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s --check-prefixes=CHECK,Z13 -; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=arch15 | FileCheck %s --check-prefixes=CHECK,ARC15 +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z17 | FileCheck %s --check-prefixes=CHECK,Z17 ; Check costs of divisions by register ; @@ -52,9 +52,9 @@ define <2 x i64> @fun4(<2 x i64> %a, <2 x i64> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = sdiv <2 x i64> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; -; ARC15-LABEL: 'fun4' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <2 x i64> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r +; Z17-LABEL: 'fun4' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <2 x i64> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = sdiv <2 x i64> %a, %b ret <2 x i64> %r @@ -65,9 +65,9 @@ define <4 x i32> @fun5(<4 x i32> %a, <4 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = sdiv <4 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; -; ARC15-LABEL: 'fun5' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <4 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r +; Z17-LABEL: 'fun5' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <4 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = sdiv <4 x i32> %a, %b ret <4 x i32> %r @@ -78,9 +78,9 @@ define <2 x i32> @fun6(<2 x 
i32> %a, <2 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = sdiv <2 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; -; ARC15-LABEL: 'fun6' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <2 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r +; Z17-LABEL: 'fun6' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <2 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = sdiv <2 x i32> %a, %b ret <2 x i32> %r @@ -167,9 +167,9 @@ define <2 x i64> @fun15(<2 x i64> %a, <2 x i64> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = udiv <2 x i64> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; -; ARC15-LABEL: 'fun15' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <2 x i64> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r +; Z17-LABEL: 'fun15' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <2 x i64> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = udiv <2 x i64> %a, %b ret <2 x i64> %r @@ -180,9 +180,9 @@ define <4 x i32> @fun16(<4 x i32> %a, <4 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = udiv <4 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; -; ARC15-LABEL: 'fun16' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <4 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r +; Z17-LABEL: 'fun16' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <4 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = udiv <4 x i32> %a, %b ret <4 x i32> %r @@ -193,9 +193,9 @@ define <2 x i32> @fun17(<2 x i32> %a, <2 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = udiv <2 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; -; ARC15-LABEL: 'fun17' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <2 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r +; Z17-LABEL: 'fun17' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <2 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = udiv <2 x i32> %a, %b ret <2 x i32> %r @@ -282,9 +282,9 @@ define <2 x i64> @fun26(<2 x i64> %a, <2 x i64> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = srem <2 x i64> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; -; ARC15-LABEL: 'fun26' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <2 x i64> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r +; Z17-LABEL: 'fun26' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <2 x i64> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> 
%r ; %r = srem <2 x i64> %a, %b ret <2 x i64> %r @@ -295,9 +295,9 @@ define <4 x i32> @fun27(<4 x i32> %a, <4 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = srem <4 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; -; ARC15-LABEL: 'fun27' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <4 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r +; Z17-LABEL: 'fun27' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <4 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = srem <4 x i32> %a, %b ret <4 x i32> %r @@ -308,9 +308,9 @@ define <2 x i32> @fun28(<2 x i32> %a, <2 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = srem <2 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; -; ARC15-LABEL: 'fun28' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <2 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r +; Z17-LABEL: 'fun28' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <2 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = srem <2 x i32> %a, %b ret <2 x i32> %r @@ -397,9 +397,9 @@ define <2 x i64> @fun37(<2 x i64> %a, <2 x i64> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = urem <2 x i64> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; -; ARC15-LABEL: 'fun37' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <2 x i64> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r +; Z17-LABEL: 'fun37' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <2 x i64> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = urem <2 x i64> %a, %b ret <2 x i64> %r @@ -410,9 +410,9 @@ define <4 x i32> @fun38(<4 x i32> %a, <4 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = urem <4 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; -; ARC15-LABEL: 'fun38' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <4 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r +; Z17-LABEL: 'fun38' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <4 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = urem <4 x i32> %a, %b ret <4 x i32> %r @@ -423,9 +423,9 @@ define <2 x i32> @fun39(<2 x i32> %a, <2 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = urem <2 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; -; ARC15-LABEL: 'fun39' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <2 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r +; Z17-LABEL: 'fun39' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem 
<2 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = urem <2 x i32> %a, %b ret <2 x i32> %r @@ -473,9 +473,9 @@ define <8 x i64> @fun44(<8 x i64> %a, <8 x i64> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = sdiv <8 x i64> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %r ; -; ARC15-LABEL: 'fun44' -; ARC15-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r = sdiv <8 x i64> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %r +; Z17-LABEL: 'fun44' +; Z17-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r = sdiv <8 x i64> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %r ; %r = sdiv <8 x i64> %a, %b ret <8 x i64> %r @@ -486,9 +486,9 @@ define <8 x i32> @fun45(<8 x i32> %a, <8 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = urem <8 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %r ; -; ARC15-LABEL: 'fun45' -; ARC15-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r = urem <8 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %r +; Z17-LABEL: 'fun45' +; Z17-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r = urem <8 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %r ; %r = urem <8 x i32> %a, %b ret <8 x i32> %r diff --git a/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll b/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll index 105e634cea1ac..ba86c9ab1d702 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll @@ -1,12 +1,12 @@ ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s --check-prefixes=CHECK,Z13 -; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=arch15 | FileCheck %s --check-prefixes=CHECK,ARC15 +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z17 | FileCheck %s --check-prefixes=CHECK,Z17 ; define i128 @fun1(i128 %val1, i128 %val2) { ; CHECK-LABEL: 'fun1' ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2 ; Z13: Cost Model: Found an estimated cost of 5 for instruction: %v128 = sext i1 %cmp to i128 -; ARC15: Cost Model: Found an estimated cost of 0 for instruction: %v128 = sext i1 %cmp to i128 +; Z17: Cost Model: Found an estimated cost of 0 for instruction: %v128 = sext i1 %cmp to i128 %cmp = icmp eq i128 %val1, %val2 %v128 = sext i1 %cmp to i128 ret i128 %v128 @@ -27,7 +27,7 @@ define i128 @fun3(i128 %val1, i128 %val2, ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %add = add i128 %val3, %val4 ; Z13: Cost Model: Found an estimated cost of 4 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add -; ARC15: Cost Model: Found an estimated cost of 1 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add +; Z17: Cost Model: Found an estimated cost of 1 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add %cmp = icmp eq i128 %val1, %val2 %add = add i128 %val3, %val4 %sel = select i1 %cmp, i128 %val3, 
i128 %add @@ -40,7 +40,7 @@ define i64 @fun3_sel64(i128 %val1, i128 %val2, ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ugt i128 %val1, %val2 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %add = add i64 %val3, %val4 ; Z13: Cost Model: Found an estimated cost of 4 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %add -; ARC15: Cost Model: Found an estimated cost of 1 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %add +; Z17: Cost Model: Found an estimated cost of 1 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %add %cmp = icmp ugt i128 %val1, %val2 %add = add i64 %val3, %val4 %sel = select i1 %cmp, i64 %val3, i64 %add diff --git a/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll b/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll index bf5cbfb48a77b..ebeb2df281237 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s -; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=arch15 | FileCheck %s -check-prefix=ARC15 +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z17 | FileCheck %s -check-prefix=Z17 ; ; Note: The scalarized vector instructions costs are not including any ; extracts, due to the undef operands. @@ -132,22 +132,22 @@ define void @mul() { ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = mul <2 x i16> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = mul <2 x i32> undef, undef ; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %res7 = mul <2 x i64> undef, undef -; ARC15: Cost Model: Found an estimated cost of 1 for instruction: %res7 = mul <2 x i64> undef, undef +; Z17: Cost Model: Found an estimated cost of 1 for instruction: %res7 = mul <2 x i64> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <4 x i8> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res9 = mul <4 x i16> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = mul <4 x i32> undef, undef ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %res11 = mul <4 x i64> undef, undef -; ARC15: Cost Model: Found an estimated cost of 2 for instruction: %res11 = mul <4 x i64> undef, undef +; Z17: Cost Model: Found an estimated cost of 2 for instruction: %res11 = mul <4 x i64> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res12 = mul <8 x i8> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = mul <8 x i16> undef, undef ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = mul <8 x i32> undef, undef ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %res15 = mul <8 x i64> undef, undef -; ARC15: Cost Model: Found an estimated cost of 4 for instruction: %res15 = mul <8 x i64> undef, undef +; Z17: Cost Model: Found an estimated cost of 4 for instruction: %res15 = mul <8 x i64> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i8> undef, undef ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = mul <16 x i16> undef, undef ; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = mul <16 x i32> undef, undef ; 
CHECK: Cost Model: Found an estimated cost of 24 for instruction: %res19 = mul <16 x i64> undef, undef -; ARC15: Cost Model: Found an estimated cost of 8 for instruction: %res19 = mul <16 x i64> undef, undef +; Z17: Cost Model: Found an estimated cost of 8 for instruction: %res19 = mul <16 x i64> undef, undef ret void; } diff --git a/llvm/test/CodeGen/SystemZ/args-12.ll b/llvm/test/CodeGen/SystemZ/args-12.ll index f8954eee550f5..472672bbfd5ca 100644 --- a/llvm/test/CodeGen/SystemZ/args-12.ll +++ b/llvm/test/CodeGen/SystemZ/args-12.ll @@ -2,7 +2,7 @@ ; Test the handling of i128 argument values ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare void @bar(i64, i64, i64, i64, i128, i64, i64, i64, i64, i128) diff --git a/llvm/test/CodeGen/SystemZ/args-13.ll b/llvm/test/CodeGen/SystemZ/args-13.ll index d9e986cbb6a4b..29a718901e811 100644 --- a/llvm/test/CodeGen/SystemZ/args-13.ll +++ b/llvm/test/CodeGen/SystemZ/args-13.ll @@ -2,7 +2,7 @@ ; Test incoming i128 arguments. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Do some arithmetic so that we can see the register being used. define void @f1(ptr %r2, i16 %r3, i32 %r4, i64 %r5, i128 %r6) { diff --git a/llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll b/llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll index f5b0aaa243a79..bbd9be463a014 100644 --- a/llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test bit deposit / extract intrinsics ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare i64 @llvm.s390.bdepg(i64, i64) declare i64 @llvm.s390.bextg(i64, i64) diff --git a/llvm/test/CodeGen/SystemZ/int-abs-03.ll b/llvm/test/CodeGen/SystemZ/int-abs-03.ll index 238b2431c9b30..2a8969c27fbc0 100644 --- a/llvm/test/CodeGen/SystemZ/int-abs-03.ll +++ b/llvm/test/CodeGen/SystemZ/int-abs-03.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit absolute value in vector registers on arch15 +; Test 128-bit absolute value in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define i128 @f1(i128 %src) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/int-add-19.ll b/llvm/test/CodeGen/SystemZ/int-add-19.ll index a9bce2c827ff9..f5ef08b4514f9 100644 --- a/llvm/test/CodeGen/SystemZ/int-add-19.ll +++ b/llvm/test/CodeGen/SystemZ/int-add-19.ll @@ -2,7 +2,7 @@ ; Test 128-bit addition in vector registers on z13 and later ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define i128 @f1(i128 %a, i128 %b) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-64.ll b/llvm/test/CodeGen/SystemZ/int-cmp-64.ll index be212ef2a7211..821a57bf30bc1 100644 --- a/llvm/test/CodeGen/SystemZ/int-cmp-64.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-64.ll @@ -1,7 
+1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit comparisons in vector registers on arch15 +; Test 128-bit comparisons in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 -verify-machineinstrs | FileCheck %s ; Equality comparison. define i64 @f1(i128 %value1, i128 %value2, i64 %a, i64 %b) { diff --git a/llvm/test/CodeGen/SystemZ/int-conv-15.ll b/llvm/test/CodeGen/SystemZ/int-conv-15.ll index bea0bb8890315..0d8ee75b10b85 100644 --- a/llvm/test/CodeGen/SystemZ/int-conv-15.ll +++ b/llvm/test/CodeGen/SystemZ/int-conv-15.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit arithmetic in vector registers on arch15 +; Test 128-bit arithmetic in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Sign extension from i64. define i128 @f1(i64 %a) { diff --git a/llvm/test/CodeGen/SystemZ/int-div-08.ll b/llvm/test/CodeGen/SystemZ/int-div-08.ll index a3723c1257974..5838d4913c862 100644 --- a/llvm/test/CodeGen/SystemZ/int-div-08.ll +++ b/llvm/test/CodeGen/SystemZ/int-div-08.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit division and remainder in vector registers on arch15 +; Test 128-bit division and remainder in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Divide signed. define i128 @f1(i128 %a, i128 %b) { diff --git a/llvm/test/CodeGen/SystemZ/int-max-02.ll b/llvm/test/CodeGen/SystemZ/int-max-02.ll index bd5e9593e25e9..5f5188c66065d 100644 --- a/llvm/test/CodeGen/SystemZ/int-max-02.ll +++ b/llvm/test/CodeGen/SystemZ/int-max-02.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test i128 maximum on arch15. +; Test i128 maximum on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Test with slt. define i128 @f1(i128 %val1, i128 %val2) { diff --git a/llvm/test/CodeGen/SystemZ/int-min-02.ll b/llvm/test/CodeGen/SystemZ/int-min-02.ll index e4cdd25fbc006..3066af924fb8e 100644 --- a/llvm/test/CodeGen/SystemZ/int-min-02.ll +++ b/llvm/test/CodeGen/SystemZ/int-min-02.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test i128 minimum on arch15. +; Test i128 minimum on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Test with slt. 
define i128 @f1(i128 %val1, i128 %val2) { diff --git a/llvm/test/CodeGen/SystemZ/int-mul-14.ll b/llvm/test/CodeGen/SystemZ/int-mul-14.ll index e7e0889634d10..6678e90f3bfad 100644 --- a/llvm/test/CodeGen/SystemZ/int-mul-14.ll +++ b/llvm/test/CodeGen/SystemZ/int-mul-14.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit multiplication in vector registers on arch15 +; Test 128-bit multiplication in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Multiplication. define i128 @f1(i128 %a, i128 %b) { diff --git a/llvm/test/CodeGen/SystemZ/int-mul-15.ll b/llvm/test/CodeGen/SystemZ/int-mul-15.ll index a4a0faa0cb0c8..b7d41412d9c5f 100644 --- a/llvm/test/CodeGen/SystemZ/int-mul-15.ll +++ b/llvm/test/CodeGen/SystemZ/int-mul-15.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; Test high-part i64->i128 multiplications on arch15. +; Test high-part i64->i128 multiplications on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Check zero-extended multiplication in which only the high part is used. define i64 @f1(i64 %dummy, i64 %a, i64 %b) { diff --git a/llvm/test/CodeGen/SystemZ/int-mul-16.ll b/llvm/test/CodeGen/SystemZ/int-mul-16.ll index d84ca93e3b12c..772c419dfc8e0 100644 --- a/llvm/test/CodeGen/SystemZ/int-mul-16.ll +++ b/llvm/test/CodeGen/SystemZ/int-mul-16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test high-part i128->i256 multiplications on arch15. +; Test high-part i128->i256 multiplications on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Multiply high signed. 
define i128 @f1(i128 %a, i128 %b) { diff --git a/llvm/test/CodeGen/SystemZ/int-neg-04.ll b/llvm/test/CodeGen/SystemZ/int-neg-04.ll index 05b7b397e735d..a6da2db7d14b4 100644 --- a/llvm/test/CodeGen/SystemZ/int-neg-04.ll +++ b/llvm/test/CodeGen/SystemZ/int-neg-04.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit negation in vector registers on arch15 +; Test 128-bit negation in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define i128 @f1(i128 %src) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/int-sub-12.ll b/llvm/test/CodeGen/SystemZ/int-sub-12.ll index 8f7d816d5cbd2..44d2adfb41dc7 100644 --- a/llvm/test/CodeGen/SystemZ/int-sub-12.ll +++ b/llvm/test/CodeGen/SystemZ/int-sub-12.ll @@ -2,7 +2,7 @@ ; Test 128-bit subtraction in vector registers on z13 and later ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define i128 @f1(i128 %a, i128 %b) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/llxa-01.ll b/llvm/test/CodeGen/SystemZ/llxa-01.ll index 19bc6ef31a286..2c57556dc9ee2 100644 --- a/llvm/test/CodeGen/SystemZ/llxa-01.ll +++ b/llvm/test/CodeGen/SystemZ/llxa-01.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD LOGICAL INDEXED ADDRESS byte instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; DO NOT USE: LLXAB with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/llxa-02.ll b/llvm/test/CodeGen/SystemZ/llxa-02.ll index 0ca2527dcb25e..e2cd929a0bc94 100644 --- a/llvm/test/CodeGen/SystemZ/llxa-02.ll +++ b/llvm/test/CodeGen/SystemZ/llxa-02.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD LOGICAL INDEXED ADDRESS halfword instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LLXAH with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/llxa-03.ll b/llvm/test/CodeGen/SystemZ/llxa-03.ll index b6c9406785188..b5c91b1d7e607 100644 --- a/llvm/test/CodeGen/SystemZ/llxa-03.ll +++ b/llvm/test/CodeGen/SystemZ/llxa-03.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD LOGICAL INDEXED ADDRESS word instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LLXAF with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/llxa-04.ll b/llvm/test/CodeGen/SystemZ/llxa-04.ll index 9c5cd2f54bc67..186892dd755a7 100644 --- a/llvm/test/CodeGen/SystemZ/llxa-04.ll +++ b/llvm/test/CodeGen/SystemZ/llxa-04.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD LOGICAL INDEXED ADDRESS doubleword instructions. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LLXAG with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/llxa-05.ll b/llvm/test/CodeGen/SystemZ/llxa-05.ll index eba400f6d2564..1e5880de57d58 100644 --- a/llvm/test/CodeGen/SystemZ/llxa-05.ll +++ b/llvm/test/CodeGen/SystemZ/llxa-05.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD LOGICAL INDEXED ADDRESS quadword instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LLXAQ with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/lxa-01.ll b/llvm/test/CodeGen/SystemZ/lxa-01.ll index fb3edeaaeb381..8bba6f78f503d 100644 --- a/llvm/test/CodeGen/SystemZ/lxa-01.ll +++ b/llvm/test/CodeGen/SystemZ/lxa-01.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD INDEXED ADDRESS byte instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; DO NOT USE: LXAB with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/lxa-02.ll b/llvm/test/CodeGen/SystemZ/lxa-02.ll index 64816fa24838e..c233bf7d28a5a 100644 --- a/llvm/test/CodeGen/SystemZ/lxa-02.ll +++ b/llvm/test/CodeGen/SystemZ/lxa-02.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD INDEXED ADDRESS halfword instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LXAH with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/lxa-03.ll b/llvm/test/CodeGen/SystemZ/lxa-03.ll index e73d43a48ebd8..43e9b4d14d6c6 100644 --- a/llvm/test/CodeGen/SystemZ/lxa-03.ll +++ b/llvm/test/CodeGen/SystemZ/lxa-03.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD INDEXED ADDRESS word instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LXAF with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/lxa-04.ll b/llvm/test/CodeGen/SystemZ/lxa-04.ll index 7b6764cf22faf..96af585547e34 100644 --- a/llvm/test/CodeGen/SystemZ/lxa-04.ll +++ b/llvm/test/CodeGen/SystemZ/lxa-04.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD INDEXED ADDRESS doubleword instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LXAG with base and index. 
define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/lxa-05.ll b/llvm/test/CodeGen/SystemZ/lxa-05.ll index 0a45cba0b3f83..4f0b4e838f157 100644 --- a/llvm/test/CodeGen/SystemZ/lxa-05.ll +++ b/llvm/test/CodeGen/SystemZ/lxa-05.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD INDEXED ADDRESS quadword instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LXAQ with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll b/llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll index f18ee2418383c..3dbd18fb8cc60 100644 --- a/llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll +++ b/llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; ; FIXME: two consecutive immediate adds not fused in i16/i8 functions. diff --git a/llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll b/llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll index bb50e6f417c42..10d28d571bb92 100644 --- a/llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll +++ b/llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare i128 @llvm.ctlz.i128(i128, i1) diff --git a/llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll b/llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll index 2f3a72160ae27..e1237280ae23e 100644 --- a/llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll +++ b/llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare i64 @llvm.cttz.i64(i64, i1) declare i32 @llvm.cttz.i32(i32, i1) diff --git a/llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll b/llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll index f440871fd4ff0..fdfebef1a1e18 100644 --- a/llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll +++ b/llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit arithmetic in vector registers on arch15 +; Test 128-bit arithmetic in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare i128 @llvm.cttz.i128(i128, i1) diff --git a/llvm/test/CodeGen/SystemZ/vec-cmp-09.ll b/llvm/test/CodeGen/SystemZ/vec-cmp-09.ll index 3f6c86e685ea1..cb8850e58c589 100644 --- a/llvm/test/CodeGen/SystemZ/vec-cmp-09.ll +++ b/llvm/test/CodeGen/SystemZ/vec-cmp-09.ll @@ -1,6 +1,6 @@ -; Test usage of VBLEND on arch15. +; Test usage of VBLEND on z17. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/vec-div-03.ll b/llvm/test/CodeGen/SystemZ/vec-div-03.ll index 96b161948e39b..1c2a702baf1a3 100644 --- a/llvm/test/CodeGen/SystemZ/vec-div-03.ll +++ b/llvm/test/CodeGen/SystemZ/vec-div-03.ll @@ -1,6 +1,6 @@ -; Test vector division on arch15. +; Test vector division on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Test a v4i32 signed division. define <4 x i32> @f1(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { diff --git a/llvm/test/CodeGen/SystemZ/vec-eval.ll b/llvm/test/CodeGen/SystemZ/vec-eval.ll index 262ab0ea8bb2b..bcdedcd3a407b 100644 --- a/llvm/test/CodeGen/SystemZ/vec-eval.ll +++ b/llvm/test/CodeGen/SystemZ/vec-eval.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of VECTOR EVALUATE for combined boolean operations. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define <16 x i8> @eval0(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval0: @@ -279,8 +279,8 @@ entry: define <16 x i8> @eval24(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval24: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 2 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 ; CHECK-NEXT: br %r14 entry: @@ -376,8 +376,8 @@ entry: define <16 x i8> @eval30(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval30: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 2 +; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 ; CHECK-NEXT: br %r14 entry: @@ -596,8 +596,8 @@ entry: define <16 x i8> @eval45(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval45: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v0, %v28, %v24 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 +; CHECK-NEXT: vo %v0, %v28, %v24 ; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 ; CHECK-NEXT: veval %v24, %v1, %v26, %v0, 47 ; CHECK-NEXT: br %r14 @@ -617,8 +617,8 @@ entry: define <16 x i8> @eval46(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval46: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 8 +; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 ; CHECK-NEXT: br %r14 entry: @@ -722,8 +722,8 @@ entry: define <16 x i8> @eval54(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval54: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v28, %v24 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 2 +; CHECK-NEXT: vn %v1, %v28, %v24 ; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47 ; CHECK-NEXT: br %r14 entry: @@ -770,8 +770,8 @@ entry: define <16 x i8> @eval57(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval57: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 +; CHECK-NEXT: vo %v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 47 ; CHECK-NEXT: veval %v24, %v1, %v24, %v0, 47 ; CHECK-NEXT: br 
%r14 @@ -1060,8 +1060,8 @@ define <16 x i8> @eval77(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval77: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vgbm %v0, 65535 -; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 +; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v0, %v28, %v0, %v1, 7 ; CHECK-NEXT: veval %v24, %v0, %v24, %v26, 47 ; CHECK-NEXT: br %r14 @@ -1540,10 +1540,10 @@ define <16 x i8> @eval109(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval109: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vgbm %v0, 65535 -; CHECK-NEXT: vn %v2, %v26, %v24 ; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 -; CHECK-NEXT: vo %v1, %v28, %v24 +; CHECK-NEXT: vn %v2, %v26, %v24 ; CHECK-NEXT: veval %v0, %v28, %v0, %v2, 7 +; CHECK-NEXT: vo %v1, %v28, %v24 ; CHECK-NEXT: veval %v0, %v0, %v24, %v26, 47 ; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47 ; CHECK-NEXT: br %r14 @@ -1621,8 +1621,8 @@ define <16 x i8> @eval113(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval113: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vgbm %v0, 65535 -; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 +; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v0, %v28, %v0, %v1, 7 ; CHECK-NEXT: veval %v24, %v0, %v26, %v24, 47 ; CHECK-NEXT: br %r14 @@ -1731,8 +1731,8 @@ define <16 x i8> @eval120(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vno %v0, %v24, %v24 ; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 2 -; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 ; CHECK-NEXT: br %r14 entry: @@ -1753,10 +1753,10 @@ define <16 x i8> @eval121(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval121: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vgbm %v0, 65535 -; CHECK-NEXT: vn %v2, %v26, %v24 ; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 -; CHECK-NEXT: vo %v1, %v28, %v26 +; CHECK-NEXT: vn %v2, %v26, %v24 ; CHECK-NEXT: veval %v0, %v28, %v0, %v2, 7 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47 ; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 ; CHECK-NEXT: br %r14 @@ -1802,8 +1802,8 @@ define <16 x i8> @eval123(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vno %v0, %v24, %v24 ; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 2 -; CHECK-NEXT: voc %v1, %v26, %v28 ; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47 +; CHECK-NEXT: voc %v1, %v26, %v28 ; CHECK-NEXT: veval %v24, %v0, %v1, %v24, 31 ; CHECK-NEXT: br %r14 entry: @@ -2084,8 +2084,8 @@ entry: define <16 x i8> @eval141(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval141: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v0, %v26, %v24 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 +; CHECK-NEXT: vo %v0, %v26, %v24 ; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 ; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143 ; CHECK-NEXT: br %r14 @@ -2105,8 +2105,8 @@ entry: define <16 x i8> @eval142(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval142: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 127 +; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174 ; CHECK-NEXT: br %r14 entry: @@ -2253,8 +2253,8 @@ entry: define <16 x i8> @eval151(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval151: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vx 
%v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2 +; CHECK-NEXT: vx %v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 31 ; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143 ; CHECK-NEXT: br %r14 @@ -2289,8 +2289,8 @@ entry: define <16 x i8> @eval153(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval153: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v2, %v26, %v24, %v28, 1 ; CHECK-NEXT: veval %v24, %v2, %v0, %v1, 239 ; CHECK-NEXT: br %r14 @@ -2309,8 +2309,8 @@ entry: define <16 x i8> @eval154(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval154: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v2, %v24, %v26, %v28, 2 ; CHECK-NEXT: veval %v24, %v2, %v0, %v1, 239 ; CHECK-NEXT: br %r14 @@ -2330,9 +2330,9 @@ entry: define <16 x i8> @eval155(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval155: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111 ; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: vn %v2, %v26, %v24 -; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111 ; CHECK-NEXT: veval %v24, %v2, %v0, %v1, 239 ; CHECK-NEXT: br %r14 entry: @@ -2365,8 +2365,8 @@ entry: define <16 x i8> @eval157(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval157: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vx %v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 +; CHECK-NEXT: vx %v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 ; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143 ; CHECK-NEXT: br %r14 @@ -2386,8 +2386,8 @@ entry: define <16 x i8> @eval158(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval158: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111 +; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174 ; CHECK-NEXT: br %r14 entry: @@ -2685,8 +2685,8 @@ entry: define <16 x i8> @eval178(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval178: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 138 +; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v24, %v0, %v1, %v28, 47 ; CHECK-NEXT: br %r14 entry: @@ -2778,8 +2778,8 @@ entry: define <16 x i8> @eval183(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval183: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: voc %v0, %v26, %v28 ; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2 +; CHECK-NEXT: voc %v0, %v26, %v28 ; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 31 ; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47 ; CHECK-NEXT: br %r14 @@ -2884,8 +2884,8 @@ entry: define <16 x i8> @eval189(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval189: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: voc %v0, %v26, %v28 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 +; CHECK-NEXT: voc %v0, %v26, %v28 ; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 ; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47 ; CHECK-NEXT: br %r14 @@ -3480,8 +3480,8 @@ define <16 x i8> @eval228(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval228: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vno %v0, %v26, %v26 -; CHECK-NEXT: vo %v1, %v28, %v24 ; CHECK-NEXT: veval %v2, %v24, %v28, %v26, 2 +; CHECK-NEXT: vo %v1, %v28, %v24 ; CHECK-NEXT: veval %v0, 
%v2, %v0, %v24, 47 ; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47 ; CHECK-NEXT: br %r14 @@ -3564,8 +3564,8 @@ entry: define <16 x i8> @eval232(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval232: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 31 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174 ; CHECK-NEXT: br %r14 entry: @@ -3582,8 +3582,8 @@ entry: define <16 x i8> @eval233(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval233: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vx %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 31 +; CHECK-NEXT: vx %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174 ; CHECK-NEXT: br %r14 entry: diff --git a/llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll b/llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll index e750f1e3e7b47..5bbabdd2d56fc 100644 --- a/llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test vector intrinsics added with arch15. +; Test vector intrinsics added with z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare <16 x i8> @llvm.s390.vgemb(<8 x i16>) declare <8 x i16> @llvm.s390.vgemh(<16 x i8>) diff --git a/llvm/test/CodeGen/SystemZ/vec-mul-06.ll b/llvm/test/CodeGen/SystemZ/vec-mul-06.ll index 22b1b5de62c57..3850a8f60eb16 100644 --- a/llvm/test/CodeGen/SystemZ/vec-mul-06.ll +++ b/llvm/test/CodeGen/SystemZ/vec-mul-06.ll @@ -1,6 +1,6 @@ -; Test vector multiplication on arch15. +; Test vector multiplication on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Test a v2i64 multiplication. define <2 x i64> @f1(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { diff --git a/llvm/test/MC/Disassembler/SystemZ/insns-arch15.txt b/llvm/test/MC/Disassembler/SystemZ/insns-z17.txt similarity index 99% rename from llvm/test/MC/Disassembler/SystemZ/insns-arch15.txt rename to llvm/test/MC/Disassembler/SystemZ/insns-z17.txt index 93274e6659801..c5a30b072d991 100644 --- a/llvm/test/MC/Disassembler/SystemZ/insns-arch15.txt +++ b/llvm/test/MC/Disassembler/SystemZ/insns-z17.txt @@ -1,5 +1,5 @@ -# Test arch15 instructions that don't have PC-relative operands. -# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=arch15 \ +# Test z17 instructions that don't have PC-relative operands. +# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=z17 \ # RUN: | FileCheck %s # CHECK: bdepg %r0, %r0, %r0 diff --git a/llvm/test/MC/SystemZ/insn-bad-arch15.s b/llvm/test/MC/SystemZ/insn-bad-z17.s similarity index 98% rename from llvm/test/MC/SystemZ/insn-bad-arch15.s rename to llvm/test/MC/SystemZ/insn-bad-z17.s index 915efbc942306..02e26220490f4 100644 --- a/llvm/test/MC/SystemZ/insn-bad-arch15.s +++ b/llvm/test/MC/SystemZ/insn-bad-z17.s @@ -1,5 +1,5 @@ -# For arch15 only. -# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=arch15 < %s 2> %t +# For z17 only. 
+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=z17 < %s 2> %t # RUN: FileCheck < %t %s #CHECK: error: invalid use of indexed addressing diff --git a/llvm/test/MC/SystemZ/insn-good-arch15.s b/llvm/test/MC/SystemZ/insn-good-z17.s similarity index 99% rename from llvm/test/MC/SystemZ/insn-good-arch15.s rename to llvm/test/MC/SystemZ/insn-good-z17.s index 46ff13db0b549..96f27137e4821 100644 --- a/llvm/test/MC/SystemZ/insn-good-arch15.s +++ b/llvm/test/MC/SystemZ/insn-good-z17.s @@ -1,5 +1,5 @@ -# For arch15 and above. -# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=arch15 -show-encoding %s \ +# For z17 and above. +# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=z17 -show-encoding %s \ # RUN: | FileCheck %s #CHECK: bdepg %r0, %r0, %r0 # encoding: [0xb9,0x6d,0x00,0x00] diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp index c5b96e1df904e..2a3958151a604 100644 --- a/llvm/unittests/TargetParser/Host.cpp +++ b/llvm/unittests/TargetParser/Host.cpp @@ -340,7 +340,7 @@ TEST(getLinuxHostCPUName, s390x) { // Model Id: 9175 ExpectedCPUs.push_back("zEC12"); - ExpectedCPUs.push_back("arch15"); + ExpectedCPUs.push_back("z17"); // Model Id: 3931 ExpectedCPUs.push_back("zEC12"); From 4370072022e5265d51b64182608e133277a24ac0 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Tue, 29 Apr 2025 14:54:30 +0200 Subject: [PATCH 18/83] [clang] Forward TPL of NestedNameSpecifier This avoids type suffixes for integer constants when the type can be inferred from the template parameter, such as the unsigned parameter of A<1> and A<2> in the added test. --- clang/lib/AST/NestedNameSpecifier.cpp | 17 ++-- clang/unittests/Tooling/QualTypeNamesTest.cpp | 96 +++++++++++++++++++ 2 files changed, 106 insertions(+), 7 deletions(-) diff --git a/clang/lib/AST/NestedNameSpecifier.cpp b/clang/lib/AST/NestedNameSpecifier.cpp index 76c77569da9fd..c043996f1ada3 100644 --- a/clang/lib/AST/NestedNameSpecifier.cpp +++ b/clang/lib/AST/NestedNameSpecifier.cpp @@ -283,13 +283,16 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, case TypeSpec: { const auto *Record = dyn_cast_or_null(getAsRecordDecl()); - if (ResolveTemplateArguments && Record) { + const TemplateParameterList *TPL = nullptr; + if (Record) { + TPL = Record->getSpecializedTemplate()->getTemplateParameters(); + if (ResolveTemplateArguments) { // Print the type trait with resolved template parameters. Record->printName(OS, Policy); - printTemplateArgumentList( - OS, Record->getTemplateArgs().asArray(), Policy, - Record->getSpecializedTemplate()->getTemplateParameters()); + printTemplateArgumentList(OS, Record->getTemplateArgs().asArray(), + Policy, TPL); break; + } } const Type *T = getAsType(); @@ -313,8 +316,8 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, TemplateName::Qualified::None); // Print the template argument list. - printTemplateArgumentList(OS, SpecType->template_arguments(), - InnerPolicy); + printTemplateArgumentList(OS, SpecType->template_arguments(), InnerPolicy, + TPL); } else if (const auto *DepSpecType = dyn_cast(T)) { // Print the template name without its corresponding @@ -322,7 +325,7 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, OS << DepSpecType->getIdentifier()->getName(); // Print the template argument list. 
printTemplateArgumentList(OS, DepSpecType->template_arguments(), - InnerPolicy); + InnerPolicy, TPL); } else { // Print the type normally QualType(T, 0).print(OS, InnerPolicy); diff --git a/clang/unittests/Tooling/QualTypeNamesTest.cpp b/clang/unittests/Tooling/QualTypeNamesTest.cpp index 5ded64d4fcc8c..49c40d633ad4b 100644 --- a/clang/unittests/Tooling/QualTypeNamesTest.cpp +++ b/clang/unittests/Tooling/QualTypeNamesTest.cpp @@ -265,6 +265,102 @@ TEST(QualTypeNameTest, InlineNamespace) { TypeNameVisitor::Lang_CXX11); } +TEST(QualTypeNameTest, TemplatedClass) { + std::unique_ptr AST = + tooling::buildASTFromCode("template struct A {\n" + " template struct B {};\n" + "};\n" + "template struct A<1>;\n" + "template struct A<2u>;\n" + "template struct A<1>::B<3>;\n" + "template struct A<2u>::B<4u>;\n"); + + auto &Context = AST->getASTContext(); + auto &Policy = Context.getPrintingPolicy(); + auto getFullyQualifiedName = [&](QualType QT) { + return TypeName::getFullyQualifiedName(QT, Context, Policy); + }; + + auto *A = Context.getTranslationUnitDecl() + ->lookup(&Context.Idents.get("A")) + .find_first(); + ASSERT_NE(A, nullptr); + + // A has two explicit instantiations: A<1> and A<2u> + auto ASpec = A->spec_begin(); + ASSERT_NE(ASpec, A->spec_end()); + auto *A1 = *ASpec; + ASpec++; + ASSERT_NE(ASpec, A->spec_end()); + auto *A2 = *ASpec; + + // Their type names follow the records. + QualType A1RecordTy = Context.getRecordType(A1); + EXPECT_EQ(getFullyQualifiedName(A1RecordTy), "A<1>"); + QualType A2RecordTy = Context.getRecordType(A2); + EXPECT_EQ(getFullyQualifiedName(A2RecordTy), "A<2U>"); + + // getTemplateSpecializationType() gives types that print the integral + // argument directly. + TemplateArgument Args1[] = { + {Context, llvm::APSInt::getUnsigned(1u), Context.UnsignedIntTy}}; + QualType A1TemplateSpecTy = + Context.getTemplateSpecializationType(TemplateName(A), Args1, A1RecordTy); + EXPECT_EQ(A1TemplateSpecTy.getAsString(), "A<1>"); + + TemplateArgument Args2[] = { + {Context, llvm::APSInt::getUnsigned(2u), Context.UnsignedIntTy}}; + QualType A2TemplateSpecTy = + Context.getTemplateSpecializationType(TemplateName(A), Args2, A2RecordTy); + EXPECT_EQ(A2TemplateSpecTy.getAsString(), "A<2>"); + + // Find A<1>::B and its specialization B<3>. + auto *A1B = + A1->lookup(&Context.Idents.get("B")).find_first(); + ASSERT_NE(A1B, nullptr); + auto A1BSpec = A1B->spec_begin(); + ASSERT_NE(A1BSpec, A1B->spec_end()); + auto *A1B3 = *A1BSpec; + QualType A1B3RecordTy = Context.getRecordType(A1B3); + EXPECT_EQ(getFullyQualifiedName(A1B3RecordTy), "A<1>::B<3>"); + + // Construct A<1>::B<3> and check name. + TemplateArgument Args3[] = { + {Context, llvm::APSInt::getUnsigned(3u), Context.UnsignedIntTy}}; + QualType A1B3TemplateSpecTy = Context.getTemplateSpecializationType( + TemplateName(A1B), Args3, A1B3RecordTy); + EXPECT_EQ(A1B3TemplateSpecTy.getAsString(), "B<3>"); + + NestedNameSpecifier *A1Nested = NestedNameSpecifier::Create( + Context, nullptr, false, A1TemplateSpecTy.getTypePtr()); + QualType A1B3ElaboratedTy = Context.getElaboratedType( + ElaboratedTypeKeyword::None, A1Nested, A1B3TemplateSpecTy); + EXPECT_EQ(A1B3ElaboratedTy.getAsString(), "A<1>::B<3>"); + + // Find A<2u>::B and its specialization B<4u>. 
+ auto *A2B = + A2->lookup(&Context.Idents.get("B")).find_first(); + ASSERT_NE(A2B, nullptr); + auto A2BSpec = A2B->spec_begin(); + ASSERT_NE(A2BSpec, A2B->spec_end()); + auto *A2B4 = *A2BSpec; + QualType A2B4RecordTy = Context.getRecordType(A2B4); + EXPECT_EQ(getFullyQualifiedName(A2B4RecordTy), "A<2U>::B<4U>"); + + // Construct A<2>::B<4> and check name. + TemplateArgument Args4[] = { + {Context, llvm::APSInt::getUnsigned(4u), Context.UnsignedIntTy}}; + QualType A2B4TemplateSpecTy = Context.getTemplateSpecializationType( + TemplateName(A2B), Args4, A2B4RecordTy); + EXPECT_EQ(A2B4TemplateSpecTy.getAsString(), "B<4>"); + + NestedNameSpecifier *A2Nested = NestedNameSpecifier::Create( + Context, nullptr, false, A2TemplateSpecTy.getTypePtr()); + QualType A2B4ElaboratedTy = Context.getElaboratedType( + ElaboratedTypeKeyword::None, A2Nested, A2B4TemplateSpecTy); + EXPECT_EQ(A2B4ElaboratedTy.getAsString(), "A<2>::B<4>"); +} + TEST(QualTypeNameTest, AnonStrucs) { TypeNameVisitor AnonStrucs; AnonStrucs.ExpectedQualTypeNames["a"] = "short"; From f811c7df0a105549aeae2aa42ca31f6d55e652f2 Mon Sep 17 00:00:00 2001 From: davidtrevelyan Date: Thu, 13 Mar 2025 10:18:25 +0000 Subject: [PATCH 19/83] [rtsan][Apple] Add interceptor for _os_nospin_lock_lock (#131034) Follows the discussion here: https://github.com/llvm/llvm-project/pull/129309 Recently, the test `TestRtsan.AccessingALargeAtomicVariableDiesWhenRealtime` has been failing on newer MacOS versions, because the internal locking mechanism in `std::atomic::load` (for types `T` that are larger than the hardware lock-free limit), has changed to a function that wasn't being intercepted by rtsan. This PR introduces an interceptor for `_os_nospin_lock_lock`, which is the new internal locking mechanism. _Note: we'd probably do well to introduce interceptors for `_os_nospin_lock_unlock` (and `os_unfair_lock_unlock`) too, which also appear to have blocking implementations. This can follow in a separate PR._ (cherry picked from commit 481a55a3d9645a6bc1540d326319b78ad8ed8db1) --- .../lib/rtsan/rtsan_interceptors_posix.cpp | 11 +++++++++++ .../tests/rtsan_test_interceptors_posix.cpp | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index 6816119065263..4d602a88ba9ae 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp @@ -30,6 +30,12 @@ extern "C" { typedef int32_t OSSpinLock; void OSSpinLockLock(volatile OSSpinLock *__lock); +// A pointer to this type is in the interface for `_os_nospin_lock_lock`, but +// it's an internal implementation detail of `os/lock.c` on Darwin, and +// therefore not available in any headers. As a workaround, we forward declare +// it here, which is enough to facilitate interception of _os_nospin_lock_lock. 
+struct _os_nospin_lock_s; +using _os_nospin_lock_t = _os_nospin_lock_s *; } #endif // TARGET_OS_MAC @@ -642,6 +648,11 @@ INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) { __rtsan_notify_intercepted_call("os_unfair_lock_lock"); return REAL(os_unfair_lock_lock)(lock); } + +INTERCEPTOR(void, _os_nospin_lock_lock, _os_nospin_lock_t lock) { + __rtsan_notify_intercepted_call("_os_nospin_lock_lock"); + return REAL(_os_nospin_lock_lock)(lock); +} #define RTSAN_MAYBE_INTERCEPT_OS_UNFAIR_LOCK_LOCK \ INTERCEPT_FUNCTION(os_unfair_lock_lock) #else diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index 59663776366bb..75f723081c4b6 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -1058,6 +1058,25 @@ TEST(TestRtsanInterceptors, OsUnfairLockLockDiesWhenRealtime) { ExpectRealtimeDeath(Func, "os_unfair_lock_lock"); ExpectNonRealtimeSurvival(Func); } + +// We intercept _os_nospin_lock_lock because it's the internal +// locking mechanism for MacOS's atomic implementation for data +// types that are larger than the hardware's maximum lock-free size. +// However, it's a private implementation detail and not visible in any headers, +// so we must duplicate the required type definitions to forward declaration +// what we need here. +extern "C" { +struct _os_nospin_lock_s { + unsigned int oul_value; +}; +void _os_nospin_lock_lock(_os_nospin_lock_s *); +} +TEST(TestRtsanInterceptors, OsNoSpinLockLockDiesWhenRealtime) { + _os_nospin_lock_s lock{}; + auto Func = [&]() { _os_nospin_lock_lock(&lock); }; + ExpectRealtimeDeath(Func, "_os_nospin_lock_lock"); + ExpectNonRealtimeSurvival(Func); +} #endif #if SANITIZER_LINUX From b7b834e2a20ed295eaab596dd348db5463b951d8 Mon Sep 17 00:00:00 2001 From: thetruestblue Date: Fri, 18 Apr 2025 11:25:31 -0700 Subject: [PATCH 20/83] [RTSan][Darwin] Adjust OSSpinLock/_os_nospin_lock interceptor and tests (#132867) These changes align with these lock types and allows builds and tests to pass with various SDKS. rdar://147067322 (cherry picked from commit 7cc4472037b43971bd3ee373fe75b5043f5abca9) --- .../lib/rtsan/rtsan_interceptors_posix.cpp | 37 +++++++---------- .../tests/rtsan_test_interceptors_posix.cpp | 40 +++++++++---------- 2 files changed, 32 insertions(+), 45 deletions(-) diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index 4d602a88ba9ae..040f501ee52e9 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp @@ -21,24 +21,6 @@ #include "rtsan/rtsan.h" #if SANITIZER_APPLE - -#if TARGET_OS_MAC -// On MacOS OSSpinLockLock is deprecated and no longer present in the headers, -// but the symbol still exists on the system. Forward declare here so we -// don't get compilation errors. -#include -extern "C" { -typedef int32_t OSSpinLock; -void OSSpinLockLock(volatile OSSpinLock *__lock); -// A pointer to this type is in the interface for `_os_nospin_lock_lock`, but -// it's an internal implementation detail of `os/lock.c` on Darwin, and -// therefore not available in any headers. As a workaround, we forward declare -// it here, which is enough to facilitate interception of _os_nospin_lock_lock. 
-struct _os_nospin_lock_s; -using _os_nospin_lock_t = _os_nospin_lock_s *; -} -#endif // TARGET_OS_MAC - #include #include #endif // SANITIZER_APPLE @@ -633,26 +615,35 @@ INTERCEPTOR(mode_t, umask, mode_t cmask) { #pragma clang diagnostic push // OSSpinLockLock is deprecated, but still in use in libc++ #pragma clang diagnostic ignored "-Wdeprecated-declarations" +#undef OSSpinLockLock + INTERCEPTOR(void, OSSpinLockLock, volatile OSSpinLock *lock) { __rtsan_notify_intercepted_call("OSSpinLockLock"); return REAL(OSSpinLockLock)(lock); } -#pragma clang diagnostic pop + #define RTSAN_MAYBE_INTERCEPT_OSSPINLOCKLOCK INTERCEPT_FUNCTION(OSSpinLockLock) #else #define RTSAN_MAYBE_INTERCEPT_OSSPINLOCKLOCK #endif // SANITIZER_APPLE #if SANITIZER_APPLE -INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) { - __rtsan_notify_intercepted_call("os_unfair_lock_lock"); - return REAL(os_unfair_lock_lock)(lock); -} +// _os_nospin_lock_lock may replace OSSpinLockLock due to deprecation macro. +typedef volatile OSSpinLock *_os_nospin_lock_t; INTERCEPTOR(void, _os_nospin_lock_lock, _os_nospin_lock_t lock) { __rtsan_notify_intercepted_call("_os_nospin_lock_lock"); return REAL(_os_nospin_lock_lock)(lock); } +#pragma clang diagnostic pop // "-Wdeprecated-declarations" +#endif // SANITIZER_APPLE + +#if SANITIZER_APPLE +INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) { + __rtsan_notify_intercepted_call("os_unfair_lock_lock"); + return REAL(os_unfair_lock_lock)(lock); +} + #define RTSAN_MAYBE_INTERCEPT_OS_UNFAIR_LOCK_LOCK \ INTERCEPT_FUNCTION(os_unfair_lock_lock) #else diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index 75f723081c4b6..7eda884951c83 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -1036,10 +1036,18 @@ TEST(TestRtsanInterceptors, PthreadJoinDiesWhenRealtime) { } #if SANITIZER_APPLE - #pragma clang diagnostic push // OSSpinLockLock is deprecated, but still in use in libc++ #pragma clang diagnostic ignored "-Wdeprecated-declarations" +#undef OSSpinLockLock +extern "C" { +typedef int32_t OSSpinLock; +void OSSpinLockLock(volatile OSSpinLock *__lock); +// _os_nospin_lock_lock may replace OSSpinLockLock due to deprecation macro. +typedef volatile OSSpinLock *_os_nospin_lock_t; +void _os_nospin_lock_lock(_os_nospin_lock_t lock); +} + TEST(TestRtsanInterceptors, OsSpinLockLockDiesWhenRealtime) { auto Func = []() { OSSpinLock spin_lock{}; @@ -1048,7 +1056,14 @@ TEST(TestRtsanInterceptors, OsSpinLockLockDiesWhenRealtime) { ExpectRealtimeDeath(Func, "OSSpinLockLock"); ExpectNonRealtimeSurvival(Func); } -#pragma clang diagnostic pop + +TEST(TestRtsanInterceptors, OsNoSpinLockLockDiesWhenRealtime) { + OSSpinLock lock{}; + auto Func = [&]() { _os_nospin_lock_lock(&lock); }; + ExpectRealtimeDeath(Func, "_os_nospin_lock_lock"); + ExpectNonRealtimeSurvival(Func); +} +#pragma clang diagnostic pop //"-Wdeprecated-declarations" TEST(TestRtsanInterceptors, OsUnfairLockLockDiesWhenRealtime) { auto Func = []() { @@ -1058,26 +1073,7 @@ TEST(TestRtsanInterceptors, OsUnfairLockLockDiesWhenRealtime) { ExpectRealtimeDeath(Func, "os_unfair_lock_lock"); ExpectNonRealtimeSurvival(Func); } - -// We intercept _os_nospin_lock_lock because it's the internal -// locking mechanism for MacOS's atomic implementation for data -// types that are larger than the hardware's maximum lock-free size. 
-// However, it's a private implementation detail and not visible in any headers, -// so we must duplicate the required type definitions to forward declaration -// what we need here. -extern "C" { -struct _os_nospin_lock_s { - unsigned int oul_value; -}; -void _os_nospin_lock_lock(_os_nospin_lock_s *); -} -TEST(TestRtsanInterceptors, OsNoSpinLockLockDiesWhenRealtime) { - _os_nospin_lock_s lock{}; - auto Func = [&]() { _os_nospin_lock_lock(&lock); }; - ExpectRealtimeDeath(Func, "_os_nospin_lock_lock"); - ExpectNonRealtimeSurvival(Func); -} -#endif +#endif // SANITIZER_APPLE #if SANITIZER_LINUX TEST(TestRtsanInterceptors, SpinLockLockDiesWhenRealtime) { From 0019b7d0ae0bcc65af065542fcfb48ea0eb55d38 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Fri, 25 Apr 2025 20:05:00 +0530 Subject: [PATCH 21/83] [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Towards This change moves WasmSym from a static global struct to an instance owned by Ctx, allowing it to be reset cleanly between linker runs. This enables safe support for multiple invocations of wasm-ld within the same process Changes done - Converted WasmSym from a static struct to a regular struct with instance members. - Added a std::unique_ptr wasmSym field inside Ctx. - Reset wasmSym in Ctx::reset() to clear state between links. - Replaced all WasmSym:: references with ctx.wasmSym->. - Removed global symbol definitions from Symbols.cpp that are no longer needed. Clearing wasmSym in ctx.reset() ensures a clean slate for each link invocation, preventing symbol leakage across runs—critical when using wasm-ld/lld as a reentrant library where global state can cause subtle, hard-to-debug errors. --------- Co-authored-by: Vassil Vassilev (cherry picked from commit 9cbbb74d370c09e13b8412f21dccb7d2c4afc6a4) --- lld/wasm/Config.h | 106 +++++++++++++++++++ lld/wasm/Driver.cpp | 60 +++++------ lld/wasm/InputChunks.cpp | 10 +- lld/wasm/MarkLive.cpp | 6 +- lld/wasm/OutputSections.cpp | 4 +- lld/wasm/Symbols.cpp | 25 ----- lld/wasm/Symbols.h | 99 ------------------ lld/wasm/SyntheticSections.cpp | 32 +++--- lld/wasm/Writer.cpp | 185 +++++++++++++++++---------------- 9 files changed, 253 insertions(+), 274 deletions(-) diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 1fa6c42d9cd86..527edc11c48e3 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -32,6 +32,11 @@ class InputTable; class InputGlobal; class InputFunction; class Symbol; +class DefinedData; +class GlobalSymbol; +class DefinedFunction; +class UndefinedGlobal; +class TableSymbol; // For --unresolved-symbols. enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic }; @@ -139,6 +144,107 @@ struct Ctx { llvm::SmallVector syntheticGlobals; llvm::SmallVector syntheticTables; + // linker-generated symbols + struct WasmSym { + // __global_base + // Symbol marking the start of the global section. + DefinedData *globalBase; + + // __stack_pointer/__stack_low/__stack_high + // Global that holds current value of stack pointer and data symbols marking + // the start and end of the stack region. stackPointer is initialized to + // stackHigh and grows downwards towards stackLow + GlobalSymbol *stackPointer; + DefinedData *stackLow; + DefinedData *stackHigh; + + // __tls_base + // Global that holds the address of the base of the current thread's + // TLS block. + GlobalSymbol *tlsBase; + + // __tls_size + // Symbol whose value is the size of the TLS block. 
+ GlobalSymbol *tlsSize; + + // __tls_size + // Symbol whose value is the alignment of the TLS block. + GlobalSymbol *tlsAlign; + + // __data_end + // Symbol marking the end of the data and bss. + DefinedData *dataEnd; + + // __heap_base/__heap_end + // Symbols marking the beginning and end of the "heap". It starts at the end + // of the data, bss and explicit stack, and extends to the end of the linear + // memory allocated by wasm-ld. This region of memory is not used by the + // linked code, so it may be used as a backing store for `sbrk` or `malloc` + // implementations. + DefinedData *heapBase; + DefinedData *heapEnd; + + // __wasm_init_memory_flag + // Symbol whose contents are nonzero iff memory has already been + // initialized. + DefinedData *initMemoryFlag; + + // __wasm_init_memory + // Function that initializes passive data segments during instantiation. + DefinedFunction *initMemory; + + // __wasm_call_ctors + // Function that directly calls all ctors in priority order. + DefinedFunction *callCtors; + + // __wasm_call_dtors + // Function that calls the libc/etc. cleanup function. + DefinedFunction *callDtors; + + // __wasm_apply_global_relocs + // Function that applies relocations to wasm globals post-instantiation. + // Unlike __wasm_apply_data_relocs this needs to run on every thread. + DefinedFunction *applyGlobalRelocs; + + // __wasm_apply_tls_relocs + // Like __wasm_apply_data_relocs but for TLS section. These must be + // delayed until __wasm_init_tls. + DefinedFunction *applyTLSRelocs; + + // __wasm_apply_global_tls_relocs + // Like applyGlobalRelocs but for globals that hold TLS addresses. These + // must be delayed until __wasm_init_tls. + DefinedFunction *applyGlobalTLSRelocs; + + // __wasm_init_tls + // Function that allocates thread-local storage and initializes it. + DefinedFunction *initTLS; + + // Pointer to the function that is to be used in the start section. + // (normally an alias of initMemory, or applyGlobalRelocs). + DefinedFunction *startFunction; + + // __dso_handle + // Symbol used in calls to __cxa_atexit to determine current DLL + DefinedData *dsoHandle; + + // __table_base + // Used in PIC code for offset of indirect function table + UndefinedGlobal *tableBase; + DefinedData *definedTableBase; + + // __memory_base + // Used in PIC code for offset of global data + UndefinedGlobal *memoryBase; + DefinedData *definedMemoryBase; + + // __indirect_function_table + // Used as an address space for function pointers, with each function that + // is used as a function pointer being allocated a slot. + TableSymbol *indirectFunctionTable; + }; + WasmSym sym; + // True if we are creating position-independent code. bool isPic = false; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index c3a74dde6480e..467c49e9981bc 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -70,6 +70,7 @@ void Ctx::reset() { isPic = false; legacyFunctionTable = false; emitBssSegments = false; + sym = WasmSym{}; } namespace { @@ -941,14 +942,14 @@ static void createSyntheticSymbols() { true}; static llvm::wasm::WasmGlobalType mutableGlobalTypeI64 = {WASM_TYPE_I64, true}; - WasmSym::callCtors = symtab->addSyntheticFunction( + ctx.sym.callCtors = symtab->addSyntheticFunction( "__wasm_call_ctors", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_call_ctors")); bool is64 = ctx.arg.is64.value_or(false); if (ctx.isPic) { - WasmSym::stackPointer = + ctx.sym.stackPointer = createUndefinedGlobal("__stack_pointer", ctx.arg.is64.value_or(false) ? 
&mutableGlobalTypeI64 : &mutableGlobalTypeI32); @@ -958,25 +959,24 @@ static void createSyntheticSymbols() { // See: // https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md auto *globalType = is64 ? &globalTypeI64 : &globalTypeI32; - WasmSym::memoryBase = createUndefinedGlobal("__memory_base", globalType); - WasmSym::tableBase = createUndefinedGlobal("__table_base", globalType); - WasmSym::memoryBase->markLive(); - WasmSym::tableBase->markLive(); + ctx.sym.memoryBase = createUndefinedGlobal("__memory_base", globalType); + ctx.sym.tableBase = createUndefinedGlobal("__table_base", globalType); + ctx.sym.memoryBase->markLive(); + ctx.sym.tableBase->markLive(); } else { // For non-PIC code - WasmSym::stackPointer = createGlobalVariable("__stack_pointer", true); - WasmSym::stackPointer->markLive(); + ctx.sym.stackPointer = createGlobalVariable("__stack_pointer", true); + ctx.sym.stackPointer->markLive(); } if (ctx.arg.sharedMemory) { - WasmSym::tlsBase = createGlobalVariable("__tls_base", true); - WasmSym::tlsSize = createGlobalVariable("__tls_size", false); - WasmSym::tlsAlign = createGlobalVariable("__tls_align", false); - WasmSym::initTLS = symtab->addSyntheticFunction( + ctx.sym.tlsBase = createGlobalVariable("__tls_base", true); + ctx.sym.tlsSize = createGlobalVariable("__tls_size", false); + ctx.sym.tlsAlign = createGlobalVariable("__tls_align", false); + ctx.sym.initTLS = symtab->addSyntheticFunction( "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN, - make( - is64 ? i64ArgSignature : i32ArgSignature, - "__wasm_init_tls")); + make(is64 ? i64ArgSignature : i32ArgSignature, + "__wasm_init_tls")); } } @@ -984,19 +984,19 @@ static void createOptionalSymbols() { if (ctx.arg.relocatable) return; - WasmSym::dsoHandle = symtab->addOptionalDataSymbol("__dso_handle"); + ctx.sym.dsoHandle = symtab->addOptionalDataSymbol("__dso_handle"); if (!ctx.arg.shared) - WasmSym::dataEnd = symtab->addOptionalDataSymbol("__data_end"); + ctx.sym.dataEnd = symtab->addOptionalDataSymbol("__data_end"); if (!ctx.isPic) { - WasmSym::stackLow = symtab->addOptionalDataSymbol("__stack_low"); - WasmSym::stackHigh = symtab->addOptionalDataSymbol("__stack_high"); - WasmSym::globalBase = symtab->addOptionalDataSymbol("__global_base"); - WasmSym::heapBase = symtab->addOptionalDataSymbol("__heap_base"); - WasmSym::heapEnd = symtab->addOptionalDataSymbol("__heap_end"); - WasmSym::definedMemoryBase = symtab->addOptionalDataSymbol("__memory_base"); - WasmSym::definedTableBase = symtab->addOptionalDataSymbol("__table_base"); + ctx.sym.stackLow = symtab->addOptionalDataSymbol("__stack_low"); + ctx.sym.stackHigh = symtab->addOptionalDataSymbol("__stack_high"); + ctx.sym.globalBase = symtab->addOptionalDataSymbol("__global_base"); + ctx.sym.heapBase = symtab->addOptionalDataSymbol("__heap_base"); + ctx.sym.heapEnd = symtab->addOptionalDataSymbol("__heap_end"); + ctx.sym.definedMemoryBase = symtab->addOptionalDataSymbol("__memory_base"); + ctx.sym.definedTableBase = symtab->addOptionalDataSymbol("__table_base"); } // For non-shared memory programs we still need to define __tls_base since we @@ -1009,7 +1009,7 @@ static void createOptionalSymbols() { // __tls_size and __tls_align are not needed in this case since they are only // needed for __wasm_init_tls (which we do not create in this case). 
if (!ctx.arg.sharedMemory) - WasmSym::tlsBase = createOptionalGlobal("__tls_base", false); + ctx.sym.tlsBase = createOptionalGlobal("__tls_base", false); } static void processStubLibrariesPreLTO() { @@ -1384,9 +1384,9 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // by libc/etc., because destructors are registered dynamically with // `__cxa_atexit` and friends. if (!ctx.arg.relocatable && !ctx.arg.shared && - !WasmSym::callCtors->isUsedInRegularObj && - WasmSym::callCtors->getName() != ctx.arg.entry && - !ctx.arg.exportedSymbols.count(WasmSym::callCtors->getName())) { + !ctx.sym.callCtors->isUsedInRegularObj && + ctx.sym.callCtors->getName() != ctx.arg.entry && + !ctx.arg.exportedSymbols.count(ctx.sym.callCtors->getName())) { if (Symbol *callDtors = handleUndefined("__wasm_call_dtors", "")) { if (auto *callDtorsFunc = dyn_cast(callDtors)) { @@ -1395,7 +1395,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { !callDtorsFunc->signature->Returns.empty())) { error("__wasm_call_dtors must have no argument or return values"); } - WasmSym::callDtors = callDtorsFunc; + ctx.sym.callDtors = callDtorsFunc; } else { error("__wasm_call_dtors must be a function"); } @@ -1488,7 +1488,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { markLive(); // Provide the indirect function table if needed. - WasmSym::indirectFunctionTable = + ctx.sym.indirectFunctionTable = symtab->resolveIndirectFunctionTable(/*required =*/false); if (errorCount()) diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp index ccdc92f5c8d71..0e6c4e691be10 100644 --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -397,9 +397,9 @@ bool InputChunk::generateRelocationCode(raw_ostream &os) const { if (ctx.isPic) { writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); if (isTLS()) - writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "tls_base"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "tls_base"); else - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base"); + writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "memory_base"); writeU8(os, opcode_ptr_add, "ADD"); } @@ -422,12 +422,12 @@ bool InputChunk::generateRelocationCode(raw_ostream &os) const { } } else { assert(ctx.isPic); - const GlobalSymbol* baseSymbol = WasmSym::memoryBase; + const GlobalSymbol *baseSymbol = ctx.sym.memoryBase; if (rel.Type == R_WASM_TABLE_INDEX_I32 || rel.Type == R_WASM_TABLE_INDEX_I64) - baseSymbol = WasmSym::tableBase; + baseSymbol = ctx.sym.tableBase; else if (sym->isTLS()) - baseSymbol = WasmSym::tlsBase; + baseSymbol = ctx.sym.tlsBase; writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); writeUleb128(os, baseSymbol->getGlobalIndex(), "base"); writeU8(os, opcode_reloc_const, "CONST"); diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp index 13c7a3d894fe3..2b2cf19f14b30 100644 --- a/lld/wasm/MarkLive.cpp +++ b/lld/wasm/MarkLive.cpp @@ -114,8 +114,8 @@ void MarkLive::run() { if (sym->isNoStrip() || sym->isExported()) enqueue(sym); - if (WasmSym::callDtors) - enqueue(WasmSym::callDtors); + if (ctx.sym.callDtors) + enqueue(ctx.sym.callDtors); for (const ObjFile *obj : ctx.objectFiles) if (obj->isLive()) { @@ -131,7 +131,7 @@ void MarkLive::run() { // If we have any non-discarded init functions, mark `__wasm_call_ctors` as // live so that we assign it an index and call it. 
if (isCallCtorsLive()) - WasmSym::callCtors->markLive(); + ctx.sym.callCtors->markLive(); } void MarkLive::mark() { diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp index 95f7ecc29de6b..4142a913c8cbf 100644 --- a/lld/wasm/OutputSections.cpp +++ b/lld/wasm/OutputSections.cpp @@ -123,7 +123,7 @@ void DataSection::finalizeContents() { if ((segment->initFlags & WASM_DATA_SEGMENT_IS_PASSIVE) == 0) { if (ctx.isPic && ctx.arg.extendedConst) { writeU8(os, WASM_OPCODE_GLOBAL_GET, "global get"); - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), + writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "literal (global index)"); if (segment->startVA) { writePtrConst(os, segment->startVA, is64, "offset"); @@ -136,7 +136,7 @@ void DataSection::finalizeContents() { if (ctx.isPic) { assert(segment->startVA == 0); initExpr.Inst.Opcode = WASM_OPCODE_GLOBAL_GET; - initExpr.Inst.Value.Global = WasmSym::memoryBase->getGlobalIndex(); + initExpr.Inst.Value.Global = ctx.sym.memoryBase->getGlobalIndex(); } else { initExpr = intConst(segment->startVA, is64); } diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index a687fd6d6c4ef..92a933ecbb024 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -77,31 +77,6 @@ std::string toString(wasm::Symbol::Kind kind) { } namespace wasm { -DefinedFunction *WasmSym::callCtors; -DefinedFunction *WasmSym::callDtors; -DefinedFunction *WasmSym::initMemory; -DefinedFunction *WasmSym::applyGlobalRelocs; -DefinedFunction *WasmSym::applyTLSRelocs; -DefinedFunction *WasmSym::applyGlobalTLSRelocs; -DefinedFunction *WasmSym::initTLS; -DefinedFunction *WasmSym::startFunction; -DefinedData *WasmSym::dsoHandle; -DefinedData *WasmSym::dataEnd; -DefinedData *WasmSym::globalBase; -DefinedData *WasmSym::heapBase; -DefinedData *WasmSym::heapEnd; -DefinedData *WasmSym::initMemoryFlag; -GlobalSymbol *WasmSym::stackPointer; -DefinedData *WasmSym::stackLow; -DefinedData *WasmSym::stackHigh; -GlobalSymbol *WasmSym::tlsBase; -GlobalSymbol *WasmSym::tlsSize; -GlobalSymbol *WasmSym::tlsAlign; -UndefinedGlobal *WasmSym::tableBase; -DefinedData *WasmSym::definedTableBase; -UndefinedGlobal *WasmSym::memoryBase; -DefinedData *WasmSym::definedMemoryBase; -TableSymbol *WasmSym::indirectFunctionTable; WasmSymbolType Symbol::getWasmType() const { if (isa(this)) diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h index b409fffc50a6c..55ee21939ce07 100644 --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -537,105 +537,6 @@ class LazySymbol : public Symbol { const WasmSignature *signature = nullptr; }; -// linker-generated symbols -struct WasmSym { - // __global_base - // Symbol marking the start of the global section. - static DefinedData *globalBase; - - // __stack_pointer/__stack_low/__stack_high - // Global that holds current value of stack pointer and data symbols marking - // the start and end of the stack region. stackPointer is initialized to - // stackHigh and grows downwards towards stackLow - static GlobalSymbol *stackPointer; - static DefinedData *stackLow; - static DefinedData *stackHigh; - - // __tls_base - // Global that holds the address of the base of the current thread's - // TLS block. - static GlobalSymbol *tlsBase; - - // __tls_size - // Symbol whose value is the size of the TLS block. - static GlobalSymbol *tlsSize; - - // __tls_size - // Symbol whose value is the alignment of the TLS block. - static GlobalSymbol *tlsAlign; - - // __data_end - // Symbol marking the end of the data and bss. 
- static DefinedData *dataEnd; - - // __heap_base/__heap_end - // Symbols marking the beginning and end of the "heap". It starts at the end - // of the data, bss and explicit stack, and extends to the end of the linear - // memory allocated by wasm-ld. This region of memory is not used by the - // linked code, so it may be used as a backing store for `sbrk` or `malloc` - // implementations. - static DefinedData *heapBase; - static DefinedData *heapEnd; - - // __wasm_init_memory_flag - // Symbol whose contents are nonzero iff memory has already been initialized. - static DefinedData *initMemoryFlag; - - // __wasm_init_memory - // Function that initializes passive data segments during instantiation. - static DefinedFunction *initMemory; - - // __wasm_call_ctors - // Function that directly calls all ctors in priority order. - static DefinedFunction *callCtors; - - // __wasm_call_dtors - // Function that calls the libc/etc. cleanup function. - static DefinedFunction *callDtors; - - // __wasm_apply_global_relocs - // Function that applies relocations to wasm globals post-instantiation. - // Unlike __wasm_apply_data_relocs this needs to run on every thread. - static DefinedFunction *applyGlobalRelocs; - - // __wasm_apply_tls_relocs - // Like __wasm_apply_data_relocs but for TLS section. These must be - // delayed until __wasm_init_tls. - static DefinedFunction *applyTLSRelocs; - - // __wasm_apply_global_tls_relocs - // Like applyGlobalRelocs but for globals that hold TLS addresses. These - // must be delayed until __wasm_init_tls. - static DefinedFunction *applyGlobalTLSRelocs; - - // __wasm_init_tls - // Function that allocates thread-local storage and initializes it. - static DefinedFunction *initTLS; - - // Pointer to the function that is to be used in the start section. - // (normally an alias of initMemory, or applyGlobalRelocs). - static DefinedFunction *startFunction; - - // __dso_handle - // Symbol used in calls to __cxa_atexit to determine current DLL - static DefinedData *dsoHandle; - - // __table_base - // Used in PIC code for offset of indirect function table - static UndefinedGlobal *tableBase; - static DefinedData *definedTableBase; - - // __memory_base - // Used in PIC code for offset of global data - static UndefinedGlobal *memoryBase; - static DefinedData *definedMemoryBase; - - // __indirect_function_table - // Used as an address space for function pointers, with each function that is - // used as a function pointer being allocated a slot. - static TableSymbol *indirectFunctionTable; -}; - // A buffer class that is large enough to hold any Symbol-derived // object. We allocate memory using this class and instantiate a symbol // using the placement new. diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index 7fb44b9f0c009..0e2aa57e9048e 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -319,8 +319,8 @@ void TableSection::addTable(InputTable *table) { // Some inputs require that the indirect function table be assigned to table // number 0. if (ctx.legacyFunctionTable && - isa(WasmSym::indirectFunctionTable) && - cast(WasmSym::indirectFunctionTable)->table == table) { + isa(ctx.sym.indirectFunctionTable) && + cast(ctx.sym.indirectFunctionTable)->table == table) { if (out.importSec->getNumImportedTables()) { // Alack! Some other input imported a table, meaning that we are unable // to assign table number 0 to the indirect function table. 
@@ -395,8 +395,8 @@ void GlobalSection::assignIndexes() { } static void ensureIndirectFunctionTable() { - if (!WasmSym::indirectFunctionTable) - WasmSym::indirectFunctionTable = + if (!ctx.sym.indirectFunctionTable) + ctx.sym.indirectFunctionTable = symtab->resolveIndirectFunctionTable(/*required =*/true); } @@ -430,10 +430,9 @@ void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const { // Get __memory_base writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); if (sym->isTLS()) - writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "__tls_base"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); else - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), - "__memory_base"); + writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "__memory_base"); // Add the virtual address of the data symbol writeU8(os, opcode_ptr_const, "CONST"); @@ -443,7 +442,7 @@ void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const { continue; // Get __table_base writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); - writeUleb128(os, WasmSym::tableBase->getGlobalIndex(), "__table_base"); + writeUleb128(os, ctx.sym.tableBase->getGlobalIndex(), "__table_base"); // Add the table index to __table_base writeU8(os, opcode_ptr_const, "CONST"); @@ -490,13 +489,13 @@ void GlobalSection::writeBody() { if (ctx.arg.extendedConst && ctx.isPic) { if (auto *d = dyn_cast(sym)) { if (!sym->isTLS()) { - globalIdx = WasmSym::memoryBase->getGlobalIndex(); + globalIdx = ctx.sym.memoryBase->getGlobalIndex(); offset = d->getVA(); useExtendedConst = true; } } else if (auto *f = dyn_cast(sym)) { if (!sym->isStub) { - globalIdx = WasmSym::tableBase->getGlobalIndex(); + globalIdx = ctx.sym.tableBase->getGlobalIndex(); offset = f->getTableIndex(); useExtendedConst = true; } @@ -550,14 +549,11 @@ void ExportSection::writeBody() { writeExport(os, export_); } -bool StartSection::isNeeded() const { - return WasmSym::startFunction != nullptr; -} +bool StartSection::isNeeded() const { return ctx.sym.startFunction != nullptr; } void StartSection::writeBody() { raw_ostream &os = bodyOutputStream; - writeUleb128(os, WasmSym::startFunction->getFunctionIndex(), - "function index"); + writeUleb128(os, ctx.sym.startFunction->getFunctionIndex(), "function index"); } void ElemSection::addEntry(FunctionSymbol *sym) { @@ -573,9 +569,9 @@ void ElemSection::addEntry(FunctionSymbol *sym) { void ElemSection::writeBody() { raw_ostream &os = bodyOutputStream; - assert(WasmSym::indirectFunctionTable); + assert(ctx.sym.indirectFunctionTable); writeUleb128(os, 1, "segment count"); - uint32_t tableNumber = WasmSym::indirectFunctionTable->getTableNumber(); + uint32_t tableNumber = ctx.sym.indirectFunctionTable->getTableNumber(); uint32_t flags = 0; if (tableNumber) flags |= WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER; @@ -587,7 +583,7 @@ void ElemSection::writeBody() { initExpr.Extended = false; if (ctx.isPic) { initExpr.Inst.Opcode = WASM_OPCODE_GLOBAL_GET; - initExpr.Inst.Value.Global = WasmSym::tableBase->getGlobalIndex(); + initExpr.Inst.Value.Global = ctx.sym.tableBase->getGlobalIndex(); } else { bool is64 = ctx.arg.is64.value_or(false); initExpr = intConst(ctx.arg.tableBase, is64); diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 76e38f548157c..2bf4b370a7dbd 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -340,16 +340,16 @@ void Writer::layoutMemory() { if (ctx.arg.relocatable || ctx.isPic) return; memoryPtr = alignTo(memoryPtr, stackAlignment); - if (WasmSym::stackLow) - 
WasmSym::stackLow->setVA(memoryPtr); + if (ctx.sym.stackLow) + ctx.sym.stackLow->setVA(memoryPtr); if (ctx.arg.zStackSize != alignTo(ctx.arg.zStackSize, stackAlignment)) error("stack size must be " + Twine(stackAlignment) + "-byte aligned"); log("mem: stack size = " + Twine(ctx.arg.zStackSize)); log("mem: stack base = " + Twine(memoryPtr)); memoryPtr += ctx.arg.zStackSize; - setGlobalPtr(cast(WasmSym::stackPointer), memoryPtr); - if (WasmSym::stackHigh) - WasmSym::stackHigh->setVA(memoryPtr); + setGlobalPtr(cast(ctx.sym.stackPointer), memoryPtr); + if (ctx.sym.stackHigh) + ctx.sym.stackHigh->setVA(memoryPtr); log("mem: stack top = " + Twine(memoryPtr)); }; @@ -367,15 +367,15 @@ void Writer::layoutMemory() { } log("mem: global base = " + Twine(memoryPtr)); - if (WasmSym::globalBase) - WasmSym::globalBase->setVA(memoryPtr); + if (ctx.sym.globalBase) + ctx.sym.globalBase->setVA(memoryPtr); uint64_t dataStart = memoryPtr; // Arbitrarily set __dso_handle handle to point to the start of the data // segments. - if (WasmSym::dsoHandle) - WasmSym::dsoHandle->setVA(dataStart); + if (ctx.sym.dsoHandle) + ctx.sym.dsoHandle->setVA(dataStart); out.dylinkSec->memAlign = 0; for (OutputSegment *seg : segments) { @@ -386,16 +386,16 @@ void Writer::layoutMemory() { memoryPtr, seg->size, seg->alignment)); if (!ctx.arg.relocatable && seg->isTLS()) { - if (WasmSym::tlsSize) { - auto *tlsSize = cast(WasmSym::tlsSize); + if (ctx.sym.tlsSize) { + auto *tlsSize = cast(ctx.sym.tlsSize); setGlobalPtr(tlsSize, seg->size); } - if (WasmSym::tlsAlign) { - auto *tlsAlign = cast(WasmSym::tlsAlign); + if (ctx.sym.tlsAlign) { + auto *tlsAlign = cast(ctx.sym.tlsAlign); setGlobalPtr(tlsAlign, int64_t{1} << seg->alignment); } - if (!ctx.arg.sharedMemory && WasmSym::tlsBase) { - auto *tlsBase = cast(WasmSym::tlsBase); + if (!ctx.arg.sharedMemory && ctx.sym.tlsBase) { + auto *tlsBase = cast(ctx.sym.tlsBase); setGlobalPtr(tlsBase, memoryPtr); } } @@ -406,17 +406,17 @@ void Writer::layoutMemory() { // Make space for the memory initialization flag if (ctx.arg.sharedMemory && hasPassiveInitializedSegments()) { memoryPtr = alignTo(memoryPtr, 4); - WasmSym::initMemoryFlag = symtab->addSyntheticDataSymbol( + ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol( "__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN); - WasmSym::initMemoryFlag->markLive(); - WasmSym::initMemoryFlag->setVA(memoryPtr); + ctx.sym.initMemoryFlag->markLive(); + ctx.sym.initMemoryFlag->setVA(memoryPtr); log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}", "__wasm_init_memory_flag", memoryPtr, 4, 4)); memoryPtr += 4; } - if (WasmSym::dataEnd) - WasmSym::dataEnd->setVA(memoryPtr); + if (ctx.sym.dataEnd) + ctx.sym.dataEnd->setVA(memoryPtr); uint64_t staticDataSize = memoryPtr - dataStart; log("mem: static data = " + Twine(staticDataSize)); @@ -426,7 +426,7 @@ void Writer::layoutMemory() { if (!ctx.arg.stackFirst) placeStack(); - if (WasmSym::heapBase) { + if (ctx.sym.heapBase) { // Set `__heap_base` to follow the end of the stack or global data. The // fact that this comes last means that a malloc/brk implementation can // grow the heap at runtime. @@ -434,7 +434,7 @@ void Writer::layoutMemory() { // __heap_base to be aligned already. 
memoryPtr = alignTo(memoryPtr, heapAlignment); log("mem: heap base = " + Twine(memoryPtr)); - WasmSym::heapBase->setVA(memoryPtr); + ctx.sym.heapBase->setVA(memoryPtr); } uint64_t maxMemorySetting = 1ULL << 32; @@ -470,12 +470,12 @@ void Writer::layoutMemory() { out.memorySec->numMemoryPages = memoryPtr / WasmPageSize; log("mem: total pages = " + Twine(out.memorySec->numMemoryPages)); - if (WasmSym::heapEnd) { + if (ctx.sym.heapEnd) { // Set `__heap_end` to follow the end of the statically allocated linear // memory. The fact that this comes last means that a malloc/brk // implementation can grow the heap at runtime. log("mem: heap end = " + Twine(memoryPtr)); - WasmSym::heapEnd->setVA(memoryPtr); + ctx.sym.heapEnd->setVA(memoryPtr); } uint64_t maxMemory = 0; @@ -758,14 +758,14 @@ void Writer::calculateImports() { // Some inputs require that the indirect function table be assigned to table // number 0, so if it is present and is an import, allocate it before any // other tables. - if (WasmSym::indirectFunctionTable && - shouldImport(WasmSym::indirectFunctionTable)) - out.importSec->addImport(WasmSym::indirectFunctionTable); + if (ctx.sym.indirectFunctionTable && + shouldImport(ctx.sym.indirectFunctionTable)) + out.importSec->addImport(ctx.sym.indirectFunctionTable); for (Symbol *sym : symtab->symbols()) { if (!shouldImport(sym)) continue; - if (sym == WasmSym::indirectFunctionTable) + if (sym == ctx.sym.indirectFunctionTable) continue; LLVM_DEBUG(dbgs() << "import: " << sym->getName() << "\n"); out.importSec->addImport(sym); @@ -879,7 +879,7 @@ void Writer::createCommandExportWrappers() { // If there are no ctors and there's no libc `__wasm_call_dtors` to // call, don't wrap the exports. - if (initFunctions.empty() && WasmSym::callDtors == nullptr) + if (initFunctions.empty() && ctx.sym.callDtors == nullptr) return; std::vector toWrap; @@ -919,27 +919,27 @@ void Writer::createCommandExportWrappers() { } static void finalizeIndirectFunctionTable() { - if (!WasmSym::indirectFunctionTable) + if (!ctx.sym.indirectFunctionTable) return; - if (shouldImport(WasmSym::indirectFunctionTable) && - !WasmSym::indirectFunctionTable->hasTableNumber()) { + if (shouldImport(ctx.sym.indirectFunctionTable) && + !ctx.sym.indirectFunctionTable->hasTableNumber()) { // Processing -Bsymbolic relocations resulted in a late requirement that the // indirect function table be present, and we are running in --import-table // mode. Add the table now to the imports section. Otherwise it will be // added to the tables section later in assignIndexes. - out.importSec->addImport(WasmSym::indirectFunctionTable); + out.importSec->addImport(ctx.sym.indirectFunctionTable); } uint32_t tableSize = ctx.arg.tableBase + out.elemSec->numEntries(); WasmLimits limits = {0, tableSize, 0}; - if (WasmSym::indirectFunctionTable->isDefined() && !ctx.arg.growableTable) { + if (ctx.sym.indirectFunctionTable->isDefined() && !ctx.arg.growableTable) { limits.Flags |= WASM_LIMITS_FLAG_HAS_MAX; limits.Maximum = limits.Minimum; } if (ctx.arg.is64.value_or(false)) limits.Flags |= WASM_LIMITS_FLAG_IS_64; - WasmSym::indirectFunctionTable->setLimits(limits); + ctx.sym.indirectFunctionTable->setLimits(limits); } static void scanRelocations() { @@ -1142,26 +1142,26 @@ void Writer::createSyntheticInitFunctions() { // We also initialize bss segments (using memory.fill) as part of this // function. 
if (hasPassiveInitializedSegments()) { - WasmSym::initMemory = symtab->addSyntheticFunction( + ctx.sym.initMemory = symtab->addSyntheticFunction( "__wasm_init_memory", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_init_memory")); - WasmSym::initMemory->markLive(); + ctx.sym.initMemory->markLive(); if (ctx.arg.sharedMemory) { // This global is assigned during __wasm_init_memory in the shared memory // case. - WasmSym::tlsBase->markLive(); + ctx.sym.tlsBase->markLive(); } } if (ctx.arg.sharedMemory) { if (out.globalSec->needsTLSRelocations()) { - WasmSym::applyGlobalTLSRelocs = symtab->addSyntheticFunction( + ctx.sym.applyGlobalTLSRelocs = symtab->addSyntheticFunction( "__wasm_apply_global_tls_relocs", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_apply_global_tls_relocs")); - WasmSym::applyGlobalTLSRelocs->markLive(); + ctx.sym.applyGlobalTLSRelocs->markLive(); // TLS relocations depend on the __tls_base symbols - WasmSym::tlsBase->markLive(); + ctx.sym.tlsBase->markLive(); } auto hasTLSRelocs = [](const OutputSegment *segment) { @@ -1172,40 +1172,39 @@ void Writer::createSyntheticInitFunctions() { return false; }; if (llvm::any_of(segments, hasTLSRelocs)) { - WasmSym::applyTLSRelocs = symtab->addSyntheticFunction( + ctx.sym.applyTLSRelocs = symtab->addSyntheticFunction( "__wasm_apply_tls_relocs", WASM_SYMBOL_VISIBILITY_HIDDEN, - make(nullSignature, - "__wasm_apply_tls_relocs")); - WasmSym::applyTLSRelocs->markLive(); + make(nullSignature, "__wasm_apply_tls_relocs")); + ctx.sym.applyTLSRelocs->markLive(); } } if (ctx.isPic && out.globalSec->needsRelocations()) { - WasmSym::applyGlobalRelocs = symtab->addSyntheticFunction( + ctx.sym.applyGlobalRelocs = symtab->addSyntheticFunction( "__wasm_apply_global_relocs", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_apply_global_relocs")); - WasmSym::applyGlobalRelocs->markLive(); + ctx.sym.applyGlobalRelocs->markLive(); } // If there is only one start function we can just use that function // itself as the Wasm start function, otherwise we need to synthesize // a new function to call them in sequence. - if (WasmSym::applyGlobalRelocs && WasmSym::initMemory) { - WasmSym::startFunction = symtab->addSyntheticFunction( + if (ctx.sym.applyGlobalRelocs && ctx.sym.initMemory) { + ctx.sym.startFunction = symtab->addSyntheticFunction( "__wasm_start", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_start")); - WasmSym::startFunction->markLive(); + ctx.sym.startFunction->markLive(); } } void Writer::createInitMemoryFunction() { LLVM_DEBUG(dbgs() << "createInitMemoryFunction\n"); - assert(WasmSym::initMemory); + assert(ctx.sym.initMemory); assert(hasPassiveInitializedSegments()); uint64_t flagAddress; if (ctx.arg.sharedMemory) { - assert(WasmSym::initMemoryFlag); - flagAddress = WasmSym::initMemoryFlag->getVA(); + assert(ctx.sym.initMemoryFlag); + flagAddress = ctx.sym.initMemoryFlag->getVA(); } bool is64 = ctx.arg.is64.value_or(false); std::string bodyContent; @@ -1278,7 +1277,7 @@ void Writer::createInitMemoryFunction() { writeUleb128(os, 2, "local count"); writeU8(os, is64 ? WASM_TYPE_I64 : WASM_TYPE_I32, "address type"); writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base"); + writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "memory_base"); writePtrConst(os, flagAddress, is64, "flag address"); writeU8(os, is64 ? 
WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add"); writeU8(os, WASM_OPCODE_LOCAL_SET, "local.set"); @@ -1325,7 +1324,7 @@ void Writer::createInitMemoryFunction() { writePtrConst(os, s->startVA, is64, "destination address"); if (ctx.isPic) { writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), + writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "__memory_base"); writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "i32.add"); @@ -1343,8 +1342,7 @@ void Writer::createInitMemoryFunction() { writePtrConst(os, s->startVA, is64, "destination address"); } writeU8(os, WASM_OPCODE_GLOBAL_SET, "GLOBAL_SET"); - writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), - "__tls_base"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); if (ctx.isPic) { writeU8(os, WASM_OPCODE_LOCAL_GET, "local.tee"); writeUleb128(os, 1, "local 1"); @@ -1420,30 +1418,30 @@ void Writer::createInitMemoryFunction() { writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::initMemory, bodyContent); + createFunction(ctx.sym.initMemory, bodyContent); } void Writer::createStartFunction() { // If the start function exists when we have more than one function to call. - if (WasmSym::initMemory && WasmSym::applyGlobalRelocs) { - assert(WasmSym::startFunction); + if (ctx.sym.initMemory && ctx.sym.applyGlobalRelocs) { + assert(ctx.sym.startFunction); std::string bodyContent; { raw_string_ostream os(bodyContent); writeUleb128(os, 0, "num locals"); writeU8(os, WASM_OPCODE_CALL, "CALL"); - writeUleb128(os, WasmSym::applyGlobalRelocs->getFunctionIndex(), + writeUleb128(os, ctx.sym.applyGlobalRelocs->getFunctionIndex(), "function index"); writeU8(os, WASM_OPCODE_CALL, "CALL"); - writeUleb128(os, WasmSym::initMemory->getFunctionIndex(), + writeUleb128(os, ctx.sym.initMemory->getFunctionIndex(), "function index"); writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::startFunction, bodyContent); - } else if (WasmSym::initMemory) { - WasmSym::startFunction = WasmSym::initMemory; - } else if (WasmSym::applyGlobalRelocs) { - WasmSym::startFunction = WasmSym::applyGlobalRelocs; + createFunction(ctx.sym.startFunction, bodyContent); + } else if (ctx.sym.initMemory) { + ctx.sym.startFunction = ctx.sym.initMemory; + } else if (ctx.sym.applyGlobalRelocs) { + ctx.sym.startFunction = ctx.sym.applyGlobalRelocs; } } @@ -1497,7 +1495,7 @@ void Writer::createApplyTLSRelocationsFunction() { writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::applyTLSRelocs, bodyContent); + createFunction(ctx.sym.applyTLSRelocs, bodyContent); } // Similar to createApplyDataRelocationsFunction but generates relocation code @@ -1513,7 +1511,7 @@ void Writer::createApplyGlobalRelocationsFunction() { writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::applyGlobalRelocs, bodyContent); + createFunction(ctx.sym.applyGlobalRelocs, bodyContent); } // Similar to createApplyGlobalRelocationsFunction but for @@ -1529,7 +1527,7 @@ void Writer::createApplyGlobalTLSRelocationsFunction() { writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::applyGlobalTLSRelocs, bodyContent); + createFunction(ctx.sym.applyGlobalTLSRelocs, bodyContent); } // Create synthetic "__wasm_call_ctors" function based on ctor functions @@ -1537,7 +1535,7 @@ void Writer::createApplyGlobalTLSRelocationsFunction() { void Writer::createCallCtorsFunction() { // If __wasm_call_ctors isn't referenced, there aren't any ctors, don't // define the `__wasm_call_ctors` function. 
- if (!WasmSym::callCtors->isLive() && initFunctions.empty()) + if (!ctx.sym.callCtors->isLive() && initFunctions.empty()) return; // First write the body's contents to a string. @@ -1558,7 +1556,7 @@ void Writer::createCallCtorsFunction() { writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::callCtors, bodyContent); + createFunction(ctx.sym.callCtors, bodyContent); } // Create a wrapper around a function export which calls the @@ -1573,10 +1571,9 @@ void Writer::createCommandExportWrapper(uint32_t functionIndex, // Call `__wasm_call_ctors` which call static constructors (and // applies any runtime relocations in Emscripten-style PIC mode) - if (WasmSym::callCtors->isLive()) { + if (ctx.sym.callCtors->isLive()) { writeU8(os, WASM_OPCODE_CALL, "CALL"); - writeUleb128(os, WasmSym::callCtors->getFunctionIndex(), - "function index"); + writeUleb128(os, ctx.sym.callCtors->getFunctionIndex(), "function index"); } // Call the user's code, leaving any return values on the operand stack. @@ -1588,7 +1585,7 @@ void Writer::createCommandExportWrapper(uint32_t functionIndex, writeUleb128(os, functionIndex, "function index"); // Call the function that calls the destructors. - if (DefinedFunction *callDtors = WasmSym::callDtors) { + if (DefinedFunction *callDtors = ctx.sym.callDtors) { writeU8(os, WASM_OPCODE_CALL, "CALL"); writeUleb128(os, callDtors->getFunctionIndex(), "function index"); } @@ -1619,7 +1616,7 @@ void Writer::createInitTLSFunction() { writeUleb128(os, 0, "local index"); writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set"); - writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "global index"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "global index"); // FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend op. writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); @@ -1635,28 +1632,28 @@ void Writer::createInitTLSFunction() { writeU8(os, 0, "memory index immediate"); } - if (WasmSym::applyTLSRelocs) { + if (ctx.sym.applyTLSRelocs) { writeU8(os, WASM_OPCODE_CALL, "CALL"); - writeUleb128(os, WasmSym::applyTLSRelocs->getFunctionIndex(), + writeUleb128(os, ctx.sym.applyTLSRelocs->getFunctionIndex(), "function index"); } - if (WasmSym::applyGlobalTLSRelocs) { + if (ctx.sym.applyGlobalTLSRelocs) { writeU8(os, WASM_OPCODE_CALL, "CALL"); - writeUleb128(os, WasmSym::applyGlobalTLSRelocs->getFunctionIndex(), + writeUleb128(os, ctx.sym.applyGlobalTLSRelocs->getFunctionIndex(), "function index"); } writeU8(os, WASM_OPCODE_END, "end function"); } - createFunction(WasmSym::initTLS, bodyContent); + createFunction(ctx.sym.initTLS, bodyContent); } // Populate InitFunctions vector with init functions from all input objects. // This is then used either when creating the output linking section or to // synthesize the "__wasm_call_ctors" function. void Writer::calculateInitFunctions() { - if (!ctx.arg.relocatable && !WasmSym::callCtors->isLive()) + if (!ctx.arg.relocatable && !ctx.sym.callCtors->isLive()) return; for (ObjFile *file : ctx.objectFiles) { @@ -1707,8 +1704,8 @@ void Writer::createSyntheticSectionsPostLayout() { void Writer::run() { // For PIC code the table base is assigned dynamically by the loader. // For non-PIC, we start at 1 so that accessing table index 0 always traps. 
- if (!ctx.isPic && WasmSym::definedTableBase) - WasmSym::definedTableBase->setVA(ctx.arg.tableBase); + if (!ctx.isPic && ctx.sym.definedTableBase) + ctx.sym.definedTableBase->setVA(ctx.arg.tableBase); log("-- createOutputSegments"); createOutputSegments(); @@ -1776,14 +1773,18 @@ void Writer::run() { if (!ctx.arg.relocatable) { // Create linker synthesized functions - if (WasmSym::applyGlobalRelocs) + if (ctx.sym.applyGlobalRelocs) { createApplyGlobalRelocationsFunction(); - if (WasmSym::applyTLSRelocs) + } + if (ctx.sym.applyTLSRelocs) { createApplyTLSRelocationsFunction(); - if (WasmSym::applyGlobalTLSRelocs) + } + if (ctx.sym.applyGlobalTLSRelocs) { createApplyGlobalTLSRelocationsFunction(); - if (WasmSym::initMemory) + } + if (ctx.sym.initMemory) { createInitMemoryFunction(); + } createStartFunction(); createCallCtorsFunction(); @@ -1794,14 +1795,14 @@ void Writer::run() { // the input objects or an explicit export from the command-line, we // assume ctors and dtors are taken care of already. if (!ctx.arg.relocatable && !ctx.isPic && - !WasmSym::callCtors->isUsedInRegularObj && - !WasmSym::callCtors->isExported()) { + !ctx.sym.callCtors->isUsedInRegularObj && + !ctx.sym.callCtors->isExported()) { log("-- createCommandExportWrappers"); createCommandExportWrappers(); } } - if (WasmSym::initTLS && WasmSym::initTLS->isLive()) { + if (ctx.sym.initTLS && ctx.sym.initTLS->isLive()) { log("-- createInitTLSFunction"); createInitTLSFunction(); } From 72ad9be1e337f487c9db4dd634005d09f7bf2790 Mon Sep 17 00:00:00 2001 From: Losy001 <64610343+Losy001@users.noreply.github.com> Date: Sat, 26 Apr 2025 18:04:12 +0200 Subject: [PATCH 22/83] [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType (#134930) This pull request implements mangling for ConstantMatrixType, allowing matrices to be used on Windows. 
Related issues: #53158, #127127 This example code: ```cpp #include #include typedef float Matrix4 __attribute__((matrix_type(4, 4))); int main() { printf("%s\n", typeid(Matrix4).name()); } ``` Outputs this: ``` struct __clang::__matrix ``` (cherry picked from commit f5a30f111dc4ad6422863722eb708059a68a9d5c) --- clang/lib/AST/MicrosoftMangle.cpp | 17 ++++++- clang/test/CodeGenCXX/mangle-ms-matrix.cpp | 57 ++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenCXX/mangle-ms-matrix.cpp diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 42b735ccf4a2c..74c995f2f97f0 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -3552,7 +3552,22 @@ void MicrosoftCXXNameMangler::mangleType(const DependentSizedExtVectorType *T, void MicrosoftCXXNameMangler::mangleType(const ConstantMatrixType *T, Qualifiers quals, SourceRange Range) { - Error(Range.getBegin(), "matrix type") << Range; + QualType EltTy = T->getElementType(); + const BuiltinType *ET = EltTy->getAs(); + + llvm::SmallString<64> TemplateMangling; + llvm::raw_svector_ostream Stream(TemplateMangling); + MicrosoftCXXNameMangler Extra(Context, Stream); + + Stream << "?$"; + + Extra.mangleSourceName("__matrix"); + Extra.mangleType(EltTy, Range, QMM_Escape); + + Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumRows())); + Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumColumns())); + + mangleArtificialTagType(TagTypeKind::Struct, TemplateMangling, {"__clang"}); } void MicrosoftCXXNameMangler::mangleType(const DependentSizedMatrixType *T, diff --git a/clang/test/CodeGenCXX/mangle-ms-matrix.cpp b/clang/test/CodeGenCXX/mangle-ms-matrix.cpp new file mode 100644 index 0000000000000..b244aa6e33cfa --- /dev/null +++ b/clang/test/CodeGenCXX/mangle-ms-matrix.cpp @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -fenable-matrix -fms-extensions -fcxx-exceptions -ffreestanding -target-feature +avx -emit-llvm %s -o - -triple=i686-pc-win32 | FileCheck %s +// RUN: %clang_cc1 -fenable-matrix -fms-extensions -fcxx-exceptions -ffreestanding -target-feature +avx -emit-llvm %s -o - -triple=i686-pc-win32 -fexperimental-new-constant-interpreter | FileCheck %s + +typedef float __attribute__((matrix_type(4, 4))) m4x4f; +typedef float __attribute__((matrix_type(2, 2))) m2x2f; + +typedef int __attribute__((matrix_type(4, 4))) m4x4i; +typedef int __attribute__((matrix_type(2, 2))) m2x2i; + +void thow(int i) { + switch (i) { + case 0: throw m4x4f(); + // CHECK: ??_R0U?$__matrix@M$03$03@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@M$03$03@__clang@@@864 + // CHECK: _CTA1U?$__matrix@M$03$03@__clang@@ + // CHECK: _TI1U?$__matrix@M$03$03@__clang@@ + case 1: throw m2x2f(); + // CHECK: ??_R0U?$__matrix@M$01$01@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@M$01$01@__clang@@@816 + // CHECK: _CTA1U?$__matrix@M$01$01@__clang@@ + // CHECK: _TI1U?$__matrix@M$01$01@__clang@@ + case 2: throw m4x4i(); + // CHECK: ??_R0U?$__matrix@H$03$03@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@H$03$03@__clang@@@864 + // CHECK: _CTA1U?$__matrix@H$03$03@__clang@@ + // CHECK: _TI1U?$__matrix@H$03$03@__clang@@ + case 3: throw m2x2i(); + // CHECK: ??_R0U?$__matrix@H$01$01@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@H$01$01@__clang@@@816 + // CHECK: _CTA1U?$__matrix@H$01$01@__clang@@ + // CHECK: _TI1U?$__matrix@H$01$01@__clang@@ + } +} + +void foo44f(m4x4f) {} +// CHECK: define dso_local void @"?foo44f@@YAXU?$__matrix@M$03$03@__clang@@@Z" + +m4x4f rfoo44f() { return m4x4f(); 
} +// CHECK: define dso_local noundef <16 x float> @"?rfoo44f@@YAU?$__matrix@M$03$03@__clang@@XZ" + +void foo22f(m2x2f) {} +// CHECK: define dso_local void @"?foo22f@@YAXU?$__matrix@M$01$01@__clang@@@Z" + +m2x2f rfoo22f() { return m2x2f(); } +// CHECK: define dso_local noundef <4 x float> @"?rfoo22f@@YAU?$__matrix@M$01$01@__clang@@XZ" + +void foo44i(m4x4i) {} +// CHECK: define dso_local void @"?foo44i@@YAXU?$__matrix@H$03$03@__clang@@@Z" + +m4x4i rfoo44i() { return m4x4i(); } +// CHECK: define dso_local noundef <16 x i32> @"?rfoo44i@@YAU?$__matrix@H$03$03@__clang@@XZ" + +void foo22i(m2x2i) {} +// CHECK: define dso_local void @"?foo22i@@YAXU?$__matrix@H$01$01@__clang@@@Z" + +m2x2i rfoo22i() { return m2x2i(); } +// CHECK: define dso_local noundef <4 x i32> @"?rfoo22i@@YAU?$__matrix@H$01$01@__clang@@XZ" \ No newline at end of file From 41c36d94080488cc938b1c1697c7e8353405cd75 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Sat, 26 Apr 2025 09:17:56 -0700 Subject: [PATCH 23/83] [clang] Fix unused variable warning in MS mangler from constant matrix patch (cherry picked from commit ccdd55c518277d749eff878ffcb5ca3de55c2a60) --- clang/lib/AST/MicrosoftMangle.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 74c995f2f97f0..cb35dbd611204 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -3553,7 +3553,6 @@ void MicrosoftCXXNameMangler::mangleType(const DependentSizedExtVectorType *T, void MicrosoftCXXNameMangler::mangleType(const ConstantMatrixType *T, Qualifiers quals, SourceRange Range) { QualType EltTy = T->getElementType(); - const BuiltinType *ET = EltTy->getAs(); llvm::SmallString<64> TemplateMangling; llvm::raw_svector_ostream Stream(TemplateMangling); From f233430d977b67cc6aba9362b7d9f1a82d43d6d1 Mon Sep 17 00:00:00 2001 From: Yuta Mukai Date: Fri, 9 May 2025 17:07:19 +0900 Subject: [PATCH 24/83] [AArch64] Fix feature list for FUJITSU-MONAKA processor (#139212) FEAT_FP8DOT4 and FEAT_FP8FMA are supported by FUJITSU-MONAKA. These were previously enabled due to dependencies, but now require explicit activation due to modifications in the dependencies. 
(cherry picked from commit 9d5a5424f0356bd6ee01c751dd6957299783b41b) --- .../Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c | 2 ++ llvm/lib/Target/AArch64/AArch64Processors.td | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c index a80d0f5c79ec1..29e9682d58700 100644 --- a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c +++ b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c @@ -28,6 +28,8 @@ // CHECK-NEXT: FEAT_FP16 Enable half-precision floating-point data processing // CHECK-NEXT: FEAT_FP8 Enable FP8 instructions // CHECK-NEXT: FEAT_FP8DOT2 Enable FP8 2-way dot instructions +// CHECK-NEXT: FEAT_FP8DOT4 Enable FP8 4-way dot instructions +// CHECK-NEXT: FEAT_FP8FMA Enable Armv9.5-A FP8 multiply-add instructions // CHECK-NEXT: FEAT_FPAC Enable Armv8.3-A Pointer Authentication Faulting enhancement // CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int // CHECK-NEXT: FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index d1d4986d12550..80454b4f72d05 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -868,7 +868,8 @@ def ProcessorFeatures { FeatureSSBS, FeatureLS64, FeatureCLRBHB, FeatureSPECRES2, FeatureSVEAES, FeatureSVE2SM4, FeatureSVE2SHA3, FeatureSVE2, FeatureSVEBitPerm, FeatureETE, - FeatureMEC, FeatureFAMINMAX, FeatureFP8DOT2, FeatureLUT]; + FeatureMEC, FeatureFAMINMAX, FeatureFP8DOT2, FeatureFP8DOT4, + FeatureFP8FMA, FeatureLUT]; list Carmel = [HasV8_2aOps, FeatureNEON, FeatureSHA2, FeatureAES, FeatureFullFP16, FeatureCRC, FeatureLSE, FeatureRAS, FeatureRDM, FeatureFPARMv8]; From 2cacf46f35c8fa174a30a4b628a4b19e391a4798 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 10 May 2025 13:14:01 +0800 Subject: [PATCH 25/83] [X86][TargetLowering] Avoid deleting temporary nodes in `getNegatedExpression` (#139029) In the original case, the third call to `getCheaperNegatedExpression` deletes the SDNode returned by the first call. Similar to 74e6030bcbcc8e628f9a99a424342a0c656456f9, this patch uses `HandleSDNodes` to prevent nodes from being deleted by subsequent calls. Closes https://github.com/llvm/llvm-project/issues/138944. (cherry picked from commit 143cce72b1f50bc37363315793b80ae92d2b0ae3) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 ++++++++- llvm/test/CodeGen/X86/pr138982.ll | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr138982.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 627cef9ead7ff..4413fbb77f415 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -54147,12 +54147,19 @@ SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (!Flags.hasNoSignedZeros()) break; + // Because getCheaperNegatedExpression can delete nodes we need a handle to + // keep temporary nodes alive. + std::list Handles; + // This is always negatible for free but we might be able to remove some // extra operand negations as well. 
SmallVector NewOps(Op.getNumOperands(), SDValue()); - for (int i = 0; i != 3; ++i) + for (int i = 0; i != 3; ++i) { NewOps[i] = getCheaperNegatedExpression( Op.getOperand(i), DAG, LegalOperations, ForCodeSize, Depth + 1); + if (!!NewOps[i]) + Handles.emplace_back(NewOps[i]); + } bool NegA = !!NewOps[0]; bool NegB = !!NewOps[1]; diff --git a/llvm/test/CodeGen/X86/pr138982.ll b/llvm/test/CodeGen/X86/pr138982.ll new file mode 100644 index 0000000000000..32346d823a9fe --- /dev/null +++ b/llvm/test/CodeGen/X86/pr138982.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64 -mattr=+fma | FileCheck %s + +define <4 x float> @pr138982(<4 x float> %in_vec) { +; CHECK-LABEL: pr138982: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; CHECK-NEXT: vrcpps %xmm0, %xmm2 +; CHECK-NEXT: vrcpps %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpneqps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vbroadcastss {{.*#+}} xmm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm4, %xmm0 +; CHECK-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm3 * xmm2) + xmm0 +; CHECK-NEXT: retq +entry: + %fneg = fneg <4 x float> %in_vec + %rcp = tail call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %fneg) + %cmp = fcmp une <4 x float> zeroinitializer, %in_vec + %sel = select <4 x i1> %cmp, <4 x float> %rcp, <4 x float> splat (float 1.000000e+00) + %fma = call nsz <4 x float> @llvm.fma.v4f32(<4 x float> %rcp, <4 x float> zeroinitializer, <4 x float> %sel) + ret <4 x float> %fma +} From 2d079b96a5fb2d1da62cfddbafa6632058b22e76 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Fri, 9 May 2025 17:55:48 -0700 Subject: [PATCH 26/83] release/20.x: [clang-format] Fix a crash on formatting missing r_paren/r_brace (#138230) Backport 79210feb2993ff9a79ef11f8a7016a527d4fcf22 --- clang/lib/Format/UnwrappedLineParser.cpp | 4 ++-- clang/unittests/Format/FormatTest.cpp | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index c3ffabce15ec8..673b3e6c4b8c2 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1837,8 +1837,8 @@ void UnwrappedLineParser::parseStructuralElement( nextToken(); if (FormatTok->is(tok::l_paren)) { parseParens(); - assert(FormatTok->Previous); - if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, + if (FormatTok->Previous && + FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, tok::l_paren)) { Line->SeenDecltypeAuto = true; } diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 49e1fde1d9ccf..90a79230e9f4c 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -13962,6 +13962,8 @@ TEST_F(FormatTest, IncorrectCodeUnbalancedBraces) { verifyNoCrash("struct Foo {\n" " operator foo(bar\n" "};"); + verifyNoCrash("decltype( {\n" + " {"); } TEST_F(FormatTest, IncorrectUnbalancedBracesInMacrosWithUnicode) { From 74ed1ac61104afd632b8553fe64524851ef37478 Mon Sep 17 00:00:00 2001 From: mojyack <66899529+mojyack@users.noreply.github.com> Date: Fri, 11 Apr 2025 06:23:26 +0900 Subject: [PATCH 27/83] [sanitizer_common] Fix build on ppc64+musl (#120036) In powerpc64-unknown-linux-musl, signal.h does not include asm/ptrace.h, which causes "member access into incomplete type 'struct pt_regs'" errors. 
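For illustration, a minimal sketch of the pattern involved, assuming a
powerpc64 musl target. The member access is modeled on how the sanitizers
read the program counter from a signal context; the helper function itself
is hypothetical.

```cpp
#include <signal.h>
#if defined(__linux__) && defined(__powerpc64__)
#include <asm/ptrace.h> // musl's signal.h only forward-declares struct pt_regs
#endif

// Without the explicit include above, dereferencing the regs pointer fails
// on musl with "member access into incomplete type 'struct pt_regs'".
unsigned long SketchGetPC(ucontext_t *uc) {
  return uc->uc_mcontext.regs->nip; // nip holds the program counter on powerpc
}
```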
Include the header explicitly to fix this. Also in sanitizer_linux_libcdep.cpp, there is a usage of TlsPreTcbSize which is not defined in such a platform. Guard the branch with macro. (cherry picked from commit 801b519dfd01e21da0be17aa8f8dc2ceb0eb9e77) --- .../lib/sanitizer_common/sanitizer_linux.cpp | 4 ++++ .../sanitizer_common/sanitizer_linux_libcdep.cpp | 13 +++++++------ .../sanitizer_platform_limits_posix.cpp | 2 +- .../sanitizer_stoptheworld_linux_libcdep.cpp | 3 ++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 7aa48d29d2d53..a4d526b4466c3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -86,6 +86,10 @@ # include # endif +# if SANITIZER_LINUX && defined(__powerpc64__) +# include +# endif + # if SANITIZER_FREEBSD # include # include diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index e11eff13cd326..331e1c7d8d152 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -619,21 +619,22 @@ static void GetTls(uptr *addr, uptr *size) { *addr = tp - RoundUpTo(*size, align); *size = tp - *addr + ThreadDescriptorSize(); # else - if (SANITIZER_GLIBC) - *size += 1664; - else if (SANITIZER_FREEBSD) - *size += 128; // RTLD_STATIC_TLS_EXTRA -# if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 +# if SANITIZER_GLIBC + *size += 1664; +# elif SANITIZER_FREEBSD + *size += 128; // RTLD_STATIC_TLS_EXTRA +# if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 const uptr pre_tcb_size = TlsPreTcbSize(); *addr -= pre_tcb_size; *size += pre_tcb_size; -# else +# else // arm and aarch64 reserve two words at TP, so this underestimates the range. // However, this is sufficient for the purpose of finding the pointers to // thread-specific data keys. 
const uptr tcb_size = ThreadDescriptorSize(); *addr -= tcb_size; *size += tcb_size; +# endif # endif # endif # elif SANITIZER_NETBSD diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index a5311d266b0c4..ec5f2edab6a64 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -96,7 +96,7 @@ # include # if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \ defined(__hexagon__) || defined(__loongarch__) || SANITIZER_RISCV64 || \ - defined(__sparc__) + defined(__sparc__) || defined(__powerpc64__) # include # ifdef __arm__ typedef struct user_fpregs elf_fpregset_t; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index 945da99d41f4e..58d17d90c343a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp @@ -31,7 +31,8 @@ #include // for pid_t #include // for iovec #include // for NT_PRSTATUS -#if (defined(__aarch64__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) && \ +#if (defined(__aarch64__) || defined(__powerpc64__) || \ + SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) && \ !SANITIZER_ANDROID // GLIBC 2.20+ sys/user does not include asm/ptrace.h # include From 0439d1d36312b4abe705d8048cfae64e7fedff6a Mon Sep 17 00:00:00 2001 From: cor3ntin Date: Fri, 2 May 2025 18:45:24 +0200 Subject: [PATCH 28/83] [Clang] Fix handling of reference types in tryEvaluateBuiltinObjectSize (#138247) The order of operation was slightly incorrect, as we were checking for incomplete types *before* handling reference types. 
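As a minimal illustration, in the spirit of the regression test added
below (the helper name here is made up):

    // A reference to an incomplete type used to reach the incomplete-type
    // check before the reference was stripped to its pointee. With the
    // checks reordered, the builtin simply yields the "unknown" result
    // ((size_t)-1 for types 0 and 1) instead of being mishandled.
    struct incomplete;
    void probe(incomplete &ref) {
      (void)__builtin_object_size(&ref, 1);
    }
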
Fixes #129397 --------- Co-authored-by: Erich Keane --- clang/lib/AST/ExprConstant.cpp | 8 +++++--- clang/test/SemaCXX/builtin-object-size-cxx14.cpp | 12 ++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5aae78dd2fee7..23602362eaa79 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12710,11 +12710,13 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc, bool DetermineForCompleteObject = refersToCompleteObject(LVal); auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) { - if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType()) + if (Ty.isNull()) return false; - if (Ty->isReferenceType()) - Ty = Ty.getNonReferenceType(); + Ty = Ty.getNonReferenceType(); + + if (Ty->isIncompleteType() || Ty->isFunctionType()) + return false; return HandleSizeof(Info, ExprLoc, Ty, Result); }; diff --git a/clang/test/SemaCXX/builtin-object-size-cxx14.cpp b/clang/test/SemaCXX/builtin-object-size-cxx14.cpp index b7c6f6be01f54..fdd3cb7af088f 100644 --- a/clang/test/SemaCXX/builtin-object-size-cxx14.cpp +++ b/clang/test/SemaCXX/builtin-object-size-cxx14.cpp @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx14 -std=c++14 %s // RUN: %clang_cc1 -fsyntax-only -verify -std=c++2a %s +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++2b %s + typedef __SIZE_TYPE__ size_t; @@ -119,3 +121,13 @@ constexpr int bos_new() { // cxx14-error {{constant expression}} void *p = new int; // cxx14-note {{until C++20}} return __builtin_object_size(p, 0); } + + +namespace GH129397 { + +struct incomplete; +void test(incomplete &ref) { + __builtin_object_size(&ref, 1); +} + +} From 1c0368417f55417b8b08ae5c605231be096ef4bc Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Mon, 17 Feb 2025 11:12:55 +0100 Subject: [PATCH 29/83] [clang][analysis] Fix flaky clang/test/Analysis/live-stmts.cpp test (2nd attempt) (#127406) In my previous attempt (#126913) of fixing the flaky case was on a good track when I used the begin locations as a stable ordering. However, I forgot to consider the case when the begin locations are the same among the Exprs. In an `EXPENSIVE_CHECKS` build, arrays are randomly shuffled prior to sorting them. This exposed the flaky behavior much more often basically breaking the "stability" of the vector - as it should. Because of this, I had to revert the previous fix attempt in #127034. To fix this, I use this time `Expr::getID` for a stable ID for an Expr. Hopefully fixes #126619 Hopefully fixes #126804 (cherry picked from commit f378e52ed3c6f8da4973f97f1ef043c2eb0da721) --- clang/lib/Analysis/LiveVariables.cpp | 11 +++++++++-- clang/test/Analysis/live-stmts.cpp | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/clang/lib/Analysis/LiveVariables.cpp b/clang/lib/Analysis/LiveVariables.cpp index 481932ee59c8e..5fb5ee767a683 100644 --- a/clang/lib/Analysis/LiveVariables.cpp +++ b/clang/lib/Analysis/LiveVariables.cpp @@ -662,12 +662,19 @@ void LiveVariables::dumpExprLiveness(const SourceManager &M) { } void LiveVariablesImpl::dumpExprLiveness(const SourceManager &M) { + const ASTContext &Ctx = analysisContext.getASTContext(); + auto ByIDs = [&Ctx](const Expr *L, const Expr *R) { + return L->getID(Ctx) < R->getID(Ctx); + }; + // Don't iterate over blockEndsToLiveness directly because it's not sorted. 
for (const CFGBlock *B : *analysisContext.getCFG()) { - llvm::errs() << "\n[ B" << B->getBlockID() << " (live expressions at block exit) ]\n"; - for (const Expr *E : blocksEndToLiveness[B].liveExprs) { + std::vector LiveExprs; + llvm::append_range(LiveExprs, blocksEndToLiveness[B].liveExprs); + llvm::sort(LiveExprs, ByIDs); + for (const Expr *E : LiveExprs) { llvm::errs() << "\n"; E->dump(); } diff --git a/clang/test/Analysis/live-stmts.cpp b/clang/test/Analysis/live-stmts.cpp index c60f522588e39..ca2ff6da8b133 100644 --- a/clang/test/Analysis/live-stmts.cpp +++ b/clang/test/Analysis/live-stmts.cpp @@ -44,6 +44,8 @@ int testThatDumperWorks(int x, int y, int z) { // CHECK-NEXT: ImplicitCastExpr {{.*}} // CHECK-NEXT: `-ImplicitCastExpr {{.*}} // CHECK-NEXT: `-DeclRefExpr {{.*}} 'x' 'int' +// CHECK-EMPTY: +// CHECK-EMPTY: // CHECK: [ B4 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-NEXT: DeclRefExpr {{.*}} 'y' 'int' From a708fb737a78ff0b3d13d3d8e21f354542947e07 Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Sun, 27 Apr 2025 11:12:47 +0800 Subject: [PATCH 30/83] [RISCV] Allow `Zicsr`/`Zifencei` to duplicate with `g` (#136842) This matches GCC and we supported it in LLVM 17/18. Fixes #136803 (cherry picked from commit 6c3373534305a2ce23dd939344dd0a387a09fe88) --- clang/docs/ReleaseNotes.rst | 2 ++ llvm/lib/TargetParser/RISCVISAInfo.cpp | 18 +++++++++++++++--- .../TargetParser/RISCVISAInfoTest.cpp | 8 ++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b8f26ec9a5447..47ef2f80ac3f2 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1267,6 +1267,8 @@ RISC-V Support - The option ``-mcmodel=large`` for the large code model is supported. - Bump RVV intrinsic to version 1.0, the spec: https://github.com/riscv-non-isa/rvv-intrinsic-doc/releases/tag/v1.0.0-rc4 +- `Zicsr` / `Zifencei` are allowed to be duplicated in the presence of `g` in `-march`. + CUDA/HIP Language Changes ^^^^^^^^^^^^^^^^^^^^^^^^^ - Fixed a bug about overriding a constexpr pure-virtual member function with a non-constexpr virtual member function which causes compilation failure when including standard C++ header `format`. diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index c78d60fd86b3f..64ec411cb06e1 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -45,9 +45,8 @@ struct RISCVProfile { } // end anonymous namespace -static const char *RISCVGImplications[] = { - "i", "m", "a", "f", "d", "zicsr", "zifencei" -}; +static const char *RISCVGImplications[] = {"i", "m", "a", "f", "d"}; +static const char *RISCVGImplicationsZi[] = {"zicsr", "zifencei"}; #define GET_SUPPORTED_EXTENSIONS #include "llvm/TargetParser/RISCVTargetParserDef.inc" @@ -718,6 +717,19 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, } while (!Ext.empty()); } + // We add Zicsr/Zifenci as final to allow duplicated "zicsr"/"zifencei" like + // "rv64g_zicsr_zifencei". 
+ if (Baseline == 'g') { + for (const char *Ext : RISCVGImplicationsZi) { + if (ISAInfo->Exts.count(Ext)) + continue; + + auto Version = findDefaultVersion(Ext); + assert(Version && "Default extension version not found?"); + ISAInfo->Exts[std::string(Ext)] = {Version->Major, Version->Minor}; + } + } + return RISCVISAInfo::postProcessAndChecking(std::move(ISAInfo)); } diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 7ebfcf915a7c5..5089bc0fd479a 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -507,6 +507,14 @@ TEST(ParseArchString, RejectsDoubleOrTrailingUnderscore) { } TEST(ParseArchString, RejectsDuplicateExtensionNames) { + // Zicsr/Zifencei are allowed to duplicate with "g". + ASSERT_THAT_EXPECTED(RISCVISAInfo::parseArchString("rv64g_zicsr", true), + Succeeded()); + ASSERT_THAT_EXPECTED(RISCVISAInfo::parseArchString("rv64g_zifencei", true), + Succeeded()); + ASSERT_THAT_EXPECTED( + RISCVISAInfo::parseArchString("rv64g_zicsr_zifencei", true), Succeeded()); + EXPECT_EQ(toString(RISCVISAInfo::parseArchString("rv64ii", true).takeError()), "invalid standard user-level extension 'i'"); EXPECT_EQ(toString(RISCVISAInfo::parseArchString("rv32ee", true).takeError()), From 7b09d7b446383b71b63d429b21ee45ba389c5134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Mon, 12 May 2025 10:56:29 +0200 Subject: [PATCH 31/83] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently some users reported that they observed large increases of runtime (up to +600% on some translation units) when they upgraded to a more recent (slightly patched, internal) clang version. Bisection revealed that the bulk of this increase was probably caused by my earlier commit bb27d5e5c6b194a1440b8ac4e5ace68d0ee2a849 ("Don't assume third iteration in loops"). As I evaluated that earlier commit on several open source project, it turns out that on average it's runtime-neutral (or slightly helpful: it reduced the total analysis time by 1.5%) but it can cause runtime spikes on some code: in particular it more than doubled the time to analyze `tmux` (one of the smaller test projects). Further profiling and investigation proved that these spikes were caused by an _increase of analysis scope_ because there was an heuristic that placed functions on a "don't inline this" blacklist if they reached the `-analyzer-max-loop` limit (anywhere, on any one execution path) -- which became significantly rarer when my commit ensured the analyzer no longer "just assumes" four iterations. (With more inlining significantly more entry points use up their allocated budgets, which leads to the increased runtime.) I feel that this heuristic for the "don't inline" blacklist is unjustified and arbitrary, because reaching the "retry without inlining" limit on one path does not imply that inlining the function won't be valuable on other paths -- so I hope that we can eventually replace it with more "natural" limits of the analysis scope. 
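Skipping ahead a little: the workaround below is exposed as an
analyzer-config option, so users who prefer the old, broader inlining
behavior can opt back into it through the usual config plumbing,
roughly (with the default driver spelling):

    clang --analyze -Xclang -analyzer-config \
      -Xclang inline-functions-with-ambiguous-loops=true file.c
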
However, the runtime increases are annoying for the users whose project is affected, so I created this quick workaround commit that approximates the "don't inline" blacklist effects of ambiguous loops (where the analyzer doesn't understand the loop condition) without fully reverting the "Don't assume third iteration" commit (to avoid reintroducing the false positives that were eliminated by it). Investigating this issue was a team effort: I'm grateful to Endre Fülöp (gamesh411) who did the bisection and shared his time measurement setup, and Gábor Tóthvári (tigbr) who helped me in profiling. (cherry picked from commit 9600a12f0de233324b559f60997b9c2db153fede) --- .../StaticAnalyzer/Core/AnalyzerOptions.def | 13 ++ .../Core/PathSensitive/FunctionSummary.h | 4 - clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 60 +++++- clang/test/Analysis/analyzer-config.c | 1 + .../Analysis/loop-based-inlining-prevention.c | 200 ++++++++++++++++++ clang/test/Analysis/loop-unrolling.cpp | 30 ++- 6 files changed, 286 insertions(+), 22 deletions(-) create mode 100644 clang/test/Analysis/loop-based-inlining-prevention.c diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index 34bb7a809162b..dbb8e832db5ff 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -385,6 +385,19 @@ ANALYZER_OPTION( "flex\" won't be analyzed.", true) +ANALYZER_OPTION( + bool, InlineFunctionsWithAmbiguousLoops, "inline-functions-with-ambiguous-loops", + "If disabled (the default), the analyzer puts functions on a \"do not " + "inline this\" list if it finds an execution path within that function " + "that may potentially perform 'analyzer-max-loop' (= 4 by default) " + "iterations in a loop. (Note that functions that _definitely_ reach the " + "loop limit on some execution path are currently marked as \"do not " + "inline\" even if this option is enabled.) Enabling this option " + "eliminates this (somewhat arbitrary) restriction from the analysis " + "scope, which increases the analysis runtime (on average by ~10%, but " + "a few translation units may see much larger slowdowns).", + false) + //===----------------------------------------------------------------------===// // Unsigned analyzer options. 
//===----------------------------------------------------------------------===// diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h index 3ee0d229cfc29..761395260a0cf 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h @@ -81,10 +81,6 @@ class FunctionSummariesTy { I->second.MayInline = 0; } - void markReachedMaxBlockCount(const Decl *D) { - markShouldNotInline(D); - } - std::optional mayInline(const Decl *D) { MapTy::const_iterator I = Map.find(D); if (I != Map.end() && I->second.InlineChecked) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 140c77790496d..cfb8be2e7f0f8 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -2510,6 +2510,20 @@ bool ExprEngine::replayWithoutInlining(ExplodedNode *N, return true; } +/// Return the innermost location context which is inlined at `Node`, unless +/// it's the top-level (entry point) location context. +static const LocationContext *getInlinedLocationContext(ExplodedNode *Node, + ExplodedGraph &G) { + const LocationContext *CalleeLC = Node->getLocation().getLocationContext(); + const LocationContext *RootLC = + (*G.roots_begin())->getLocation().getLocationContext(); + + if (CalleeLC->getStackFrame() == RootLC->getStackFrame()) + return nullptr; + + return CalleeLC; +} + /// Block entrance. (Update counters). void ExprEngine::processCFGBlockEntrance(const BlockEdge &L, NodeBuilderWithSinks &nodeBuilder, @@ -2557,21 +2571,24 @@ void ExprEngine::processCFGBlockEntrance(const BlockEdge &L, const ExplodedNode *Sink = nodeBuilder.generateSink(Pred->getState(), Pred, &tag); - // Check if we stopped at the top level function or not. - // Root node should have the location context of the top most function. - const LocationContext *CalleeLC = Pred->getLocation().getLocationContext(); - const LocationContext *CalleeSF = CalleeLC->getStackFrame(); - const LocationContext *RootLC = - (*G.roots_begin())->getLocation().getLocationContext(); - if (RootLC->getStackFrame() != CalleeSF) { - Engine.FunctionSummaries->markReachedMaxBlockCount(CalleeSF->getDecl()); + if (const LocationContext *LC = getInlinedLocationContext(Pred, G)) { + // FIXME: This will unconditionally prevent inlining this function (even + // from other entry points), which is not a reasonable heuristic: even if + // we reached max block count on this particular execution path, there + // may be other execution paths (especially with other parametrizations) + // where the analyzer can reach the end of the function (so there is no + // natural reason to avoid inlining it). However, disabling this would + // significantly increase the analysis time (because more entry points + // would exhaust their allocated budget), so it must be compensated by a + // different (more reasonable) reduction of analysis scope. + Engine.FunctionSummaries->markShouldNotInline( + LC->getStackFrame()->getDecl()); // Re-run the call evaluation without inlining it, by storing the // no-inlining policy in the state and enqueuing the new work item on // the list. Replay should almost never fail. Use the stats to catch it // if it does. 
- if ((!AMgr.options.NoRetryExhausted && - replayWithoutInlining(Pred, CalleeLC))) + if ((!AMgr.options.NoRetryExhausted && replayWithoutInlining(Pred, LC))) return; NumMaxBlockCountReachedInInlined++; } else @@ -2835,8 +2852,29 @@ void ExprEngine::processBranch( // conflicts with the widen-loop analysis option (which is off by // default). If we intend to support and stabilize the loop widening, // we must ensure that it 'plays nicely' with this logic. - if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops) + if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops) { Builder.generateNode(StTrue, true, PredN); + } else if (!AMgr.options.InlineFunctionsWithAmbiguousLoops) { + // FIXME: There is an ancient and arbitrary heuristic in + // `ExprEngine::processCFGBlockEntrance` which prevents all further + // inlining of a function if it finds an execution path within that + // function which reaches the `MaxBlockVisitOnPath` limit (a/k/a + // `analyzer-max-loop`, by default four iterations in a loop). Adding + // this "don't assume third iteration" logic significantly increased + // the analysis runtime on some inputs because less functions were + // arbitrarily excluded from being inlined, so more entry points used + // up their full allocated budget. As a hacky compensation for this, + // here we apply the "should not inline" mark in cases when the loop + // could potentially reach the `MaxBlockVisitOnPath` limit without the + // "don't assume third iteration" logic. This slightly overcompensates + // (activates if the third iteration can be entered, and will not + // recognize cases where the fourth iteration would't be completed), but + // should be good enough for practical purposes. + if (const LocationContext *LC = getInlinedLocationContext(Pred, G)) { + Engine.FunctionSummaries->markShouldNotInline( + LC->getStackFrame()->getDecl()); + } + } } if (StFalse) diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index d5eb790b82f23..b47ca59e79827 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -88,6 +88,7 @@ // CHECK-NEXT: graph-trim-interval = 1000 // CHECK-NEXT: ignore-bison-generated-files = true // CHECK-NEXT: ignore-flex-generated-files = true +// CHECK-NEXT: inline-functions-with-ambiguous-loops = false // CHECK-NEXT: inline-lambdas = true // CHECK-NEXT: ipa = dynamic-bifurcate // CHECK-NEXT: ipa-always-inline-size = 3 diff --git a/clang/test/Analysis/loop-based-inlining-prevention.c b/clang/test/Analysis/loop-based-inlining-prevention.c new file mode 100644 index 0000000000000..73627112e2d32 --- /dev/null +++ b/clang/test/Analysis/loop-based-inlining-prevention.c @@ -0,0 +1,200 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify=expected,default %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config inline-functions-with-ambiguous-loops=true -verify=expected,enabled %s + +// This file tests some heuristics in the engine that put functions on a +// "do not inline" list if their analyisis reaches the `analyzer-max-loop` +// limit (by default 4 iterations) in a loop. 
This was almost surely intended +// as memoization optimization for the "retry without inlining" fallback (if we +// had to retry once, next time don't even try inlining), but aggressively +// oversteps the "natural" scope: reaching 4 iterations on _one particular_ +// execution path does not imply that each path would need "retry without +// inlining" especially if a different call receives different arguments. +// +// This heuristic significantly affects the scope/depth of the analysis (and +// therefore the execution time) because without this limitation on the +// inlining significantly more entry points would be able to exhaust their +// `max-nodes` quota. (Trivial thin wrappers around big complex functions are +// common in many projects.) +// +// Unfortunately, this arbitrary heuristic strongly relies on the current loop +// handling model and its many limitations, so improvements in loop handling +// can cause surprising slowdowns by reducing the "do not inline" blacklist. +// In the tests "FIXME-BUT-NEEDED" comments mark "problematic" (aka buggy) +// analyzer behavior which cannot be fixed without also improving the +// heuristics for (not) inlining large functions. + + int getNum(void); // Get an unknown symbolic number. + +void clang_analyzer_dump(int arg); + +//----------------------------------------------------------------------------- +// Simple case: inlined function never reaches `analyzer-max-loop`, so it is +// always inlined. + +int inner_simple(int callIdx) { + clang_analyzer_dump(callIdx); // expected-warning {{1 S32}} + // expected-warning@-1 {{2 S32}} + return 42; +} + +int outer_simple(void) { + int x = inner_simple(1); + int y = inner_simple(2); + return 53 / (x - y); // expected-warning {{Division by zero}} +} + +//----------------------------------------------------------------------------- +// Inlined function always reaches `analyzer-max-loop`, which stops the +// analysis on that path and puts the function on the "do not inline" list. + +int inner_fixed_loop_1(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // expected-warning {{1 S32}} + for (i = 0; i < 10; i++); // FIXME-BUT-NEEDED: This stops the analysis. + clang_analyzer_dump(callIdx); // no-warning + return 42; +} + +int outer_fixed_loop_1(void) { + int x = inner_fixed_loop_1(1); + int y = inner_fixed_loop_1(2); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function always reaches `analyzer-max-loop`; inlining is prevented +// even for different entry points. +// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_fixed_loop_2` is analyzed first and it will be the one which is +// able to inline the inner function. + +int inner_fixed_loop_2(int callIdx) { + // Identical copy of inner_fixed_loop_1. + int i; + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + for (i = 0; i < 10; i++); // FIXME-BUT-NEEDED: This stops the analysis. + clang_analyzer_dump(callIdx); // no-warning + return 42; +} + +int outer_1_fixed_loop_2(void) { + return inner_fixed_loop_2(1); +} + +int outer_2_fixed_loop_2(void) { + return inner_fixed_loop_2(2); +} + +//----------------------------------------------------------------------------- +// Inlined function reaches `analyzer-max-loop` only in its second call. 
The +// function is inlined twice but the second call doesn't finish and ends up +// being conservatively evaluated. + +int inner_parametrized_loop_1(int count) { + int i; + clang_analyzer_dump(count); // expected-warning {{2 S32}} + // expected-warning@-1 {{10 S32}} + for (i = 0; i < count; i++); + // FIXME-BUT-NEEDED: This loop stops the analysis when count >=4. + clang_analyzer_dump(count); // expected-warning {{2 S32}} + return 42; +} + +int outer_parametrized_loop_1(void) { + int x = inner_parametrized_loop_1(2); + int y = inner_parametrized_loop_1(10); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function reaches `analyzer-max-loop` on its first call, so the +// second call isn't inlined (although it could be fully evaluated). + +int inner_parametrized_loop_2(int count) { + // Identical copy of inner_parametrized_loop_1. + int i; + clang_analyzer_dump(count); // expected-warning {{10 S32}} + for (i = 0; i < count; i++); + // FIXME-BUT-NEEDED: This loop stops the analysis when count >=4. + clang_analyzer_dump(count); // no-warning + return 42; +} + +int outer_parametrized_loop_2(void) { + int y = inner_parametrized_loop_2(10); + int x = inner_parametrized_loop_2(2); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function may or may not reach `analyzer-max-loop` depending on an +// ambiguous check before the loop. This is very similar to the "fixed loop" +// cases: the function is placed on the "don't inline" list when any execution +// path reaches `analyzer-max-loop` (even if other execution paths reach the +// end of the function). +// NOTE: This is tested with two separate entry points to ensure that one +// inlined call is fully evaluated before we try to inline the other call. +// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_conditional_loop` is analyzed first and it will be the one which +// is able to inline the inner function. + +int inner_conditional_loop(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + if (getNum() == 777) { + for (i = 0; i < 10; i++); + } + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + return 42; +} + +int outer_1_conditional_loop(void) { + return inner_conditional_loop(1); +} + +int outer_2_conditional_loop(void) { + return inner_conditional_loop(2); +} + +//----------------------------------------------------------------------------- +// Inlined function executes an ambiguous loop that may or may not reach +// `analyzer-max-loop`. Historically, before the "don't assume third iteration" +// commit (bb27d5e5c6b194a1440b8ac4e5ace68d0ee2a849) this worked like the +// `conditional_loop` cases: the analyzer was able to find a path reaching +// `analyzer-max-loop` so inlining was disabled. After that commit the analyzer +// does not _assume_ a third (or later) iteration (i.e. does not enter those +// iterations if the loop condition is an unknown value), so e.g. this test +// function does not reach `analyzer-max-loop` iterations and the inlining is +// not disabled. 
+// Unfortunately this change significantly increased the workload and +// runtime of the analyzer (more entry points used up their budget), so the +// option `inline-functions-with-ambiguous-loops` was introduced and disabled +// by default to suppress the inlining in situations where the "don't assume +// third iteration" logic activates. +// NOTE: This is tested with two separate entry points to ensure that one +// inlined call is fully evaluated before we try to inline the other call. +// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_ambiguous_loop` is analyzed first and it will be the one which +// is able to inline the inner function. + +int inner_ambiguous_loop(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // default-warning {{2 S32}} + // enabled-warning@-1 {{1 S32}} + // enabled-warning@-2 {{2 S32}} + for (i = 0; i < getNum(); i++); + return i; +} + +int outer_1_ambiguous_loop(void) { + return inner_ambiguous_loop(1); +} +int outer_2_ambiguous_loop(void) { + return inner_ambiguous_loop(2); +} diff --git a/clang/test/Analysis/loop-unrolling.cpp b/clang/test/Analysis/loop-unrolling.cpp index bf05a7739ce48..ebae81e000c7a 100644 --- a/clang/test/Analysis/loop-unrolling.cpp +++ b/clang/test/Analysis/loop-unrolling.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true -verify -std=c++14 -analyzer-config exploration_strategy=unexplored_first_queue %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true,exploration_strategy=dfs -verify -std=c++14 -DDFS=1 %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true -verify=expected,default -std=c++14 -analyzer-config exploration_strategy=unexplored_first_queue %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true,exploration_strategy=dfs -verify=expected,dfs -std=c++14 %s void clang_analyzer_numTimesReached(); void clang_analyzer_warnIfReached(); @@ -337,6 +337,7 @@ int nested_both_unrolled() { } int simple_known_bound_loop() { + // Iteration count visible: can be unrolled and fully executed. for (int i = 2; i < 12; i++) { // This function is inlined in nested_inlined_unroll1() clang_analyzer_numTimesReached(); // expected-warning {{90}} @@ -345,27 +346,42 @@ int simple_known_bound_loop() { } int simple_unknown_bound_loop() { + // Iteration count unknown: unrolling won't happen and the execution will be + // split two times: + // (1) split between skipped loop (immediate exit) and entering the loop + // (2) split between exit after 1 iteration and entering the second iteration + // After these there is no third state split because the "don't assume third + // iteration" logic in `ExprEngine::processBranch` prevents it; but the + // `legacy-inlining-prevention` logic will put this function onto the list of + // functions that may not be inlined in the future. + // The exploration strategy apparently influences the number of times this + // function can be inlined before it's placed on the "don't inline" list. 
for (int i = 2; i < getNum(); i++) { - clang_analyzer_numTimesReached(); // expected-warning {{8}} + clang_analyzer_numTimesReached(); // default-warning {{4}} dfs-warning {{8}} } return 0; } int nested_inlined_unroll1() { + // Here the analyzer can unroll and fully execute both the outer loop and the + // inner loop within simple_known_bound_loop(). int k; for (int i = 0; i < 9; i++) { clang_analyzer_numTimesReached(); // expected-warning {{9}} - k = simple_known_bound_loop(); // no reevaluation without inlining + k = simple_known_bound_loop(); } int a = 22 / k; // expected-warning {{Division by zero}} return 0; } int nested_inlined_no_unroll1() { + // Here no unrolling happens and we only run `analyzer-max-loop` (= 4) + // iterations of the loop within this function, but some state splits happen + // in `simple_unknown_bound_loop()` calls. int k; - for (int i = 0; i < 9; i++) { - clang_analyzer_numTimesReached(); // expected-warning {{10}} - k = simple_unknown_bound_loop(); // reevaluation without inlining, splits the state as well + for (int i = 0; i < 40; i++) { + clang_analyzer_numTimesReached(); // default-warning {{9}} dfs-warning {{12}} + k = simple_unknown_bound_loop(); } int a = 22 / k; // no-warning return 0; From 1e4d39e07757d10ff7b880565fca2ae12695622a Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 14 May 2025 11:14:42 -0700 Subject: [PATCH 32/83] Bump version to 20.1.6 --- cmake/Modules/LLVMVersion.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index c8cc0b8968b05..40ba164763d60 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 5) + set(LLVM_VERSION_PATCH 6) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) From ff2e8f93f6090965e82d799af43f6dfef52baa66 Mon Sep 17 00:00:00 2001 From: Douglas Yung Date: Wed, 7 May 2025 06:13:07 +0000 Subject: [PATCH 33/83] Fix test pfalse-v4i1.ll added in #138712 to require asserts. Should fix build bot failure: https://lab.llvm.org/buildbot/#/builders/202/builds/1102 (cherry picked from commit 194a4a333a95f9e001d2c8abe82c3d4cf8894acf) --- llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll b/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll index c0904b8b4fdd6..2c26bb1e310ea 100644 --- a/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll +++ b/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=hexagon -debug-only=isel 2>&1 < %s - | FileCheck %s +; REQUIRES: asserts ; CHECK: [[R0:%[0-9]+]]:intregs = A2_tfrsi 0 ; CHECK-NEXT: predregs = C2_tfrrp killed [[R0]]:intregs From 85e06a7614831577a632905c7e3a4f6501fcabd3 Mon Sep 17 00:00:00 2001 From: tangaac Date: Wed, 7 May 2025 09:29:35 +0800 Subject: [PATCH 34/83] [LoongArch] Fix fp_to_uint/fp_to_sint conversion errors for lasx (#137129) Prvious `fp_to_uint/fp_to_sint` patterns for `v4f64 -> v4i32` are wrong. Conversion error was triggered after pr https://github.com/llvm/llvm-project/pull/126456. 
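For illustration only, a hedged sketch using Clang's vector extensions
(not the original reproducer):

    // Lane-wise f64 -> i32 truncation. The old LASX pattern lowered this
    // through an intermediate f32 convert, which can round values such as
    // 16777217.0 (2^24 + 1) and produce a wrong integer; the direct
    // double-to-word truncation emitted now avoids that rounding.
    typedef double v4f64 __attribute__((vector_size(32)));
    typedef int v4i32 __attribute__((vector_size(16)));

    v4i32 trunc_v4f64(v4f64 v) {
      return __builtin_convertvector(v, v4i32);
    }
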
(cherry picked from commit b5c7724f82b6afe98761d0a1c5b6ee7cd2330ada) --- .../LoongArch/LoongArchLASXInstrInfo.td | 28 +++++++++---------- .../LoongArch/lasx/ir-instruction/fptosi.ll | 4 +-- .../LoongArch/lasx/ir-instruction/fptoui.ll | 4 +-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 7022fddf34100..9b515a2721d7f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1792,24 +1792,24 @@ def : Pat<(v4f32 (uint_to_fp v4i64:$vj)), // XVFTINTRZ_{W_S/L_D} def : Pat<(v8i32 (fp_to_sint v8f32:$vj)), (XVFTINTRZ_W_S v8f32:$vj)>; def : Pat<(v4i64 (fp_to_sint v4f64:$vj)), (XVFTINTRZ_L_D v4f64:$vj)>; -def : Pat<(v4i64 (fp_to_sint v4f32:$vj)), - (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), (VFTINTRZ_W_S v4f32:$vj), - sub_128))>; -def : Pat<(v4i32 (fp_to_sint (v4f64 LASX256:$vj))), - (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), - v4f64:$vj)), - sub_128)>; +def : Pat<(v4i64(fp_to_sint v4f32:$vj)), (VEXT2XV_D_W(SUBREG_TO_REG(i64 0), + (VFTINTRZ_W_S v4f32:$vj), + sub_128))>; +def : Pat<(v4i32(fp_to_sint v4f64:$vj)), + (EXTRACT_SUBREG(XVPICKEV_W(XVPERMI_D(XVFTINTRZ_L_D v4f64:$vj), 238), + (XVFTINTRZ_L_D v4f64:$vj)), + sub_128)>; // XVFTINTRZ_{W_SU/L_DU} def : Pat<(v8i32 (fp_to_uint v8f32:$vj)), (XVFTINTRZ_WU_S v8f32:$vj)>; def : Pat<(v4i64 (fp_to_uint v4f64:$vj)), (XVFTINTRZ_LU_D v4f64:$vj)>; -def : Pat<(v4i64 (fp_to_uint v4f32:$vj)), - (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), (VFTINTRZ_WU_S v4f32:$vj), - sub_128))>; -def : Pat<(v4i32 (fp_to_uint (v4f64 LASX256:$vj))), - (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), - v4f64:$vj)), - sub_128)>; +def : Pat<(v4i64(fp_to_uint v4f32:$vj)), (VEXT2XV_DU_WU(SUBREG_TO_REG(i64 0), + (VFTINTRZ_WU_S v4f32:$vj), + sub_128))>; +def : Pat<(v4i32(fp_to_uint v4f64:$vj)), + (EXTRACT_SUBREG(XVPICKEV_W(XVPERMI_D(XVFTINTRZ_LU_D v4f64:$vj), 238), + (XVFTINTRZ_LU_D v4f64:$vj)), + sub_128)>; // XVPERMI_Q foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll index 0d9f57b57ffae..ed333c303879c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll @@ -31,9 +31,9 @@ define void @fptosi_v4f64_v4i32(ptr %res, ptr %in){ ; CHECK-LABEL: fptosi_v4f64_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 +; CHECK-NEXT: xvpickev.w $xr0, $xr1, $xr0 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret %v0 = load <4 x double>, ptr %in diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll index 27d70f33cd34e..9c499ba71d646 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll @@ -31,9 +31,9 @@ define void @fptoui_v4f64_v4i32(ptr %res, ptr %in){ ; CHECK-LABEL: fptoui_v4f64_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 +; CHECK-NEXT: xvpickev.w $xr0, $xr1, 
$xr0 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret %v0 = load <4 x double>, ptr %in From e3d2c00ccee45e882233fce230c42b23423a8ef7 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Thu, 15 May 2025 03:28:18 -0700 Subject: [PATCH 35/83] [LLD][COFF] Allow -arm64xsameaddress in ARM64EC directives (#139631) Make it a no-op for now, which is sufficient for non-hybrid images. Fixes #131712. (cherry picked from commit d5da557782dd47395fb41e03d7663df6319d7ea6) --- lld/COFF/Driver.cpp | 6 +++ lld/COFF/Options.td | 1 + lld/test/COFF/arm64x-sameaddress.test | 56 +++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 lld/test/COFF/arm64x-sameaddress.test diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index f50ca529df4d7..a669b7e9296f6 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -492,6 +492,12 @@ void LinkerDriver::parseDirectives(InputFile *file) { case OPT_alternatename: parseAlternateName(arg->getValue()); break; + case OPT_arm64xsameaddress: + if (!file->symtab.isEC()) + Warn(ctx) << arg->getSpelling() + << " is not allowed in non-ARM64EC files (" << toString(file) + << ")"; + break; case OPT_defaultlib: if (std::optional path = findLibIfNew(arg->getValue())) enqueuePath(*path, false, false); diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index b6fd3d0daaef9..ea2e7ded38043 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -355,3 +355,4 @@ def tlbid : P_priv<"tlbid">; def tlbout : P_priv<"tlbout">; def verbose_all : P_priv<"verbose">; def guardsym : P_priv<"guardsym">; +def arm64xsameaddress : P_priv<"arm64xsameaddress">; diff --git a/lld/test/COFF/arm64x-sameaddress.test b/lld/test/COFF/arm64x-sameaddress.test new file mode 100644 index 0000000000000..c69be9d268c3b --- /dev/null +++ b/lld/test/COFF/arm64x-sameaddress.test @@ -0,0 +1,56 @@ +REQUIRES: aarch64 +RUN: split-file %s %t.dir && cd %t.dir + +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows func-arm64ec.s -o func-arm64ec.obj +RUN: llvm-mc -filetype=obj -triple=aarch64-windows func-arm64.s -o func-arm64.obj +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows drectve.s -o drectve.obj +RUN: llvm-mc -filetype=obj -triple=aarch64-windows drectve.s -o drectve-arm64.obj +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj +RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64.s -o loadconfig-arm64.obj + +RUN: lld-link -machine:arm64x -dll -noentry -out:out.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ +RUN: func-arm64.obj func-arm64ec.obj drectve.obj + +RUN: lld-link -machine:arm64x -dll -noentry -out:out-cmd.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ +RUN: func-arm64.obj func-arm64ec.obj -arm64xsameaddress:func + +RUN: lld-link -machine:arm64ec -dll -noentry -out:out-ec.dll loadconfig-arm64ec.obj func-arm64ec.obj drectve.obj + +RUN: lld-link -machine:arm64x -dll -noentry -out:out-warn.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ +RUN: func-arm64.obj func-arm64ec.obj drectve-arm64.obj 2>&1 | FileCheck --check-prefix=WARN %s +WARN: lld-link: warning: -arm64xsameaddress: is not allowed in non-ARM64EC files (drectve-arm64.obj) + +#--- func-arm64.s + .section .text,"xr",discard,func + .globl func +func: + mov x0, #1 + ret + +#--- func-arm64ec.s + .section .text,"xr",discard,"#func" + .globl "#func" +"#func": + mov x0, #2 + ret + + .weak_anti_dep func + .set func,"#func" + + .section .wowthk,"xr",discard,entry_thunk + .globl entry_thunk +entry_thunk: + mov x0, #3 + ret + + .section 
.test,"dr" + .rva func + + .section .hybmp$x,"yi" + .symidx "#func" + .symidx entry_thunk + .word 1 + +#--- drectve.s + .section .drectve, "yn" + .ascii " -arm64xsameaddress:func" From 5befd1fb3c97a5c880da4c1e3ae4c8cf7b614425 Mon Sep 17 00:00:00 2001 From: Shafik Yaghmour Date: Thu, 15 May 2025 16:04:37 -0700 Subject: [PATCH 36/83] [Clang][AST] Fix HandleLValueBase to deal with references (#140105) Since P2280R4 Unknown references and pointers was implemented, HandleLValueBase now has to deal with referneces: D.MostDerivedType->getAsCXXRecordDecl() will return a nullptr if D.MostDerivedType is a ReferenceType. The fix is to use getNonReferenceType() to obtain the Pointee Type if we have a reference. Fixes: https://github.com/llvm/llvm-project/issues/139452 (cherry picked from commit 136f2ba2a7bca015ef831c91fb0db5e5e31b7632) # Conflicts: # clang/docs/ReleaseNotes.rst --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/AST/ExprConstant.cpp | 6 +++++- .../SemaCXX/constant-expression-p2280r4.cpp | 21 +++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 47ef2f80ac3f2..2f43dc4021fd8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -909,6 +909,8 @@ Bug Fixes in This Version being deleted has a potentially throwing destructor (#GH118660). - Clang now outputs correct values when #embed data contains bytes with negative signed char values (#GH102798). +- Fix crash due to unknown references and pointer implementation and handling of + base classes. (GH139452) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 23602362eaa79..e0746f4532245 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3311,7 +3311,11 @@ static bool HandleLValueBase(EvalInfo &Info, const Expr *E, LValue &Obj, return false; // Extract most-derived object and corresponding type. - DerivedDecl = D.MostDerivedType->getAsCXXRecordDecl(); + // FIXME: After implementing P2280R4 it became possible to get references + // here. We do MostDerivedType->getAsCXXRecordDecl() in several other + // locations and if we see crashes in those locations in the future + // it may make more sense to move this fix into Lvalue::set. 
+ DerivedDecl = D.MostDerivedType.getNonReferenceType()->getAsCXXRecordDecl(); if (!CastToDerivedClass(Info, E, Obj, DerivedDecl, D.MostDerivedPathLength)) return false; diff --git a/clang/test/SemaCXX/constant-expression-p2280r4.cpp b/clang/test/SemaCXX/constant-expression-p2280r4.cpp index 6c9a87267109c..87beeb4d3dc84 100644 --- a/clang/test/SemaCXX/constant-expression-p2280r4.cpp +++ b/clang/test/SemaCXX/constant-expression-p2280r4.cpp @@ -179,3 +179,24 @@ namespace extern_reference_used_as_unknown { int y; constinit int& g = (x,y); // expected-warning {{left operand of comma operator has no effect}} } + +namespace GH139452 { +struct Dummy { + explicit operator bool() const noexcept { return true; } +}; + +struct Base { int error; }; +struct Derived : virtual Base { }; + +template +constexpr R get_value() { + const auto& derived_val = Derived{}; + if (derived_val.error != 0) + /* nothing */; + return R{}; +} + +int f() { + return !get_value(); // contextually convert the function call result to bool +} +} From a169f5ca4e4fdd3e55baef9e69a3fa92e79d7b4a Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Tue, 29 Apr 2025 14:35:15 +0200 Subject: [PATCH 37/83] Correct position of CFI Instruction for Pointer Authentication" This a partial of reverts commit 0b73b5af60f2c544892b9dd68b4fa43eeff52fc1. Fixes #137802 --- .../lib/Target/AArch64/AArch64PointerAuth.cpp | 13 ++++-- .../machine-outliner-retaddr-sign-cfi.ll | 2 +- ...tliner-retaddr-sign-diff-scope-same-key.ll | 12 ++--- .../machine-outliner-retaddr-sign-non-leaf.ll | 12 ++--- .../machine-outliner-retaddr-sign-regsave.mir | 2 +- ...tliner-retaddr-sign-same-scope-diff-key.ll | 12 ++--- .../machine-outliner-retaddr-sign-sp-mod.mir | 34 +++++++------- ...machine-outliner-retaddr-sign-subtarget.ll | 6 +-- .../machine-outliner-retaddr-sign-thunk.ll | 18 ++++---- .../AArch64/pacbti-llvm-generated-funcs-2.ll | 4 +- ...sign-return-address-cfi-negate-ra-state.ll | 24 +++++----- .../CodeGen/AArch64/sign-return-address.ll | 44 +++++++++---------- .../MIR/AArch64/return-address-signing.mir | 4 +- 13 files changed, 96 insertions(+), 91 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp index c3bc70ad6f427..aba84574c7526 100644 --- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp +++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp @@ -152,12 +152,15 @@ void AArch64PointerAuth::signLR(MachineFunction &MF, ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel()); } else { BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup); - emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI); + if (MFnI.branchProtectionPAuthLR()) + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI); BuildMI(MBB, MBBI, DL, TII->get(MFnI.shouldSignWithBKey() ? AArch64::PACIBSP : AArch64::PACIASP)) .setMIFlag(MachineInstr::FrameSetup) ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel()); + if (!MFnI.branchProtectionPAuthLR()) + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI); } if (!EmitCFI && NeedsWinCFI) { @@ -220,11 +223,15 @@ void AArch64PointerAuth::authenticateLR( .setMIFlag(MachineInstr::FrameDestroy); } else { BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, PACSym); - emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, - EmitAsyncCFI); + if (MFnI->branchProtectionPAuthLR()) + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, + EmitAsyncCFI); BuildMI(MBB, MBBI, DL, TII->get(UseBKey ? 
AArch64::AUTIBSP : AArch64::AUTIASP)) .setMIFlag(MachineInstr::FrameDestroy); + if (!MFnI->branchProtectionPAuthLR()) + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, + EmitAsyncCFI); } if (NeedsWinCFI) { diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll index e7de54036245a..4bbbe40176313 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll @@ -9,9 +9,9 @@ define void @a() "sign-return-address"="all" "sign-return-address-key"="b_key" { ; CHECK-LABEL: a: // @a ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #27 ; V83A-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll index a26dda1d5c1f1..6a11bef08c740 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll @@ -5,9 +5,9 @@ define void @a() "sign-return-address"="all" { ; CHECK-LABEL: a: // @a -; CHECK: .cfi_negate_ra_state -; V8A-NEXT: hint #25 -; V83A-NEXT: paciasp +; V8A: hint #25 +; V83A: paciasp +; CHECK-NEXT: .cfi_negate_ra_state %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -52,9 +52,9 @@ define void @b() "sign-return-address"="non-leaf" { define void @c() "sign-return-address"="all" { ; CHECK-LABEL: c: // @c -; CHECK: .cfi_negate_ra_state -; V8A-NEXT: hint #25 -; V83A-NEXT: paciasp +; V8A: hint #25 +; V83A: paciasp +; CHECK-NEXT .cfi_negate_ra_state %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll index 064b2b78c7bc7..1e7224683c6c8 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll @@ -8,8 +8,8 @@ define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V8A-LABEL: a: ; V8A: // %bb.0: ; V8A-NEXT: .cfi_b_key_frame -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 32 @@ -26,8 +26,8 @@ define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V83A-LABEL: a: ; V83A: // %bb.0: ; V83A-NEXT: .cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 32 @@ -59,8 +59,8 @@ define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V8A-LABEL: b: ; V8A: // %bb.0: ; V8A-NEXT: .cfi_b_key_frame -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 32 @@ -77,8 +77,8 @@ define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V83A-LABEL: b: ; V83A: // %bb.0: ; V83A-NEXT: 
.cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 32 @@ -110,8 +110,8 @@ define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V8A-LABEL: c: ; V8A: // %bb.0: ; V8A-NEXT: .cfi_b_key_frame -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 32 @@ -128,8 +128,8 @@ define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V83A-LABEL: c: ; V83A: // %bb.0: ; V83A-NEXT: .cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 32 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir index 218ee6609c803..9a983cbd6714e 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir @@ -81,8 +81,8 @@ body: | # CHECK: name: bar # CHECK: bb.0: # CHECK: frame-setup EMITBKEY -# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: frame-setup PACIBSP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NOT: OUTLINED_FUNCTION_ # CHECK: bb.1: # CHECK-NOT: OUTLINED_FUNCTION_ diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll index 5c45373d8c1d6..87771f5de4f69 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll @@ -7,8 +7,8 @@ define void @a() "sign-return-address"="all" { ; V8A-LABEL: a: ; V8A: // %bb.0: -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: .cfi_def_cfa_offset 32 ; V8A-NEXT: mov w8, #1 // =0x1 @@ -26,8 +26,8 @@ define void @a() "sign-return-address"="all" { ; ; V83A-LABEL: a: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: .cfi_def_cfa_offset 32 ; V83A-NEXT: mov w8, #1 // =0x1 @@ -60,8 +60,8 @@ define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" { ; V8A-LABEL: b: ; V8A: // %bb.0: ; V8A-NEXT: .cfi_b_key_frame -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: .cfi_def_cfa_offset 32 ; V8A-NEXT: mov w8, #1 // =0x1 @@ -80,8 +80,8 @@ define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" { ; V83A-LABEL: b: ; V83A: // %bb.0: ; V83A-NEXT: .cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: .cfi_def_cfa_offset 32 ; V83A-NEXT: mov w8, #1 // =0x1 @@ -113,8 +113,8 @@ define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" { define void @c() "sign-return-address"="all" { ; V8A-LABEL: c: ; V8A: // %bb.0: -; V8A-NEXT: 
.cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: .cfi_def_cfa_offset 32 ; V8A-NEXT: mov w8, #1 // =0x1 @@ -132,8 +132,8 @@ define void @c() "sign-return-address"="all" { ; ; V83A-LABEL: c: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: .cfi_def_cfa_offset 32 ; V83A-NEXT: mov w8, #1 // =0x1 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir index d4a4b886ec0e3..22e5edef2a939 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir @@ -86,11 +86,11 @@ body: | # CHECK: body: | # CHECK-NEXT: bb.0 (%ir-block.0): # CHECK-NEXT: liveins: $lr -# CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK: BL @[[OUTLINED_FUNC:OUTLINED_FUNCTION_[0-9]+]] -# CHECK: frame-destroy CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: RET undef $lr ... @@ -119,11 +119,11 @@ body: | # CHECK: body: | # CHECK-NEXT: bb.0 (%ir-block.0): # CHECK-NEXT: liveins: $lr -# CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK: BL @[[OUTLINED_FUNC]] -# CHECK: frame-destroy CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: RET undef $lr ... 
@@ -174,22 +174,22 @@ body: | # CHECK: body: | # CHECK-NEXT: bb.0 (%ir-block.0): # CHECK-NEXT: liveins: $lr -# CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NOT: BL @OUTLINED_FUNCTION_{{.*}} -# CHECK: frame-destroy CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: RET undef $lr # CHECK-LABEL: name: illegal1 # CHECK: body: | # CHECK-NEXT: bb.0 (%ir-block.0): # CHECK-NEXT: liveins: $lr -# CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NOT: BL @OUTLINED_FUNCTION_{{.*}} -# CHECK: frame-destroy CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: RET undef $lr # Outlined function that contains only legal sp modifications @@ -198,8 +198,8 @@ body: | # CHECK-NEXT: bb.0: # CHECK-NEXT: liveins: $lr # CHECK-NEXT: {{^ $}} -# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK: $sp = frame-destroy ADDXri $sp, 16, 0 # CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll index cb43b3ba3e47e..a7ea32952f3b7 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll @@ -9,8 +9,8 @@ define void @a() #0 { ; CHECK-LABEL: a: // @a ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NOT: OUTLINED_FUNCTION_ %1 = alloca i32, align 4 %2 = alloca i32, align 4 @@ -33,8 +33,8 @@ define void @b() #0 { ; CHECK-LABEL: b: // @b ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NOT: OUTLINED_FUNCTION_ %1 = alloca i32, align 4 %2 = alloca i32, align 4 @@ -57,8 +57,8 @@ define void @c() #1 { ; CHECK-LABEL: c: // @c ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: hint #27 +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NOT: OUTLINED_FUNCTION_ %1 = alloca i32, align 4 %2 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll index 0ba4455532925..da68ea5bf0dbc 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll +++ 
b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll @@ -10,8 +10,8 @@ declare i32 @thunk_called_fn(i32, i32, i32, i32) define i32 @a() #0 { ; V8A-LABEL: a: ; V8A: // %bb.0: // %entry -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 16 ; V8A-NEXT: .cfi_offset w30, -16 @@ -27,8 +27,8 @@ define i32 @a() #0 { ; ; V83A-LABEL: a: ; V83A: // %bb.0: // %entry -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -49,8 +49,8 @@ entry: define i32 @b() #0 { ; V8A-LABEL: b: ; V8A: // %bb.0: // %entry -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 16 ; V8A-NEXT: .cfi_offset w30, -16 @@ -66,8 +66,8 @@ define i32 @b() #0 { ; ; V83A-LABEL: b: ; V83A: // %bb.0: // %entry -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -88,8 +88,8 @@ entry: define hidden i32 @c(ptr %fptr) #0 { ; V8A-LABEL: c: ; V8A: // %bb.0: // %entry -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 16 ; V8A-NEXT: .cfi_offset w30, -16 @@ -106,8 +106,8 @@ define hidden i32 @c(ptr %fptr) #0 { ; ; V83A-LABEL: c: ; V83A: // %bb.0: // %entry -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -129,8 +129,8 @@ entry: define hidden i32 @d(ptr %fptr) #0 { ; V8A-LABEL: d: ; V8A: // %bb.0: // %entry -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 16 ; V8A-NEXT: .cfi_offset w30, -16 @@ -147,8 +147,8 @@ define hidden i32 @d(ptr %fptr) #0 { ; ; V83A-LABEL: d: ; V83A: // %bb.0: // %entry -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -176,5 +176,3 @@ attributes #0 = { "sign-return-address"="non-leaf" minsize } ; CHECK-NOT: .cfi_negate_ra_state ; CHECK-NOT: auti{{[a,b]}}sp ; CHECK-NOT: hint #{{[29,31]}} -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll b/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll index f823d2aa82ac0..373c4969a9405 100644 --- a/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll +++ b/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll @@ -34,8 +34,8 @@ entry: } ;; CHECK-LABEL: __llvm_gcov_writeout: ;; CHECK: .cfi_b_key_frame -;; CHECK-NEXT: .cfi_negate_ra_state ;; CHECK-NEXT: pacibsp +;; CHECK-NEXT: .cfi_negate_ra_state define internal void @__llvm_gcov_reset() unnamed_addr #2 { entry: @@ -54,9 +54,9 @@ entry: } ;; CHECK-LABEL: __llvm_gcov_init: ;; CHECK: .cfi_b_key_frame +;; CHECK-NEXT: pacibsp ;; CHECK-NEXT: .cfi_negate_ra_state ;; CHECK-NOT: .cfi_ -;; CHECK-NEXT: pacibsp ;; CHECK: .cfi_endproc attributes #0 = { norecurse nounwind readnone "sign-return-address"="all" "sign-return-address-key"="b_key" } diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll index 6ea072846d47c..4d4b7c215b978 100644 --- a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll +++ b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll @@ -9,8 +9,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 { ; CHECK-V8A-LABEL: _Z3fooi: ; CHECK-V8A: // %bb.0: // %entry -; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: hint #25 +; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V8A-NEXT: .cfi_offset w30, -16 @@ -27,8 +27,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 { ; ; CHECK-V83A-LABEL: _Z3fooi: ; CHECK-V83A: // %bb.0: // %entry -; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: paciasp +; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V83A-NEXT: .cfi_offset w30, -16 @@ -62,8 +62,8 @@ return: ; No predecessors! define hidden noundef i32 @baz_async(i32 noundef %a) #0 uwtable(async) { ; CHECK-V8A-LABEL: baz_async: ; CHECK-V8A: // %bb.0: // %entry -; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: hint #25 +; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V8A-NEXT: .cfi_offset w30, -16 @@ -74,8 +74,8 @@ define hidden noundef i32 @baz_async(i32 noundef %a) #0 uwtable(async) { ; CHECK-V8A-NEXT: bl _Z3bari ; CHECK-V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 0 -; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: hint #29 +; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: .cfi_restore w30 ; CHECK-V8A-NEXT: b _Z3bari ; CHECK-V8A-NEXT: .LBB1_2: // %if.else @@ -84,15 +84,15 @@ define hidden noundef i32 @baz_async(i32 noundef %a) #0 uwtable(async) { ; CHECK-V8A-NEXT: add w0, w0, #1 ; CHECK-V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 0 -; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: hint #29 +; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: .cfi_restore w30 ; CHECK-V8A-NEXT: ret ; ; CHECK-V83A-LABEL: baz_async: ; CHECK-V83A: // %bb.0: // %entry -; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: paciasp +; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V83A-NEXT: .cfi_offset w30, -16 @@ -103,8 +103,8 @@ define hidden noundef i32 @baz_async(i32 noundef %a) #0 uwtable(async) { ; CHECK-V83A-NEXT: bl _Z3bari ; CHECK-V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 0 -; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: autiasp +; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: .cfi_restore w30 ; CHECK-V83A-NEXT: b _Z3bari ; CHECK-V83A-NEXT: .LBB1_2: // %if.else @@ -143,8 +143,8 @@ return: ; preds = %if.else, %if.then define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) { ; CHECK-V8A-LABEL: baz_sync: ; CHECK-V8A: // %bb.0: // %entry -; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: hint #25 +; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V8A-NEXT: .cfi_offset w30, -16 @@ -164,8 +164,8 @@ define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) { ; ; CHECK-V83A-LABEL: baz_sync: ; CHECK-V83A: // %bb.0: // %entry -; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: paciasp +; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V83A-NEXT: .cfi_offset w30, -16 @@ -216,7 +216,7 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP-NOT: DW_CFA_remember_state ; CHECK-DUMP-NOT: DW_CFA_restore_state -; CHECK-DUMP: CFA=WSP +; CHECK-DUMP: CFA=WSP{{$}} ; CHECK-DUMP: reg34=1 ; CHECK-DUMP-NOT: reg34=0 @@ -229,7 +229,6 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP: DW_CFA_restore_state: ; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state: -; CHECK-DUMP: CFA=WSP ;; First DW_CFA_AARCH64_negate_ra_state: ; CHECK-DUMP: reg34=1 ;; Second DW_CFA_AARCH64_negate_ra_state: @@ -238,6 +237,7 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP: reg34=1 ;; Third DW_CFA_AARCH64_negate_ra_state: ; CHECK-DUMP: reg34=0 +; CHECK-DUMP-NOT: reg34=1 ; baz_sync ; CHECK-DUMP-LABEL: FDE @@ -246,6 +246,6 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP-NOT: DW_CFA_remember_state ; CHECK-DUMP-NOT: DW_CFA_restore_state -; CHECK-DUMP: CFA=WSP +; CHECK-DUMP: CFA=WSP{{$}} ; CHECK-DUMP: reg34=1 ; CHECK-DUMP-NOT: reg34=0 diff --git a/llvm/test/CodeGen/AArch64/sign-return-address.ll b/llvm/test/CodeGen/AArch64/sign-return-address.ll index e0ee0d84ab4f1..dafe0d71ceb5f 100644 --- a/llvm/test/CodeGen/AArch64/sign-return-address.ll +++ b/llvm/test/CodeGen/AArch64/sign-return-address.ll @@ -29,15 +29,15 @@ define i32 @leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf" { define i32 @leaf_sign_all(i32 %x) "sign-return-address"="all" { ; COMPAT-LABEL: leaf_sign_all: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret ; ; V83A-LABEL: leaf_sign_all: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: retaa ret i32 %x } @@ -45,8 +45,8 @@ define i32 @leaf_sign_all(i32 %x) "sign-return-address"="all" { define i64 @leaf_clobbers_lr(i64 %x) "sign-return-address"="non-leaf" { ; COMPAT-LABEL: leaf_clobbers_lr: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 @@ -59,8 +59,8 @@ define i64 @leaf_clobbers_lr(i64 %x) "sign-return-address"="non-leaf" { ; ; V83A-LABEL: leaf_clobbers_lr: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -78,8 +78,8 @@ declare i32 @foo(i32) define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" { ; COMPAT-LABEL: non_leaf_sign_all: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 @@ -90,8 +90,8 @@ define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" { ; ; V83A-LABEL: non_leaf_sign_all: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -105,8 +105,8 @@ define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" { define i32 @non_leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf" { ; COMPAT-LABEL: non_leaf_sign_non_leaf: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 @@ -117,8 +117,8 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf" { ; ; V83A-LABEL: non_leaf_sign_non_leaf: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -135,8 +135,8 @@ define i32 @non_leaf_scs(i32 %x) "sign-return-address"="non-leaf" shadowcallstac ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [x18], #8 ; CHECK-NEXT: .cfi_escape 0x16, 0x12, 0x02, 0x82, 0x78 // -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: paciasp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 @@ -152,8 +152,8 @@ define i32 @non_leaf_scs(i32 %x) "sign-return-address"="non-leaf" shadowcallstac define i32 @leaf_sign_all_v83(i32 %x) "sign-return-address"="all" "target-features"="+v8.3a" { ; CHECK-LABEL: leaf_sign_all_v83: ; CHECK: // %bb.0: -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: paciasp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: retaa ret i32 %x } @@ -163,8 +163,8 @@ declare fastcc i64 @bar(i64) define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" { ; COMPAT-LABEL: spill_lr_and_tail_call: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 @@ -177,8 +177,8 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" { ; ; V83A-LABEL: spill_lr_and_tail_call: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -196,15 +196,15 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" { define i32 @leaf_sign_all_a_key(i32 %x) "sign-return-address"="all" "sign-return-address-key"="a_key" { ; COMPAT-LABEL: leaf_sign_all_a_key: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret ; ; V83A-LABEL: leaf_sign_all_a_key: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: retaa ret i32 %x } @@ -213,16 +213,16 @@ define i32 @leaf_sign_all_b_key(i32 %x) "sign-return-address"="all" "sign-return ; COMPAT-LABEL: leaf_sign_all_b_key: ; COMPAT: // %bb.0: ; COMPAT-NEXT: .cfi_b_key_frame -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #27 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #31 ; COMPAT-NEXT: ret ; ; V83A-LABEL: leaf_sign_all_b_key: ; V83A: // %bb.0: ; V83A-NEXT: .cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: retab ret i32 %x } @@ -231,8 +231,8 @@ define i32 @leaf_sign_all_v83_b_key(i32 %x) "sign-return-address"="all" "target- ; CHECK-LABEL: leaf_sign_all_v83_b_key: ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: retab ret i32 %x } @@ -241,15 +241,15 @@ define i32 @leaf_sign_all_v83_b_key(i32 %x) "sign-return-address"="all" "target- define i32 @leaf_sign_all_a_key_bti(i32 %x) "sign-return-address"="all" "sign-return-address-key"="a_key" "branch-target-enforcement"{ ; COMPAT-LABEL: leaf_sign_all_a_key_bti: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret ; ; V83A-LABEL: leaf_sign_all_a_key_bti: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: retaa ret i32 %x } @@ -259,16 +259,16 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "sign-return-address"="all" "sign-re ; COMPAT-LABEL: leaf_sign_all_b_key_bti: ; COMPAT: // %bb.0: ; COMPAT-NEXT: .cfi_b_key_frame -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #27 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #31 ; COMPAT-NEXT: ret ; ; V83A-LABEL: leaf_sign_all_b_key_bti: ; V83A: // %bb.0: ; V83A-NEXT: .cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: retab ret i32 %x } @@ -278,8 +278,8 @@ define i32 @leaf_sign_all_v83_b_key_bti(i32 %x) "sign-return-address"="all" "tar ; CHECK-LABEL: leaf_sign_all_v83_b_key_bti: ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: retab ret i32 %x } diff --git a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir index b2abff75880c9..1030917c87419 100644 --- a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir +++ b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir @@ -25,8 +25,8 @@ alignment: 4 tracksRegLiveness: true frameInfo: maxCallFrameSize: 0 -#CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state #CHECK: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +#CHECK: 
frame-setup CFI_INSTRUCTION negate_ra_sign_state #CHECK: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp body: | bb.0.entry: @@ -42,8 +42,8 @@ tracksRegLiveness: true frameInfo: maxCallFrameSize: 0 #CHECK: frame-setup EMITBKEY -#CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state #CHECK: frame-setup PACIBSP implicit-def $lr, implicit $lr, implicit $sp +#CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state #CHECK: frame-destroy AUTIBSP implicit-def $lr, implicit $lr, implicit $sp body: | bb.0.entry: From 070cf62530eab91d68521787e08418343b9ca28b Mon Sep 17 00:00:00 2001 From: cor3ntin Date: Tue, 18 Mar 2025 16:45:37 +0100 Subject: [PATCH 38/83] [Clang] Demote mixed enumeration arithmetic error to a warning (#131811) In C++, defaulted to an error. C++ removed these features but the removal negatively impacts users. Fixes #92340 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Basic/DiagnosticSemaKinds.td | 6 +++++- clang/lib/Sema/SemaExpr.cpp | 2 +- clang/test/SemaCXX/cxx2c-enum-compare.cpp | 5 +++-- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2f43dc4021fd8..774a00b4feef5 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -314,6 +314,9 @@ C++2c Feature Support - Implemented `P3176R1 The Oxford variadic comma `_ +- The error produced when doing arithmetic operations on enums of different types + can be disabled with ``-Wno-enum-enum-conversion``. (#GH92340) + C++23 Feature Support ^^^^^^^^^^^^^^^^^^^^^ - Removed the restriction to literal types in constexpr functions in C++23 mode. diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ec2a140e04d5b..7180447e250ce 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7567,9 +7567,13 @@ def warn_arith_conv_mixed_enum_types_cxx20 : Warning< "%sub{select_arith_conv_kind}0 " "different enumeration types%diff{ ($ and $)|}1,2 is deprecated">, InGroup; -def err_conv_mixed_enum_types_cxx26 : Error< + +def err_conv_mixed_enum_types: Error < "invalid %sub{select_arith_conv_kind}0 " "different enumeration types%diff{ ($ and $)|}1,2">; +def zzzz_warn_conv_mixed_enum_types_cxx26 : Warning < + err_conv_mixed_enum_types.Summary>, + InGroup, DefaultError; def warn_arith_conv_mixed_anon_enum_types : Warning< warn_arith_conv_mixed_enum_types.Summary>, diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index e253e3a17328f..23d0f9532d4f8 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1519,7 +1519,7 @@ static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS, // In C++ 26, usual arithmetic conversions between 2 different enum types // are ill-formed. 
if (S.getLangOpts().CPlusPlus26) - DiagID = diag::err_conv_mixed_enum_types_cxx26; + DiagID = diag::zzzz_warn_conv_mixed_enum_types_cxx26; else if (!L->castAs<EnumType>()->getDecl()->hasNameForLinkage() || !R->castAs<EnumType>()->getDecl()->hasNameForLinkage()) { // If either enumeration type is unnamed, it's less likely that the diff --git a/clang/test/SemaCXX/cxx2c-enum-compare.cpp b/clang/test/SemaCXX/cxx2c-enum-compare.cpp index f47278a60725e..96fbd368b1696 100644 --- a/clang/test/SemaCXX/cxx2c-enum-compare.cpp +++ b/clang/test/SemaCXX/cxx2c-enum-compare.cpp @@ -1,9 +1,10 @@ -// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify -triple %itanium_abi_triple +// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify=both,expected +// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify=both -Wno-enum-enum-conversion enum E1 { e }; enum E2 { f }; void test() { - int b = e <= 3.7; // expected-error {{invalid comparison of enumeration type 'E1' with floating-point type 'double'}} + int b = e <= 3.7; // both-error {{invalid comparison of enumeration type 'E1' with floating-point type 'double'}} int k = f - e; // expected-error {{invalid arithmetic between different enumeration types ('E2' and 'E1')}} int x = 1 ? e : f; // expected-error {{invalid conditional expression between different enumeration types ('E1' and 'E2')}} } From 8a36b8e3ab46d69ee1798ded75038715b2080f83 Mon Sep 17 00:00:00 2001 From: Fangyi Zhou Date: Thu, 17 Apr 2025 23:02:37 +0100 Subject: [PATCH 39/83] [clang][analyzer] Handle CXXParenInitListExpr alongside InitListExpr As reported in #135665, C++20 parenthesized initializer list expressions are not handled correctly and were causing crashes. This commit attempts to fix the issue by handling parenthesized initializer lists alongside existing initializer lists. (cherry picked from commit 5dc9d55eb04d94c01dba0364b51a509f975e542a) --- clang/docs/ReleaseNotes.rst | 3 +++ .../Checkers/DynamicTypePropagation.cpp | 6 +++--- .../lib/StaticAnalyzer/Core/ExprEngineCXX.cpp | 10 ++++++---- clang/test/Analysis/PR135665.cpp | 19 +++++++++++++++++++ 4 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 clang/test/Analysis/PR135665.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 774a00b4feef5..7b84210fddab3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1472,6 +1472,9 @@ Crash and bug fixes - The ``unix.BlockInCriticalSection`` now recognizes the ``lock()`` member function as expected, even if it's inherited from a base class. Fixes (#GH104241). +- Fixed a crash when C++20 parenthesized initializer lists are used. This issue + was causing a crash in clang-tidy. (#GH136041) + Improvements ^^^^^^^^^^^^ diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp index a0bf776b11f53..e58329817d7cd 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp @@ -379,9 +379,9 @@ void DynamicTypePropagation::checkPostCall(const CallEvent &Call, // aggregates, and in such case no top-frame constructor will be called. // Figure out if we need to do anything in this case. // FIXME: Instead of relying on the ParentMap, we should have the - // trigger-statement (InitListExpr in this case) available in this - // callback, ideally as part of CallEvent.
- if (isa_and_nonnull<InitListExpr>( + // trigger-statement (InitListExpr or CXXParenListInitExpr in this case) + // available in this callback, ideally as part of CallEvent. + if (isa_and_nonnull<InitListExpr, CXXParenListInitExpr>( LCtx->getParentMap().getParent(Ctor->getOriginExpr()))) return; diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index f7020da2e6da2..30839a40389ba 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -637,9 +637,10 @@ void ExprEngine::handleConstructor(const Expr *E, // FIXME: For now this code essentially bails out. We need to find the // correct target region and set it. // FIXME: Instead of relying on the ParentMap, we should have the - // trigger-statement (InitListExpr in this case) passed down from CFG or - // otherwise always available during construction. - if (isa_and_nonnull<InitListExpr>(LCtx->getParentMap().getParent(E))) { + // trigger-statement (InitListExpr or CXXParenListInitExpr in this case) + // passed down from CFG or otherwise always available during construction. + if (isa_and_nonnull<InitListExpr, CXXParenListInitExpr>( + LCtx->getParentMap().getParent(E))) { MemRegionManager &MRMgr = getSValBuilder().getRegionManager(); Target = loc::MemRegionVal(MRMgr.getCXXTempObjectRegion(E, LCtx)); CallOpts.IsCtorOrDtorWithImproperlyModeledTargetRegion = true; @@ -1010,7 +1011,8 @@ void ExprEngine::VisitCXXNewExpr(const CXXNewExpr *CNE, ExplodedNode *Pred, // values are properly placed inside the required region, however if an // initializer list is used, this doesn't happen automatically. auto *Init = CNE->getInitializer(); - bool isInitList = isa_and_nonnull<InitListExpr>(Init); + bool isInitList = + isa_and_nonnull<InitListExpr, CXXParenListInitExpr>(Init); QualType ObjTy = isInitList ? Init->getType() : CNE->getType()->getPointeeType(); diff --git a/clang/test/Analysis/PR135665.cpp b/clang/test/Analysis/PR135665.cpp new file mode 100644 index 0000000000000..124b8c9b97b04 --- /dev/null +++ b/clang/test/Analysis/PR135665.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_analyze_cc1 -std=c++20 -analyzer-checker=core -verify %s + +// expected-no-diagnostics + +template <typename... F> +struct overload : public F... +{ + using F::operator()...; +}; + +template <typename... F> +overload(F&&...) -> overload<F...>; + +int main() +{ + const auto l = overload([](const int* i) {}); + + return 0; +} From 9b0832508ede38397c8ebb7a348d50ce1517af4a Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Tue, 20 May 2025 10:43:50 +0100 Subject: [PATCH 40/83] [SDAG] Ensure load is included in output chain of sincos expansion (#140525) The load not being included in the chain meant that it could materialize after a `@llvm.lifetime.end` annotation on the pointer. This could result in miscompiles if the stack slot is reused for another value.
Fixes https://github.com/llvm/llvm-project/issues/140491 (cherry picked from commit c9d62491981fe720c1b3255fa2f9ddf744590c65) --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9 ++- .../CodeGen/X86/pr140491-sincos-lifetimes.ll | 70 +++++++++++++++++++ 2 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr140491-sincos-lifetimes.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b416c0efbbc4f..eecfb41c2d319 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2660,16 +2660,19 @@ bool SelectionDAG::expandMultipleResultFPLibCall( continue; } MachinePointerInfo PtrInfo; + SDValue LoadResult = + getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo); + SDValue OutChain = LoadResult.getValue(1); + if (StoreSDNode *ST = ResultStores[ResNo]) { // Replace store with the library call. - ReplaceAllUsesOfValueWith(SDValue(ST, 0), CallChain); + ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain); PtrInfo = ST->getPointerInfo(); } else { PtrInfo = MachinePointerInfo::getFixedStack( getMachineFunction(), cast(ResultPtr)->getIndex()); } - SDValue LoadResult = - getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo); + Results.push_back(LoadResult); } diff --git a/llvm/test/CodeGen/X86/pr140491-sincos-lifetimes.ll b/llvm/test/CodeGen/X86/pr140491-sincos-lifetimes.ll new file mode 100644 index 0000000000000..2ca99bdc4b316 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr140491-sincos-lifetimes.ll @@ -0,0 +1,70 @@ +; RUN: llc < %s | FileCheck %s + +; This test is reduced from https://github.com/llvm/llvm-project/issues/140491. +; It checks that when `@llvm.sincos.f32` is expanded to a call to +; `sincosf(float, float* out_sin, float* out_cos)` and the store of `%cos` to +; `%computed` is folded into the `sincosf` call. The use of `%cos`in the later +; `fneg %cos` -- which expands to a load of `%computed`, will perform the load +; before the `@llvm.lifetime.end.p0(%computed)` to ensure the correct value is +; taken for `%cos`. 
+ +target triple = "x86_64-sie-ps5" + +declare void @use_ptr(ptr readonly) + +define i32 @sincos_stack_slot_with_lifetime(float %in) { +; CHECK-LABEL: sincos_stack_slot_with_lifetime: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subq $32, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: leaq 12(%rsp), %rdi +; CHECK-NEXT: leaq 8(%rsp), %rbx +; CHECK-NEXT: movq %rbx, %rsi +; CHECK-NEXT: callq sincosf@PLT +; CHECK-NEXT: movss 8(%rsp), %xmm0 # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movaps %xmm0, 16(%rsp) # 16-byte Spill +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: callq use_ptr +; CHECK-NEXT: movss 12(%rsp), %xmm0 # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: movss %xmm0, 8(%rsp) +; CHECK-NEXT: leaq 8(%rsp), %rdi +; CHECK-NEXT: callq use_ptr +; CHECK-NEXT: movaps 16(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: movss %xmm0, 8(%rsp) +; CHECK-NEXT: leaq 8(%rsp), %rdi +; CHECK-NEXT: callq use_ptr +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addq $32, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %computed = alloca float, align 4 + %computed1 = alloca float, align 4 + %computed3 = alloca float, align 4 + %sincos = tail call { float, float } @llvm.sincos.f32(float %in) + %sin = extractvalue { float, float } %sincos, 0 + %cos = extractvalue { float, float } %sincos, 1 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %computed) + store float %cos, ptr %computed, align 4 + call void @use_ptr(ptr nonnull %computed) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %computed) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %computed1) + %fneg_sin = fneg float %sin + store float %fneg_sin, ptr %computed1, align 4 + call void @use_ptr(ptr nonnull %computed1) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %computed1) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %computed3) + %fneg_cos = fneg float %cos + store float %fneg_cos, ptr %computed3, align 4 + call void @use_ptr(ptr nonnull %computed3) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %computed3) + ret i32 0 +} + From 802f4f75372ed66c07f1230666b170c43afbb937 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Tue, 20 May 2025 19:15:57 -0700 Subject: [PATCH 41/83] [clang-format] Handle raw string literals containing JSON code (#140666) Fix #65400 (cherry picked from commit 0dfdf7efbfe347517eb4c7f544043a71af4e4a25) --- clang/lib/Format/Format.cpp | 6 +++-- clang/tools/clang-format/ClangFormat.cpp | 4 ++-- .../unittests/Format/FormatTestRawStrings.cpp | 22 +++++++++++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index b97d8928178b5..aba7db6dd50a8 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -3743,8 +3743,10 @@ reformat(const FormatStyle &Style, StringRef Code, tooling::Replacements Replaces = Formatter(*Env, Style, Status).process().first; // add a replacement to remove the "x = " from the result. - Replaces = Replaces.merge( - tooling::Replacements(tooling::Replacement(FileName, 0, 4, ""))); + if (Code.starts_with("x = ")) { + Replaces = Replaces.merge( + tooling::Replacements(tooling::Replacement(FileName, 0, 4, ""))); + } // apply the reformatting changes and the removal of "x = ". 
if (applyAllReplacements(Code, Replaces)) return {Replaces, 0}; diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index 28610052b9b74..96eec7c666a38 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -492,8 +492,8 @@ static bool format(StringRef FileName, bool ErrorOnIncompleteFormat = false) { // To format JSON insert a variable to trick the code into thinking its // JavaScript. if (IsJson && !FormatStyle->DisableFormat) { - auto Err = Replaces.add(tooling::Replacement( - tooling::Replacement(AssumedFileName, 0, 0, "x = "))); + auto Err = + Replaces.add(tooling::Replacement(AssumedFileName, 0, 0, "x = ")); if (Err) llvm::errs() << "Bad Json variable insertion\n"; } diff --git a/clang/unittests/Format/FormatTestRawStrings.cpp b/clang/unittests/Format/FormatTestRawStrings.cpp index 0615fb1fad4c5..3f09c7b6086e5 100644 --- a/clang/unittests/Format/FormatTestRawStrings.cpp +++ b/clang/unittests/Format/FormatTestRawStrings.cpp @@ -988,6 +988,28 @@ fffffffffffffffffffff("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", )pb");)test", Style)); } + +TEST_F(FormatTestRawStrings, Json) { + auto Style = getLLVMStyle(); + Style.RawStringFormats = { + { + /*Language=*/FormatStyle::LK_Json, + /*Delimiters=*/{"json"}, + /*EnclosingFunctions=*/{}, + /*CanonicalDelimiter=*/"", + /*BasedOnStyle=*/"llvm", + }, + }; + + EXPECT_EQ("json = R\"json({\n" + " \"str\": \"test\"\n" + " })json\";", + format("json = R\"json({\n" + " \"str\": \"test\"\n" + "})json\";", + Style)); +} + } // end namespace } // end namespace format } // end namespace clang From 15ec590e389b57f47d817f32d34e486c5ab37f70 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Wed, 21 May 2025 21:37:54 -0700 Subject: [PATCH 42/83] release/20.x: [clang-format] Fix the indent of StartOfName after AttributeMacro (#140361) Backport 0cac25bcf5a246eb8a1f02d5041731ae9a6f00e0 --- clang/lib/Format/ContinuationIndenter.cpp | 4 +++- clang/unittests/Format/FormatTest.cpp | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 6f7d213c0b559..d953348b0258d 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1452,7 +1452,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { (PreviousNonComment->ClosesTemplateDeclaration || PreviousNonComment->ClosesRequiresClause || (PreviousNonComment->is(TT_AttributeMacro) && - Current.isNot(tok::l_paren)) || + Current.isNot(tok::l_paren) && + !Current.endsSequence(TT_StartOfName, TT_AttributeMacro, + TT_PointerOrReference)) || PreviousNonComment->isOneOf( TT_AttributeRParen, TT_AttributeSquare, TT_FunctionAnnotationRParen, TT_JavaAnnotation, TT_LeadingJavaAnnotation))) || diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 90a79230e9f4c..1afcc75a2e19e 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -12419,6 +12419,13 @@ TEST_F(FormatTest, UnderstandsAttributes) { verifyFormat("SomeType s __unused{InitValue};", CustomAttrs); verifyFormat("SomeType *__capability s(InitValue);", CustomAttrs); verifyFormat("SomeType *__capability s{InitValue};", CustomAttrs); + + auto Style = getLLVMStyleWithColumns(60); + Style.AttributeMacros.push_back("my_fancy_attr"); + Style.PointerAlignment = FormatStyle::PAS_Left; + verifyFormat("void foo(const MyLongTypeNameeeeeeeeeeeee* 
my_fancy_attr\n" + " testttttttttt);", + Style); } TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { From 6fcb1c127b407cd6cb731bcc39efb360643b8d25 Mon Sep 17 00:00:00 2001 From: hev Date: Thu, 22 May 2025 18:50:40 +0800 Subject: [PATCH 43/83] [LoongArch] Fix assertion failure for annotate tablejump (#140907) Fix a use-after-free issue related to annotateTableJump in the LoongArch target. Previously, `LoongArchPreRAExpandPseudo::annotateTableJump()` recorded a reference to a MachineOperand representing a jump table index. However, later optimizations such as the `BranchFolder` pass may delete the instruction containing this operand, leaving a dangling reference. This led to an assertion failure in `LoongArchAsmPrinter::emitJumpTableInfo()` when trying to access a freed MachineOperand via `getIndex()`. The fix avoids holding a reference to the MachineOperand. Instead, we extract and store the jump table index at the time of annotation. During `emitJumpTableInfo()`, we verify whether the recorded index still exists in the MachineFunction's jump table. If not, we skip emission for that entry. Fixes #140904 (cherry picked from commit 4e186f20e2f2be2fbf95d9713341a0b6507e707d) --- llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp | 14 +++++++++----- .../LoongArch/LoongArchExpandPseudoInsts.cpp | 3 ++- .../LoongArch/LoongArchMachineFunctionInfo.h | 10 ++++------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp index 895a8e2646692..9a383f0a79a5c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -265,13 +265,16 @@ void LoongArchAsmPrinter::emitJumpTableInfo() { assert(TM.getTargetTriple().isOSBinFormatELF()); - unsigned Size = getDataLayout().getPointerSize(); auto *LAFI = MF->getInfo(); unsigned EntrySize = LAFI->getJumpInfoSize(); + auto JTI = MF->getJumpTableInfo(); - if (0 == EntrySize) + if (!JTI || 0 == EntrySize) return; + unsigned Size = getDataLayout().getPointerSize(); + auto JT = JTI->getJumpTables(); + // Emit an additional section to store the correlation info as pairs of // addresses, each pair contains the address of a jump instruction (jr) and // the address of the jump table. 
@@ -279,14 +282,15 @@ void LoongArchAsmPrinter::emitJumpTableInfo() { ".discard.tablejump_annotate", ELF::SHT_PROGBITS, 0)); for (unsigned Idx = 0; Idx < EntrySize; ++Idx) { + int JTIIdx = LAFI->getJumpInfoJTIIndex(Idx); + if (JT[JTIIdx].MBBs.empty()) + continue; OutStreamer->emitValue( MCSymbolRefExpr::create(LAFI->getJumpInfoJrMI(Idx)->getPreInstrSymbol(), OutContext), Size); OutStreamer->emitValue( - MCSymbolRefExpr::create( - GetJTISymbol(LAFI->getJumpInfoJTIMO(Idx)->getIndex()), OutContext), - Size); + MCSymbolRefExpr::create(GetJTISymbol(JTIIdx), OutContext), Size); } } diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp index c2d73a260b1c1..2107908be34ca 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -638,7 +638,8 @@ void LoongArchPreRAExpandPseudo::annotateTableJump( if (MO.isJTI()) { MBBI->setPreInstrSymbol( *MF, MF->getContext().createNamedTempSymbol("jrtb_")); - MF->getInfo()->setJumpInfo(&*MBBI, &MO); + MF->getInfo()->setJumpInfo( + &*MBBI, MO.getIndex()); IsFound = true; return; } diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h index daa47c4dc7e32..904985c189dba 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h @@ -41,7 +41,7 @@ class LoongArchMachineFunctionInfo : public MachineFunctionInfo { /// Pairs of `jr` instructions and corresponding JTI operands, used for the /// `annotate-tablejump` option. - SmallVector, 4> JumpInfos; + SmallVector, 4> JumpInfos; public: LoongArchMachineFunctionInfo(const Function &F, @@ -76,14 +76,12 @@ class LoongArchMachineFunctionInfo : public MachineFunctionInfo { return is_contained(SExt32Registers, Reg); } - void setJumpInfo(MachineInstr *JrMI, MachineOperand *JTIMO) { - JumpInfos.push_back(std::make_pair(JrMI, JTIMO)); + void setJumpInfo(MachineInstr *JrMI, int JTIIdx) { + JumpInfos.push_back(std::make_pair(JrMI, JTIIdx)); } unsigned getJumpInfoSize() { return JumpInfos.size(); } MachineInstr *getJumpInfoJrMI(unsigned Idx) { return JumpInfos[Idx].first; } - MachineOperand *getJumpInfoJTIMO(unsigned Idx) { - return JumpInfos[Idx].second; - } + int getJumpInfoJTIIndex(unsigned Idx) { return JumpInfos[Idx].second; } }; } // end namespace llvm From 53393e26d5f4e93c88c4c1fb737b30658de49992 Mon Sep 17 00:00:00 2001 From: hev Date: Thu, 22 May 2025 18:49:27 +0800 Subject: [PATCH 44/83] [LoongArch] Prevent R0/R1 allocation for rj operand of [G]CSRXCHG (#140862) The `[G]CSRXCHG` instruction must not use R0 or R1 as the `rj` operand, as encoding `rj` as 0 or 1 will be interpreted as `[G]CSRRD` OR `[G]CSRWR`, respectively, rather than `[G]CSRXCHG`. This patch introduces a new register class `GPRNoR0R1` and updates the `[G]CSRXCHG` instruction definition to use it for the `rj` operand, ensuring the register allocator avoids assigning R0 or R1. 
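To make the aliasing concrete, here is a rough encoding sketch (an editorial illustration, not code from this patch; the field layout is assumed from the LoongArch CSR instruction format and the `CSRWR` opcode `0x04000020` visible above, i.e. csr_num in bits [23:10], rj in [9:5], rd in [4:0]):

```c++
#include <cstdint>

// Assumed layout: csrxchg rd, rj, csr_num  =>  0x04000000 | csr_num<<10 | rj<<5 | rd
uint32_t encodeCsrxchg(uint32_t rd, uint32_t rj, uint32_t csr_num) {
  return 0x04000000u | (csr_num << 10) | (rj << 5) | rd;
}
// With rj == 0 the resulting bits are identical to csrrd rd, csr_num, and with
// rj == 1 they are identical to csrwr rd, csr_num (0x04000020), which is why
// the register allocator must never pick $r0 or $r1 for rj.
```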
Fixes #140842 (cherry picked from commit bd8578c3574d77bc1231f047bced4a0053a1b000) --- .../AsmParser/LoongArchAsmParser.cpp | 3 +++ .../Disassembler/LoongArchDisassembler.cpp | 8 +++++++ .../Target/LoongArch/LoongArchInstrInfo.td | 6 ++--- .../Target/LoongArch/LoongArchLVZInstrInfo.td | 2 +- .../Target/LoongArch/LoongArchRegisterInfo.td | 5 ++++ .../CodeGen/LoongArch/csrxchg-intrinsic.ll | 24 +++++++++++++++++++ 6 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/csrxchg-intrinsic.ll diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index 420b98b8a9c1f..f31d85305bbbe 100644 --- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -1663,6 +1663,9 @@ LoongArchAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, return Match_Success; } + if (Kind == MCK_GPRNoR0R1 && (Reg == LoongArch::R0 || Reg == LoongArch::R1)) + return Match_RequiresOpnd2NotR0R1; + return Match_InvalidOperand; } diff --git a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp index 5963208691f72..761682423fffe 100644 --- a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +++ b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp @@ -62,6 +62,14 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, return MCDisassembler::Success; } +static DecodeStatus +DecodeGPRNoR0R1RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo <= 1) + return MCDisassembler::Fail; + return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); +} + static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index 9b93a9f824726..00e8548071182 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -2351,7 +2351,7 @@ let hasSideEffects = 1, Constraints = "$rd = $dst" in { def CSRWR : FmtCSR<0x04000020, (outs GPR:$dst), (ins GPR:$rd, uimm14:$csr_num), "$rd, $csr_num">; def CSRXCHG : FmtCSRXCHG<0x04000000, (outs GPR:$dst), - (ins GPR:$rd, GPR:$rj, uimm14:$csr_num), + (ins GPR:$rd, GPRNoR0R1:$rj, uimm14:$csr_num), "$rd, $rj, $csr_num">; } // hasSideEffects = 1, Constraints = "$rd = $dst" @@ -2398,8 +2398,8 @@ def IDLE : MISC_I15<0x06488000>; def : Pat<(loongarch_csrrd uimm14:$imm14), (CSRRD uimm14:$imm14)>; def : Pat<(loongarch_csrwr GPR:$rd, uimm14:$imm14), (CSRWR GPR:$rd, uimm14:$imm14)>; -def : Pat<(loongarch_csrxchg GPR:$rd, GPR:$rj, uimm14:$imm14), - (CSRXCHG GPR:$rd, GPR:$rj, uimm14:$imm14)>; +def : Pat<(loongarch_csrxchg GPR:$rd, GPRNoR0R1:$rj, uimm14:$imm14), + (CSRXCHG GPR:$rd, GPRNoR0R1:$rj, uimm14:$imm14)>; def : Pat<(loongarch_iocsrrd_b GPR:$rj), (IOCSRRD_B GPR:$rj)>; def : Pat<(loongarch_iocsrrd_h GPR:$rj), (IOCSRRD_H GPR:$rj)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td index 50a16e2dd56b9..07b77ee971f27 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td @@ -23,7 +23,7 @@ let Constraints = "$rd = $dst" in { def GCSRWR : FmtCSR<0x05000020, (outs GPR:$dst), (ins GPR:$rd, 
uimm14:$csr_num), "$rd, $csr_num">; def GCSRXCHG : FmtCSRXCHG<0x05000000, (outs GPR:$dst), - (ins GPR:$rd, GPR:$rj, uimm14:$csr_num), + (ins GPR:$rd, GPRNoR0R1:$rj, uimm14:$csr_num), "$rd, $rj, $csr_num">; } // Constraints = "$rd = $dst" diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td index a8419980868ee..2a8cdf953e00f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td @@ -127,6 +127,11 @@ def GPRT : GPRRegisterClass<(add // a0...a7, t0...t8 // prediction. def GPRJR : GPRRegisterClass<(sub GPR, R1)>; +// Don't use R0 or R1 for the rj operand of [G]CSRXCHG, because when rj is +// encoded as 0 or 1, the instruction is interpreted as [G]CSRRD or [G]CSRWR, +// respectively, rather than [G]CSRXCHG. +def GPRNoR0R1 : GPRRegisterClass<(sub GPR, R0, R1)>; + // Floating point registers let RegAltNameIndices = [RegAliasName] in { diff --git a/llvm/test/CodeGen/LoongArch/csrxchg-intrinsic.ll b/llvm/test/CodeGen/LoongArch/csrxchg-intrinsic.ll new file mode 100644 index 0000000000000..2f38b3a8c7ad1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/csrxchg-intrinsic.ll @@ -0,0 +1,24 @@ +; RUN: llc --mtriple=loongarch32 --mattr=+f --verify-machineinstrs < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+f --verify-machineinstrs < %s | FileCheck %s + +declare i32 @llvm.loongarch.csrxchg.w(i32, i32, i32 immarg) + +;; Check that the rj operand of csrxchg is not R0. +define void @csrxchg_w_rj_not_r0(i32 signext %a) { +; CHECK-NOT: csrxchg ${{[a-z]*}}, $r0, 0 +; CHECK-NOT: csrxchg ${{[a-z]*}}, $zero, 0 +entry: + %0 = tail call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 0, i32 0) + ret void +} + +;; Check that the rj operand of csrxchg is not R1. +define i32 @csrxchg_w_rj_not_r1() { +; CHECK-NOT: csrxchg ${{[a-z]*}}, $r1, 0 +; CHECK-NOT: csrxchg ${{[a-z]*}}, $ra, 0 +entry: + %0 = tail call i32 asm "", "=r,r,i,{r4},{r5},{r6},{r7},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},{r16},{r17},{r18},{r19},{r20},{r23},{r24},{r25},{r26},{r27},{r28},{r29},{r30},{r31},0"(i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) + %1 = tail call i32 @llvm.loongarch.csrxchg.w(i32 %0, i32 4, i32 0) + %2 = tail call i32 asm "", "=r,r,i,{r4},{r5},{r6},{r7},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},{r16},{r17},{r18},{r19},{r20},{r23},{r24},{r25},{r26},{r27},{r28},{r29},{r30},{r31},0"(i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %1) + ret i32 %2 +} From aa804fd3e624cb92c6e7665182504c6049387f35 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 28 Apr 2025 13:45:11 -0700 Subject: [PATCH 45/83] [sanitizer_common] Remove interceptors for deprecated struct termio (#137403) This struct will be removed from glibc-2.42 and has been deprecated for a very long time. 
Fixes #137321 (cherry picked from commit 59978b21ad9c65276ee8e14f26759691b8a65763) --- .../sanitizer_common_interceptors_ioctl.inc | 8 -------- .../sanitizer_common/sanitizer_platform_limits_posix.cpp | 3 --- .../sanitizer_common/sanitizer_platform_limits_posix.h | 1 - 3 files changed, 12 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc index 49ec4097c900b..dda11daa77f49 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc @@ -338,17 +338,9 @@ static void ioctl_table_fill() { _(SOUND_PCM_WRITE_CHANNELS, WRITE, sizeof(int)); _(SOUND_PCM_WRITE_FILTER, WRITE, sizeof(int)); _(TCFLSH, NONE, 0); -#if SANITIZER_GLIBC - _(TCGETA, WRITE, struct_termio_sz); -#endif _(TCGETS, WRITE, struct_termios_sz); _(TCSBRK, NONE, 0); _(TCSBRKP, NONE, 0); -#if SANITIZER_GLIBC - _(TCSETA, READ, struct_termio_sz); - _(TCSETAF, READ, struct_termio_sz); - _(TCSETAW, READ, struct_termio_sz); -#endif _(TCSETS, READ, struct_termios_sz); _(TCSETSF, READ, struct_termios_sz); _(TCSETSW, READ, struct_termios_sz); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index ec5f2edab6a64..b3e717591d6c7 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -485,9 +485,6 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned struct_input_id_sz = sizeof(struct input_id); unsigned struct_mtpos_sz = sizeof(struct mtpos); unsigned struct_rtentry_sz = sizeof(struct rtentry); -#if SANITIZER_GLIBC || SANITIZER_ANDROID - unsigned struct_termio_sz = sizeof(struct termio); -#endif unsigned struct_vt_consize_sz = sizeof(struct vt_consize); unsigned struct_vt_sizes_sz = sizeof(struct vt_sizes); unsigned struct_vt_stat_sz = sizeof(struct vt_stat); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index 1a7d9e64048eb..005ff27624629 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -1043,7 +1043,6 @@ extern unsigned struct_hd_geometry_sz; extern unsigned struct_input_absinfo_sz; extern unsigned struct_input_id_sz; extern unsigned struct_mtpos_sz; -extern unsigned struct_termio_sz; extern unsigned struct_vt_consize_sz; extern unsigned struct_vt_sizes_sz; extern unsigned struct_vt_stat_sz; From 5d99a97583e148e2c0ad462bb35292366105e188 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 26 May 2025 09:43:00 +0200 Subject: [PATCH 46/83] [MachO] Improve bounds check (#141083) The current check may fail if the addition overflows. I've observed failures of macho-invalid.test on 32-bit due to this. Instead, compare against the remaining bytes until the end of the object. 
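A minimal sketch of the idiom (illustrative only, with made-up helper names, not the code touched by this patch): on a 32-bit host the sum `Ptr + Size` can wrap around and compare as in-bounds, so the check is phrased as a comparison against the bytes that remain.

```c++
#include <cstdint>

// Unsafe: the pointer addition can wrap on a 32-bit host (the failure mode
// described above), so an out-of-bounds size may still compare as "fits".
bool fitsUnsafe(const char *Ptr, uint32_t Size, const char *End) {
  return Ptr + Size <= End;
}

// Safe, assuming Ptr <= End: compare the size against the remaining bytes.
bool fitsSafe(const char *Ptr, uint32_t Size, const char *End) {
  return Size <= (uintptr_t)(End - Ptr);
}
```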
(cherry picked from commit 3f29acb51739a3e6bfb8cc623eb37cb734c98a63) --- llvm/lib/Object/MachOObjectFile.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index 69d36e6a77db7..5db264207ffb7 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -192,7 +192,8 @@ static Expected<MachOObjectFile::LoadCommandInfo> getLoadCommandInfo(const MachOObjectFile &Obj, const char *Ptr, uint32_t LoadCommandIndex) { if (auto CmdOrErr = getStructOrErr<MachO::load_command>(Obj, Ptr)) { - if (CmdOrErr->cmdsize + Ptr > Obj.getData().end()) + assert(Ptr <= Obj.getData().end() && "Start must be before end"); + if (CmdOrErr->cmdsize > (uintptr_t)(Obj.getData().end() - Ptr)) return malformedError("load command " + Twine(LoadCommandIndex) + " extends past end of file"); if (CmdOrErr->cmdsize < 8) From 0e1ef696f1fedd94ac43cafcd09b4ac0071c44be Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Sat, 26 Apr 2025 17:06:04 +0200 Subject: [PATCH 47/83] [Driver] Fix _XOPEN_SOURCE definition on Solaris (#137141) Since commit 613a077b05b8352a48695be295037306f5fca151, `flang` doesn't build any longer on Solaris/amd64: ``` flang/lib/Evaluate/intrinsics-library.cpp:225:26: error: address of overloaded function 'acos' does not match required type '__float128 (__float128)' 225 | FolderFactory::Create("acos"), | ^~~~~~~~~ ``` That patch led to the version of `quadmath.h` deep inside `/usr/gcc/` to be found, thus `HAS_QUADMATHLIB` is defined. However, the `struct HostRuntimeLibrary<__float128, LibraryVersion::Libm>` template is guarded by `_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600`, while `clang` only predefines `_XOPEN_SOURCE=500`. This code dates back to commit 0c1941cb055fcf008e17faa6605969673211bea3 back in 2012. Currently, this is long obsolete and `gcc` predefines `_XOPEN_SOURCE=600` instead since GCC 4.6 back in 2011. This patch follows that. Tested on `amd64-pc-solaris2.11` and `sparcv9-sun-solaris2.11`. (cherry picked from commit e71c8ea3cc73c8f7b0382468f355a254166d3a72) --- clang/lib/Basic/Targets/OSTargets.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 991efd2bde01f..4cf4230273d38 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -618,14 +618,7 @@ class LLVM_LIBRARY_VISIBILITY SolarisTargetInfo : public OSTargetInfo { DefineStd(Builder, "unix", Opts); Builder.defineMacro("__svr4__"); Builder.defineMacro("__SVR4"); - // Solaris headers require _XOPEN_SOURCE to be set to 600 for C99 and - // newer, but to 500 for everything else. feature_test.h has a check to - // ensure that you are not using C99 with an old version of X/Open or C89 - // with a new version.
- if (Opts.C99) - Builder.defineMacro("_XOPEN_SOURCE", "600"); - else - Builder.defineMacro("_XOPEN_SOURCE", "500"); + Builder.defineMacro("_XOPEN_SOURCE", "600"); if (Opts.CPlusPlus) { Builder.defineMacro("__C99FEATURES__"); Builder.defineMacro("_FILE_OFFSET_BITS", "64"); From 4b6e5a28665348256609945fda55f4bde847433c Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Sun, 25 May 2025 15:40:45 -0700 Subject: [PATCH 48/83] [clang-format] Handle Java text blocks (#141334) Fix #61954 (cherry picked from commit b7f5950bb3b97eac979925a3bbf015530c26962e) --- clang/lib/Format/FormatTokenLexer.cpp | 33 +++++++++++++ clang/lib/Format/FormatTokenLexer.h | 2 + clang/unittests/Format/FormatTestJava.cpp | 57 +++++++++++++++++++++++ 3 files changed, 92 insertions(+) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 16f0a76f3a954..0755a5d355394 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -636,6 +636,36 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) { return true; } +void FormatTokenLexer::tryParseJavaTextBlock() { + if (FormatTok->TokenText != "\"\"") + return; + + const auto *S = Lex->getBufferLocation(); + const auto *End = Lex->getBuffer().end(); + + if (S == End || *S != '\"') + return; + + ++S; // Skip the `"""` that begins a text block. + + // Find the `"""` that ends the text block. + for (int Count = 0; Count < 3 && S < End; ++S) { + switch (*S) { + case '\\': + Count = -1; + break; + case '\"': + ++Count; + break; + default: + Count = 0; + } + } + + // Ignore the possibly invalid text block. + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S))); +} + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. Changes the current token to a regex literal and updates @@ -1326,6 +1356,9 @@ FormatToken *FormatTokenLexer::getNextToken() { FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); ++Column; StateStack.push(LexerState::TOKEN_STASHED); + } else if (Style.Language == FormatStyle::LK_Java && + FormatTok->is(tok::string_literal)) { + tryParseJavaTextBlock(); } if (Style.isVerilog() && Tokens.size() > 0 && diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index 61474a3f9ada8..d9a25c8ef3538 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -71,6 +71,8 @@ class FormatTokenLexer { bool canPrecedeRegexLiteral(FormatToken *Prev); + void tryParseJavaTextBlock(); + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. Changes the current token to a regex literal and updates diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp index 33998bc7ff858..d0a3b4eb96d69 100644 --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -789,6 +789,63 @@ TEST_F(FormatTestJava, AlignCaseArrows) { Style); } +TEST_F(FormatTestJava, TextBlock) { + verifyNoChange("String myStr = \"\"\"\n" + "hello\n" + "there\n" + "\"\"\";"); + + verifyNoChange("String tb = \"\"\"\n" + " the new\"\"\";"); + + verifyNoChange("System.out.println(\"\"\"\n" + " This is the first line\n" + " This is the second line\n" + " \"\"\");"); + + verifyNoChange("void writeHTML() {\n" + " String html = \"\"\" \n" + " \n" + "

Hello World.

\n" + " \n" + "\"\"\";\n" + " writeOutput(html);\n" + "}"); + + verifyNoChange("String colors = \"\"\"\t\n" + " red\n" + " green\n" + " blue\"\"\".indent(4);"); + + verifyNoChange("String code = \"\"\"\n" + " String source = \\\"\"\"\n" + " String message = \"Hello, World!\";\n" + " System.out.println(message);\n" + " \\\"\"\";\n" + " \"\"\";"); + + verifyNoChange( + "class Outer {\n" + " void printPoetry() {\n" + " String lilacs = \"\"\"\n" + "Passing the apple-tree blows of white and pink in the orchards\n" + "\"\"\";\n" + " System.out.println(lilacs);\n" + " }\n" + "}"); + + verifyNoChange("String name = \"\"\"\r\n" + " red\n" + " green\n" + " blue\\\n" + " \"\"\";"); + + verifyFormat("String name = \"\"\"Pat Q. Smith\"\"\";"); + + verifyNoChange("String name = \"\"\"\n" + " Pat Q. Smith"); +} + } // namespace } // namespace test } // namespace format From 47addd4540b4c393e478ba92bea2589e330c57fb Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 21 May 2025 15:57:38 +0200 Subject: [PATCH 49/83] [libclc] Include isnan implementation for SPIR-V targets The fma software emulation requires it. Similar to https://github.com/llvm/llvm-project/pull/124614 --- libclc/clc/lib/spirv/SOURCES | 1 + libclc/clc/lib/spirv64/SOURCES | 1 + 2 files changed, 2 insertions(+) diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES index 509236d587cd0..f97a1407f6631 100644 --- a/libclc/clc/lib/spirv/SOURCES +++ b/libclc/clc/lib/spirv/SOURCES @@ -10,5 +10,6 @@ ../generic/math/clc_nextafter.cl ../generic/math/clc_rint.cl ../generic/math/clc_trunc.cl +../generic/relational/clc_isnan.cl ../generic/relational/clc_select.cl ../generic/shared/clc_clamp.cl diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES index 509236d587cd0..f97a1407f6631 100644 --- a/libclc/clc/lib/spirv64/SOURCES +++ b/libclc/clc/lib/spirv64/SOURCES @@ -10,5 +10,6 @@ ../generic/math/clc_nextafter.cl ../generic/math/clc_rint.cl ../generic/math/clc_trunc.cl +../generic/relational/clc_isnan.cl ../generic/relational/clc_select.cl ../generic/shared/clc_clamp.cl From 7cf14539b644fd29d1c2a60f335d80d34f269f21 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 28 May 2025 11:32:12 -0700 Subject: [PATCH 50/83] Bump version to 20.1.7 --- cmake/Modules/LLVMVersion.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index 40ba164763d60..1887043b7612a 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 6) + set(LLVM_VERSION_PATCH 7) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) From 7759bb57c24390797ee34fa58a5e1234f5aa9369 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Thu, 29 May 2025 00:28:21 -0300 Subject: [PATCH 51/83] [clang] Serialization: support hashing null template arguments When performing overload resolution during code completion, clang will allow incomplete substitutions in more places than would be allowed for valid code, because for completion to work well, it needs clang to keep going so it can explore the space of possibilities. Notably, we accept instantiating declarations will null template arguments, and this works fine, except that when lazily loading serialzied templated declarations, the template argument hasher assumes null arguments can't be used. This patch makes the hasher happily accept that. 
Fixes https://github.com/llvm/llvm-project/issues/139019 --- clang/docs/ReleaseNotes.rst | 1 + .../Serialization/TemplateArgumentHasher.cpp | 4 ++- clang/test/CodeCompletion/GH139019.cpp | 26 +++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeCompletion/GH139019.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7b84210fddab3..262bf4e3d4f5b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1118,6 +1118,7 @@ Miscellaneous Clang Crashes Fixed - Fixed a crash when an unscoped enumeration declared by an opaque-enum-declaration within a class template with a dependent underlying type is subject to integral promotion. (#GH117960) +- Fix code completion crash involving PCH serialzied templates. (#GH139019) OpenACC Specific Changes ------------------------ diff --git a/clang/lib/Serialization/TemplateArgumentHasher.cpp b/clang/lib/Serialization/TemplateArgumentHasher.cpp index 598f098f526d0..5fd6941256fe2 100644 --- a/clang/lib/Serialization/TemplateArgumentHasher.cpp +++ b/clang/lib/Serialization/TemplateArgumentHasher.cpp @@ -65,7 +65,9 @@ void TemplateArgumentHasher::AddTemplateArgument(TemplateArgument TA) { switch (Kind) { case TemplateArgument::Null: - llvm_unreachable("Expected valid TemplateArgument"); + // These can occur in incomplete substitutions performed with code + // completion (see PartialOverloading). + break; case TemplateArgument::Type: AddQualType(TA.getAsType()); break; diff --git a/clang/test/CodeCompletion/GH139019.cpp b/clang/test/CodeCompletion/GH139019.cpp new file mode 100644 index 0000000000000..fed35b38362a1 --- /dev/null +++ b/clang/test/CodeCompletion/GH139019.cpp @@ -0,0 +1,26 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/test.hpp -emit-pch -o %t/1.pch +// RUN: %clang_cc1 -std=c++20 %t/test.cpp -include-pch %t/1.pch -code-completion-at=%t/test.cpp:7:17 + +//--- test.hpp +#pragma once +class provider_t +{ + public: + template + void emit(T *data) + {} +}; + +//--- test.cpp +#include "test.hpp" + +void test() +{ + provider_t *focus; + void *data; + focus->emit(&data); +} From e0586e278f96e4f104c8b8039a41c6832c35be22 Mon Sep 17 00:00:00 2001 From: dianqk Date: Sun, 1 Jun 2025 22:17:57 +0800 Subject: [PATCH 52/83] [RelLookupTableConverter] Drop unnamed_addr to avoid generating GOTPCREL relocations (#142304) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow https://github.com/llvm/llvm-project/pull/72584#issuecomment-2925759253, the patch will drop the `unnamed_addr` attribute when generating relative lookup tables. I'm not very confident about this patch, but it does resolve https://github.com/rust-lang/rust/issues/140686, https://github.com/rust-lang/rust/issues/141306 and https://github.com/rust-lang/rust/issues/141737. But I don't think this will result in worse problems. > LLVM provides that the calculation of such a constant initializer will not overflow at link time under the medium code model if x is an unnamed_addr function. However, it does not provide this guarantee for a constant initializer folded into a function body. This intrinsic can be used to avoid the possibility of overflows when loading from such a constant. ([‘llvm.load.relative’ Intrinsic](https://llvm.org/docs/LangRef.html#id2592)) This is my concern. I'm not sure how unnamed_addr provides this guarantee, and I haven't found any test cases. 
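For context, a minimal hypothetical C++ example (not taken from this
patch) of the kind of source that typically becomes a lookup table of
pointers to private unnamed_addr string constants, which
RelLookupTableConverter may then rewrite into a table of 32-bit offsets
read through llvm.load.relative:

    // Hypothetical sketch: a dense switch over string literals is the
    // classic shape that SimplifyCFG turns into a pointer lookup table.
    // On the targets special-cased by this change (AArch64, and x86
    // Darwin), the referenced constants lose unnamed_addr when the
    // relative table is built, so the pattern that produces
    // GOTPCREL-style relocations is avoided.
    const char *color_name(unsigned c) {
      switch (c) {
      case 0: return "red";
      case 1: return "green";
      case 2: return "blue";
      default: return "unknown";
      }
    }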
(cherry picked from commit aa09dbbbcfd4724bd04eea48763a1322f659637d) --- .../Utils/RelLookupTableConverter.cpp | 17 +++ .../RelLookupTableConverter/unnamed_addr.ll | 119 ++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp index 2700b4307308c..4486cba2bf6c0 100644 --- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp +++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/TargetParser/Triple.h" using namespace llvm; @@ -108,8 +109,24 @@ static GlobalVariable *createRelLookupTable(Function &Func, uint64_t Idx = 0; SmallVector RelLookupTableContents(NumElts); + Triple TT(M.getTargetTriple()); + // FIXME: This should be removed in the future. + bool ShouldDropUnnamedAddr = + // Drop unnamed_addr to avoid matching pattern in + // `handleIndirectSymViaGOTPCRel`, which generates GOTPCREL relocations + // not supported by the GNU linker and LLD versions below 18 on aarch64. + TT.isAArch64() + // Apple's ld64 (and ld-prime on Xcode 15.2) miscompile something on + // x86_64-apple-darwin. See + // https://github.com/rust-lang/rust/issues/140686 and + // https://github.com/rust-lang/rust/issues/141306. + || (TT.isX86() && TT.isOSDarwin()); + for (Use &Operand : LookupTableArr->operands()) { Constant *Element = cast(Operand); + if (ShouldDropUnnamedAddr) + if (auto *GlobalElement = dyn_cast(Element)) + GlobalElement->setUnnamedAddr(GlobalValue::UnnamedAddr::None); Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext()); Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy); Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy); diff --git a/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll b/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll new file mode 100644 index 0000000000000..78b8a4aa126c9 --- /dev/null +++ b/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; REQUIRES: x86-registered-target +; REQUIRES: aarch64-registered-target +; RUN: opt < %s -passes=rel-lookup-table-converter -relocation-model=pic -mtriple=x86_64-apple-darwin -S | FileCheck -check-prefix=x86_64-apple-darwin %s +; RUN: opt < %s -passes=rel-lookup-table-converter -relocation-model=pic -mtriple=aarch64 -S | FileCheck -check-prefix=aarch64 %s +; RUN: opt < %s -passes=rel-lookup-table-converter -relocation-model=pic -mtriple=x86_64 -S | FileCheck -check-prefix=x86_64 %s + +@a0 = private unnamed_addr constant i32 0 +@a1 = private unnamed_addr constant i32 1 +@a2 = private unnamed_addr constant i32 2 +@load_relative_1.table = private unnamed_addr constant [3 x ptr] [ptr @a0, ptr @a1, ptr @a2] + +@x0 = internal unnamed_addr constant i64 0 +@x1 = internal unnamed_addr constant i64 1 +@x2 = internal unnamed_addr constant i64 2 +@x3 = internal unnamed_addr constant i64 3 +@y0 = internal unnamed_addr constant ptr @x3 +@y1 = internal unnamed_addr constant ptr @x2 +@y2 = internal unnamed_addr constant ptr @x1 +@y3 = internal unnamed_addr constant ptr @x0 +@load_relative_2.table = private unnamed_addr constant [4 x ptr] [ptr @y3, ptr @y2, ptr @y1, ptr @y0] + +;. 
+; x86_64-apple-darwin: @a0 = private constant i32 0 +; x86_64-apple-darwin: @a1 = private constant i32 1 +; x86_64-apple-darwin: @a2 = private constant i32 2 +; x86_64-apple-darwin: @load_relative_1.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @a0 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a1 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a2 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32)], align 4 +; x86_64-apple-darwin: @x0 = internal unnamed_addr constant i64 0 +; x86_64-apple-darwin: @x1 = internal unnamed_addr constant i64 1 +; x86_64-apple-darwin: @x2 = internal unnamed_addr constant i64 2 +; x86_64-apple-darwin: @x3 = internal unnamed_addr constant i64 3 +; x86_64-apple-darwin: @y0 = internal constant ptr @x3 +; x86_64-apple-darwin: @y1 = internal constant ptr @x2 +; x86_64-apple-darwin: @y2 = internal constant ptr @x1 +; x86_64-apple-darwin: @y3 = internal constant ptr @x0 +; x86_64-apple-darwin: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +;. +; aarch64: @a0 = private constant i32 0 +; aarch64: @a1 = private constant i32 1 +; aarch64: @a2 = private constant i32 2 +; aarch64: @load_relative_1.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @a0 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a1 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a2 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32)], align 4 +; aarch64: @x0 = internal unnamed_addr constant i64 0 +; aarch64: @x1 = internal unnamed_addr constant i64 1 +; aarch64: @x2 = internal unnamed_addr constant i64 2 +; aarch64: @x3 = internal unnamed_addr constant i64 3 +; aarch64: @y0 = internal constant ptr @x3 +; aarch64: @y1 = internal constant ptr @x2 +; aarch64: @y2 = internal constant ptr @x1 +; aarch64: @y3 = internal constant ptr @x0 +; aarch64: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +;. 
+; x86_64: @a0 = private unnamed_addr constant i32 0 +; x86_64: @a1 = private unnamed_addr constant i32 1 +; x86_64: @a2 = private unnamed_addr constant i32 2 +; x86_64: @load_relative_1.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @a0 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a1 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a2 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32)], align 4 +; x86_64: @x0 = internal unnamed_addr constant i64 0 +; x86_64: @x1 = internal unnamed_addr constant i64 1 +; x86_64: @x2 = internal unnamed_addr constant i64 2 +; x86_64: @x3 = internal unnamed_addr constant i64 3 +; x86_64: @y0 = internal unnamed_addr constant ptr @x3 +; x86_64: @y1 = internal unnamed_addr constant ptr @x2 +; x86_64: @y2 = internal unnamed_addr constant ptr @x1 +; x86_64: @y3 = internal unnamed_addr constant ptr @x0 +; x86_64: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +;. +define ptr @load_relative_1(i64 %offset) { +; x86_64-apple-darwin-LABEL: define ptr @load_relative_1( +; x86_64-apple-darwin-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-apple-darwin-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-apple-darwin-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_1.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-apple-darwin-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; aarch64-LABEL: define ptr @load_relative_1( +; aarch64-SAME: i64 [[OFFSET:%.*]]) { +; aarch64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; aarch64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_1.table.rel, i64 [[RELTABLE_SHIFT]]) +; aarch64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; x86_64-LABEL: define ptr @load_relative_1( +; x86_64-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_1.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; + %gep = getelementptr inbounds [3 x ptr], ptr @load_relative_1.table, i64 0, i64 %offset + %load = load ptr, ptr %gep + ret ptr %load +} + +define ptr @load_relative_2(i64 %offset) { +; x86_64-apple-darwin-LABEL: define ptr @load_relative_2( +; x86_64-apple-darwin-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-apple-darwin-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-apple-darwin-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_2.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-apple-darwin-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; aarch64-LABEL: define ptr @load_relative_2( +; aarch64-SAME: i64 [[OFFSET:%.*]]) { +; aarch64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; aarch64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_2.table.rel, i64 [[RELTABLE_SHIFT]]) 
+; aarch64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; x86_64-LABEL: define ptr @load_relative_2( +; x86_64-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_2.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; + %gep = getelementptr inbounds [4 x ptr], ptr @load_relative_2.table, i64 0, i64 %offset + %load = load ptr, ptr %gep + ret ptr %load +} +;. +; x86_64-apple-darwin: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +;. +; aarch64: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +;. +; x86_64: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +;. From f6532710ace867e5660ea9e47ab77f2b8349896f Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Mon, 2 Jun 2025 13:35:27 -0700 Subject: [PATCH 53/83] [clang-format] Correctly annotate token-pasted function decl names (#142337) Fix #142178 (cherry picked from commit 7bf5862dbfda590282f50b14e6d7d5f990bf1900) --- clang/lib/Format/TokenAnnotator.cpp | 2 ++ clang/unittests/Format/TokenAnnotatorTest.cpp | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 11b941c5a0411..0c13356ca96de 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3839,6 +3839,8 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts, } else { if (Current.isNot(TT_StartOfName) || Current.NestingLevel != 0) return false; + while (Next && Next->startsSequence(tok::hashhash, tok::identifier)) + Next = Next->Next->Next; for (; Next; Next = Next->Next) { if (Next->is(TT_TemplateOpener) && Next->MatchingParen) { Next = Next->MatchingParen; diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 757db66c3e298..602c2d5eba29a 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2187,6 +2187,13 @@ TEST_F(TokenAnnotatorTest, UnderstandsFunctionDeclarationNames) { EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName); EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_FunctionDeclarationLParen); + Tokens = annotate("#define FUNC(foo, bar, baz) \\\n" + " auto foo##bar##baz() -> Type {}"); + ASSERT_EQ(Tokens.size(), 23u) << Tokens; + EXPECT_TOKEN(Tokens[11], tok::identifier, TT_FunctionDeclarationName); + EXPECT_TOKEN(Tokens[16], tok::l_paren, TT_FunctionDeclarationLParen); + EXPECT_TOKEN(Tokens[18], tok::arrow, TT_TrailingReturnArrow); + Tokens = annotate("int iso_time(time_t);"); ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName); From e5dd4f129454b518ab5f8d4011997746490bb3a3 Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Mon, 9 Jun 2025 00:33:20 -0400 Subject: [PATCH 54/83] [clangd] Guard against trivial FunctionProtoTypeLoc when creating inlay hints (#143087) Fixes https://github.com/llvm/llvm-project/issues/142608 (cherry picked from commit 392bd577e37d795224da6fefc4b621a3f117105e) --- clang-tools-extra/clangd/InlayHints.cpp | 7 ++++++- clang-tools-extra/clangd/unittests/InlayHintTests.cpp | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/InlayHints.cpp b/clang-tools-extra/clangd/InlayHints.cpp index 
1b1bcf78c9855..a2b856ad30519 100644 --- a/clang-tools-extra/clangd/InlayHints.cpp +++ b/clang-tools-extra/clangd/InlayHints.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/identity.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" @@ -368,7 +369,11 @@ static FunctionProtoTypeLoc getPrototypeLoc(Expr *Fn) { } if (auto F = Target.getAs()) { - return F; + // In some edge cases the AST can contain a "trivial" FunctionProtoTypeLoc + // which has null parameters. Avoid these as they don't contain useful + // information. + if (llvm::all_of(F.getParams(), llvm::identity())) + return F; } return {}; diff --git a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp index 77d78b8777fe3..8ed8401f9fce9 100644 --- a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp +++ b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp @@ -997,11 +997,16 @@ TEST(ParameterHints, FunctionPointer) { f3_t f3; using f4_t = void(__stdcall *)(int param); f4_t f4; + __attribute__((noreturn)) f4_t f5; void bar() { f1($f1[[42]]); f2($f2[[42]]); f3($f3[[42]]); f4($f4[[42]]); + // This one runs into an edge case in clang's type model + // and we can't extract the parameter name. But at least + // we shouldn't crash. + f5(42); } )cpp", ExpectedHint{"param: ", "f1"}, ExpectedHint{"param: ", "f2"}, From 22a3e6b194092182c592bdf48eef9d37c53b0cbf Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 29 May 2025 10:22:24 +0100 Subject: [PATCH 55/83] release/20.x: [AArch64] Handle XAR with v1i64 operand types (#141754) When converting ROTR(XOR(a, b)) to XAR(a, b), or ROTR(a, a) to XAR(a, zero) we were not handling v1i64 types, meaning illegal copies get generated. This addresses that by generating insert_subreg and extract_subreg for v1i64 to keep the values with the correct types. Fixes #141746 --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 24 ++++++++++++++++++- llvm/test/CodeGen/AArch64/xar.ll | 20 ++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 1387a224fa660..0aad7665f6216 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4608,9 +4608,31 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) { if (ShAmt + HsAmt != 64) return false; + // If the input is a v1i64, widen to a v2i64 to use XAR. 
+ assert((VT == MVT::v1i64 || VT == MVT::v2i64) && "Unexpected XAR type!"); + if (VT == MVT::v1i64) { + EVT SVT = MVT::v2i64; + SDValue Undef = + SDValue(CurDAG->getMachineNode(AArch64::IMPLICIT_DEF, DL, SVT), 0); + SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); + R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef, + R1, DSub), + 0); + if (R2.getValueType() == MVT::v1i64) + R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, + Undef, R2, DSub), + 0); + } + SDValue Ops[] = {R1, R2, Imm}; - CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops); + SDNode *XAR = CurDAG->getMachineNode(AArch64::XAR, DL, MVT::v2i64, Ops); + if (VT == MVT::v1i64) { + SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); + XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT, + SDValue(XAR, 0), DSub); + } + ReplaceNode(N, XAR); return true; } diff --git a/llvm/test/CodeGen/AArch64/xar.ll b/llvm/test/CodeGen/AArch64/xar.ll index d050eaf6646de..5666ab35cde48 100644 --- a/llvm/test/CodeGen/AArch64/xar.ll +++ b/llvm/test/CodeGen/AArch64/xar.ll @@ -19,4 +19,24 @@ define <2 x i64> @xar(<2 x i64> %x, <2 x i64> %y) { ret <2 x i64> %b } +define <1 x i64> @xar_v1i64(<1 x i64> %a, <1 x i64> %b) { +; SHA3-LABEL: xar_v1i64: +; SHA3: // %bb.0: +; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0 +; SHA3-NEXT: // kill: def $d1 killed $d1 def $q1 +; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #63 +; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0 +; SHA3-NEXT: ret +; +; NOSHA3-LABEL: xar_v1i64: +; NOSHA3: // %bb.0: +; NOSHA3-NEXT: eor v1.8b, v0.8b, v1.8b +; NOSHA3-NEXT: shl d0, d1, #1 +; NOSHA3-NEXT: usra d0, d1, #63 +; NOSHA3-NEXT: ret + %v.val = xor <1 x i64> %a, %b + %fshl = tail call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %v.val, <1 x i64> %v.val, <1 x i64> splat (i64 1)) + ret <1 x i64> %fshl +} + declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) From 2481e590eec725f2ad6b4945eebb978f93578404 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Mon, 2 Jun 2025 10:52:10 +0100 Subject: [PATCH 56/83] [AArch64][SME] Fix accessing the emergency spill slot with hazard padding (#142190) This patch fixes an issue where when hazard padding was enabled locals, including the emergency spill slot, could not be directly addressed. Generally, this is fine, we can materialize the constant offset in a scratch register, but if there's no register free we need to spill, and if we can't even reach the emergency spill slot then we fail to compile. This patch fixes this by ensuring that if a function has variable-sized objects and is likely to have hazard padding we enable the base pointer. Then if we know a function has hazard padding, place the emergency spill slot next to the BP/SP, to ensure it can be directly accessed without stepping over any hazard padding. 
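As a rough illustration (hypothetical C++, not one of the tests added
below; assumes an SME-enabled AArch64 target and a non-zero
-aarch64-stack-hazard-size), the problematic shape is a
streaming-compatible function that also has a variable-sized object:

    // Sketch only: the dynamic alloca makes SP unusable for addressing
    // locals, and hazard padding can push GPR locals (including the
    // emergency spill slot) beyond the signed 9-bit offset reachable
    // from FP, so the base pointer is now enabled conservatively and
    // the emergency slot is placed next to SP/BP.
    void consume(void *p);
    void f(int n) __arm_streaming_compatible {
      void *buf = __builtin_alloca(n);  // variable-sized object
      consume(buf);
    }

The tests below express the same condition directly at the IR/MIR level
via the "aarch64_pstate_sm_compatible" attribute.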
(cherry picked from commit b5cf03033251a642b184b2e0ea6bdac171c17702) --- .../Target/AArch64/AArch64RegisterInfo.cpp | 19 +++- ...ramelayout-scavengingslot-stack-hazard.mir | 99 +++++++++++++++++++ llvm/test/CodeGen/AArch64/stack-hazard.ll | 30 +++--- 3 files changed, 130 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index e9730348ba58e..367f6b626b420 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -18,6 +18,7 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64InstPrinter.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/BitVector.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -615,14 +616,27 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return true; auto &ST = MF.getSubtarget(); + const AArch64FunctionInfo *AFI = MF.getInfo(); if (ST.hasSVE() || ST.isStreaming()) { - const AArch64FunctionInfo *AFI = MF.getInfo(); // Frames that have variable sized objects and scalable SVE objects, // should always use a basepointer. if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE()) return true; } + // Frames with hazard padding can have a large offset between the frame + // pointer and GPR locals, which includes the emergency spill slot. If the + // emergency spill slot is not within range of the load/store instructions + // (which have a signed 9-bit range), we will fail to compile if it is used. + // Since hasBasePointer() is called before we know if we have hazard padding + // or an emergency spill slot we need to enable the basepointer + // conservatively. + if (AFI->hasStackHazardSlotIndex() || + (ST.getStreamingHazardSize() && + !SMEAttrs(MF.getFunction()).hasNonStreamingInterfaceAndBody())) { + return true; + } + // Conservatively estimate whether the negative offset from the frame // pointer will be sufficient to reach. If a function has a smallish // frame, it's less likely to have lots of spills and callee saved @@ -747,7 +761,8 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { assert((!MF.getSubtarget().hasSVE() || AFI->hasCalculatedStackSizeSVE()) && "Expected SVE area to be calculated by this point"); - return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE(); + return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() && + !AFI->hasStackHazardSlotIndex(); } bool AArch64RegisterInfo::requiresFrameIndexScavenging( diff --git a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir new file mode 100644 index 0000000000000..52ac36f801854 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir @@ -0,0 +1,99 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-stack-hazard-size=1024 -run-pass=prologepilog %s -o - | FileCheck %s +--- | + + define void @stack_hazard_streaming_compat() "aarch64_pstate_sm_compatible" { entry: unreachable } + define void @stack_hazard_streaming_compat_emergency_spill_slot() "aarch64_pstate_sm_compatible" { entry: unreachable } + +... 
+ +# +------------------+ +# | GPR callee-saves | +# +------------------+ <- FP +# | | +# +------------------+ +# | FPR locals | +# | %stack.1 | +# +------------------+ +# | | +# +------------------+ +# | GPR locals | +# | %stack.2 | +# | | +# +------------------+ <- BP +# | | +# +------------------+ <- SP (can't be used due to VLA) + +# In this case without the base pointer we'd need the emergency spill slot to +# access both %stack.1 and %stack.2. With the base pointer we can reach both +# without spilling. + +name: stack_hazard_streaming_compat +# CHECK-LABEL: name: stack_hazard_streaming_compat +# CHECK: bb.0: +# CHECK: STRDui $d0, $x19, 131 +# CHECK-NEXT: STRXui $x0, $x19, 1 +# CHECK: bb.1: +tracksRegLiveness: true +frameInfo: + isFrameAddressTaken: true +stack: + - { id: 0, type: variable-sized, alignment: 1 } + - { id: 1, size: 8, alignment: 8 } + - { id: 2, size: 8, alignment: 8 } +body: | + bb.0: + liveins: $x0, $x8, $d0 + $x9 = LDRXui $x0, 0 :: (load (s64)) + STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1) + STRXui $x0, %stack.2, 0 :: (store (s64) into %stack.2) + B %bb.1 + bb.1: + liveins: $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr + RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr +... +--- +# +------------------+ +# | GPR callee-saves | +# +------------------+ <- FP +# | | +# +------------------+ +# | FPR locals | +# | %stack.1 | +# +------------------+ +# | | +# +------------------+ +# | GPR locals | +# | %stack.2 | (very large) +# | | +# +------------------+ <- BP +# | | +# +------------------+ <- SP (can't be used due to VLA) + +# In this case we need to use the emergency spill slot to access %stack.1 as it +# is too far from the frame pointer and the base pointer to directly address. +# Note: This also tests that the located near the SP/BP. 
+ +name: stack_hazard_streaming_compat_emergency_spill_slot +# CHECK-LABEL: name: stack_hazard_streaming_compat_emergency_spill_slot +# CHECK: bb.0: +# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $x19, 0 +# CHECK-NEXT: $[[SCRATCH]] = ADDXri $x19, 1056, 0 +# CHECK-NEXT: STRDui $d0, killed $[[SCRATCH]], 4095 +# CHECK-NEXT: $[[SCRATCH]] = LDRXui $x19, 0 +# CHECK: bb.1: +tracksRegLiveness: true +frameInfo: + isFrameAddressTaken: true +stack: + - { id: 0, type: variable-sized, alignment: 1 } + - { id: 1, size: 8, alignment: 8 } + - { id: 2, size: 32761, alignment: 8 } +body: | + bb.0: + liveins: $x0, $x8, $d0 + $x9 = LDRXui $x0, 0 :: (load (s64)) + STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1) + B %bb.1 + bb.1: + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr + RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll index a4c2b30566a95..df4918493edf8 100644 --- a/llvm/test/CodeGen/AArch64/stack-hazard.ll +++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll @@ -2911,12 +2911,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK64-NEXT: mov x9, sp ; CHECK64-NEXT: mov w20, w0 ; CHECK64-NEXT: msub x9, x8, x8, x9 +; CHECK64-NEXT: mov x19, sp ; CHECK64-NEXT: mov sp, x9 -; CHECK64-NEXT: stur x9, [x29, #-208] -; CHECK64-NEXT: sub x9, x29, #208 -; CHECK64-NEXT: sturh wzr, [x29, #-198] -; CHECK64-NEXT: stur wzr, [x29, #-196] -; CHECK64-NEXT: sturh w8, [x29, #-200] +; CHECK64-NEXT: str x9, [x19] +; CHECK64-NEXT: add x9, x19, #0 +; CHECK64-NEXT: strh wzr, [x19, #10] +; CHECK64-NEXT: str wzr, [x19, #12] +; CHECK64-NEXT: strh w8, [x19, #8] ; CHECK64-NEXT: msr TPIDR2_EL0, x9 ; CHECK64-NEXT: .cfi_offset vg, -32 ; CHECK64-NEXT: smstop sm @@ -2925,7 +2926,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK64-NEXT: .cfi_restore vg ; CHECK64-NEXT: smstart za ; CHECK64-NEXT: mrs x8, TPIDR2_EL0 -; CHECK64-NEXT: sub x0, x29, #208 +; CHECK64-NEXT: add x0, x19, #0 ; CHECK64-NEXT: cbnz x8, .LBB33_2 ; CHECK64-NEXT: // %bb.1: // %entry ; CHECK64-NEXT: bl __arm_tpidr2_restore @@ -2991,16 +2992,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK1024-NEXT: mov x9, sp ; CHECK1024-NEXT: mov w20, w0 ; CHECK1024-NEXT: msub x9, x8, x8, x9 +; CHECK1024-NEXT: mov x19, sp ; CHECK1024-NEXT: mov sp, x9 -; CHECK1024-NEXT: sub x10, x29, #1872 -; CHECK1024-NEXT: stur x9, [x10, #-256] -; CHECK1024-NEXT: sub x9, x29, #1862 -; CHECK1024-NEXT: sub x10, x29, #1860 -; CHECK1024-NEXT: sturh wzr, [x9, #-256] -; CHECK1024-NEXT: sub x9, x29, #2128 -; CHECK1024-NEXT: stur wzr, [x10, #-256] -; CHECK1024-NEXT: sub x10, x29, #1864 -; CHECK1024-NEXT: sturh w8, [x10, #-256] +; CHECK1024-NEXT: str x9, [x19] +; CHECK1024-NEXT: add x9, x19, #0 +; CHECK1024-NEXT: strh wzr, [x19, #10] +; CHECK1024-NEXT: str wzr, [x19, #12] +; CHECK1024-NEXT: strh w8, [x19, #8] ; CHECK1024-NEXT: msr TPIDR2_EL0, x9 ; CHECK1024-NEXT: .cfi_offset vg, -32 ; CHECK1024-NEXT: smstop sm @@ -3009,7 +3007,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK1024-NEXT: .cfi_restore vg ; CHECK1024-NEXT: smstart za ; CHECK1024-NEXT: mrs x8, TPIDR2_EL0 -; CHECK1024-NEXT: sub x0, 
x29, #2128 +; CHECK1024-NEXT: add x0, x19, #0 ; CHECK1024-NEXT: cbnz x8, .LBB33_2 ; CHECK1024-NEXT: // %bb.1: // %entry ; CHECK1024-NEXT: bl __arm_tpidr2_restore From acf86c5c4dbe8a65581f5438a61a8dddc4d02b6d Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 2 Jun 2025 17:42:02 +0800 Subject: [PATCH 57/83] [CVP] Keep `ReachableCaseCount` in sync with range of condition (#142302) https://github.com/llvm/llvm-project/pull/79993 assumes that a reachable case must be contained by `CR`. However, it doesn't hold for some edge cases. This patch adds additional checks to ensure `ReachableCaseCount` is correct. Note: Similar optimization in SCCP isn't affected by this bug because it uses `CR` to compute `ReachableCaseCount`. Closes https://github.com/llvm/llvm-project/issues/142286. --- .../Scalar/CorrelatedValuePropagation.cpp | 59 +++++++++++-------- .../CorrelatedValuePropagation/switch.ll | 36 +++++++++++ 2 files changed, 71 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 8e74b8645fad9..86c4170b9a977 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -370,15 +370,30 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, { // Scope for SwitchInstProfUpdateWrapper. It must not live during // ConstantFoldTerminator() as the underlying SwitchInst can be changed. SwitchInstProfUpdateWrapper SI(*I); + ConstantRange CR = + LVI->getConstantRangeAtUse(I->getOperandUse(0), /*UndefAllowed=*/false); unsigned ReachableCaseCount = 0; for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) { ConstantInt *Case = CI->getCaseValue(); - auto *Res = dyn_cast_or_null( - LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I, - /* UseBlockValue */ true)); + std::optional Predicate = std::nullopt; + if (!CR.contains(Case->getValue())) + Predicate = false; + else if (CR.isSingleElement() && + *CR.getSingleElement() == Case->getValue()) + Predicate = true; + if (!Predicate) { + // Handle missing cases, e.g., the range has a hole. + auto *Res = dyn_cast_or_null( + LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I, + /* UseBlockValue=*/true)); + if (Res && Res->isZero()) + Predicate = false; + else if (Res && Res->isOne()) + Predicate = true; + } - if (Res && Res->isZero()) { + if (Predicate && !*Predicate) { // This case never fires - remove it. BasicBlock *Succ = CI->getCaseSuccessor(); Succ->removePredecessor(BB); @@ -395,7 +410,7 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}}); continue; } - if (Res && Res->isOne()) { + if (Predicate && *Predicate) { // This case always fires. Arrange for the switch to be turned into an // unconditional branch by replacing the switch condition with the case // value. @@ -410,28 +425,24 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, ++ReachableCaseCount; } - BasicBlock *DefaultDest = SI->getDefaultDest(); - if (ReachableCaseCount > 1 && - !isa(DefaultDest->getFirstNonPHIOrDbg())) { - ConstantRange CR = LVI->getConstantRangeAtUse(I->getOperandUse(0), - /*UndefAllowed*/ false); - // The default dest is unreachable if all cases are covered. 
- if (!CR.isSizeLargerThan(ReachableCaseCount)) { - BasicBlock *NewUnreachableBB = - BasicBlock::Create(BB->getContext(), "default.unreachable", - BB->getParent(), DefaultDest); - new UnreachableInst(BB->getContext(), NewUnreachableBB); + // The default dest is unreachable if all cases are covered. + if (!SI->defaultDestUndefined() && + !CR.isSizeLargerThan(ReachableCaseCount)) { + BasicBlock *DefaultDest = SI->getDefaultDest(); + BasicBlock *NewUnreachableBB = + BasicBlock::Create(BB->getContext(), "default.unreachable", + BB->getParent(), DefaultDest); + new UnreachableInst(BB->getContext(), NewUnreachableBB); - DefaultDest->removePredecessor(BB); - SI->setDefaultDest(NewUnreachableBB); + DefaultDest->removePredecessor(BB); + SI->setDefaultDest(NewUnreachableBB); - if (SuccessorsCount[DefaultDest] == 1) - DTU.applyUpdates({{DominatorTree::Delete, BB, DefaultDest}}); - DTU.applyUpdates({{DominatorTree::Insert, BB, NewUnreachableBB}}); + if (SuccessorsCount[DefaultDest] == 1) + DTU.applyUpdates({{DominatorTree::Delete, BB, DefaultDest}}); + DTU.applyUpdates({{DominatorTree::Insert, BB, NewUnreachableBB}}); - ++NumDeadCases; - Changed = true; - } + ++NumDeadCases; + Changed = true; } } diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/switch.ll b/llvm/test/Transforms/CorrelatedValuePropagation/switch.ll index a0794d5efe932..7e6aa3eeebe20 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/switch.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/switch.ll @@ -294,6 +294,42 @@ cleanup: ret i32 %retval.0 } +; Make sure that we don't branch into unreachable. + +define void @pr142286() { +; CHECK-LABEL: define void @pr142286() { +; CHECK-NEXT: start: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br label [[LOOP2:%.*]] +; CHECK: loop2: +; CHECK-NEXT: br label [[LOOP3:%.*]] +; CHECK: loop3: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +start: + br label %loop + +loop: + %phi = phi i8 [ -1, %start ], [ 0, %loop3 ] + br label %loop2 + +loop2: + br label %loop3 + +loop3: + switch i8 %phi, label %exit [ + i8 0, label %loop3 + i8 1, label %loop2 + i8 2, label %loop + ] + +exit: + ret void +} + declare i32 @call0() declare i32 @call1() declare i32 @call2() From 253e9321c8b6bfcecee166312a5f7da81633764e Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 12 Jun 2025 02:18:21 +0200 Subject: [PATCH 58/83] [release/20.x] Update release notes for SystemZ changes (#140060) --- llvm/docs/ReleaseNotes.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index f34003eaf0fe2..ef4ec9b56f364 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -349,6 +349,15 @@ Changes to the RISC-V Backend extension. * Added ``Sdext`` and ``Sdtrig`` extensions. +Changes to the SystemZ Backend +------------------------------ + +* Added support for the IBM z17 processor and the arch15 cpu architecture. +* Added support for `__builtin_setjump` and `__builtin_longjmp`. +* Improve inlining heuristics to fix compile time explosion in certain cases. +* Improve various cost functions. +* Improve compatibility of the assembler parser with the GNU assembler. 
+ Changes to the WebAssembly Backend ---------------------------------- From 2da24c36c7df73a686b2d990963faf3f738a510f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 28 Feb 2025 20:43:46 -0100 Subject: [PATCH 59/83] [libcxx] Provide locale conversions to tests through lit substitution (#105651) There are 2 problems today that this PR resolves: libcxx tests assume the thousands separator for fr_FR locale is x00A0 on Windows. This currently fails when run on newer versions of Windows (it seems to have been updated to the new correct value of 0x202F around windows 11. The exact windows version where it changed doesn't seem to be documented anywhere). Depending the OS version, you need different values. There are several ifdefs to determine the environment/platform-specific locale conversion values and it leads to maintenance as things change over time. This PR includes the following changes: - Provide the environment's locale conversion values through a substitution. The test can opt in by placing the substitution value in a define flag. - Remove the platform ifdefs (the swapping of values between Windows, Linux, Apple, AIX). This is accomplished through a lit feature action that fetches the environment's locale conversions (lconv) for members like 'thousands_sep' that we need to provide. This should ensure that we don't lose the effectiveness of the test itself. In addition, as a result of the above, this PR: - Fixes a handful of locale tests which unexpectedly fail on newer Windows versions. - Resolves 3 XFAIL FIX-MEs. Originally submitted in https://github.com/llvm/llvm-project/pull/86649. Co-authored-by: Rodrigo Salazar <4rodrigosalazar@gmail.com> (cherry picked from commit f909b2229ac16ae3898d8b158bee85c384173dfa) --- .../get_long_double_fr_FR.pass.cpp | 5 +- .../get_long_double_ru_RU.pass.cpp | 5 +- .../put_long_double_fr_FR.pass.cpp | 5 +- .../put_long_double_ru_RU.pass.cpp | 5 +- .../thousands_sep.pass.cpp | 34 ++----- .../thousands_sep.pass.cpp | 20 ++-- .../time.duration.nonmember/ostream.pass.cpp | 24 ++--- libcxx/test/support/locale_helpers.h | 37 ++------ libcxx/utils/libcxx/test/features.py | 91 ++++++++++++++++++- 9 files changed, 138 insertions(+), 88 deletions(-) diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp index bbb67d694970a..f02241ad36a5b 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp @@ -13,6 +13,8 @@ // REQUIRES: locale.fr_FR.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP} + // // class money_get @@ -59,7 +61,8 @@ class my_facetw }; static std::wstring convert_thousands_sep(std::wstring const& in) { - return LocaleHelpers::convert_thousands_sep_fr_FR(in); + const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP); + return LocaleHelpers::convert_thousands_sep(in, fr_sep); } #endif // TEST_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp 
b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp index e680f2ea8816a..371cf0e90c8d3 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp @@ -11,6 +11,8 @@ // REQUIRES: locale.ru_RU.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP} + // XFAIL: glibc-old-ru_RU-decimal-point // @@ -52,7 +54,8 @@ class my_facetw }; static std::wstring convert_thousands_sep(std::wstring const& in) { - return LocaleHelpers::convert_thousands_sep_ru_RU(in); + const wchar_t ru_sep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP); + return LocaleHelpers::convert_thousands_sep(in, ru_sep); } #endif // TEST_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp index 47a48deb3368c..9ac95cc52ac07 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp @@ -13,6 +13,8 @@ // REQUIRES: locale.fr_FR.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP} + // // class money_put @@ -59,7 +61,8 @@ class my_facetw }; static std::wstring convert_thousands_sep(std::wstring const& in) { - return LocaleHelpers::convert_thousands_sep_fr_FR(in); + const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP); + return LocaleHelpers::convert_thousands_sep(in, fr_sep); } #endif // TEST_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp index 4aea1016e735b..be1e397488468 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp @@ -11,6 +11,8 @@ // REQUIRES: locale.ru_RU.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP} + // XFAIL: glibc-old-ru_RU-decimal-point // @@ -52,7 +54,8 @@ class my_facetw }; static std::wstring convert_thousands_sep(std::wstring const& in) { - return LocaleHelpers::convert_thousands_sep_ru_RU(in); + const wchar_t ru_sep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP); + return LocaleHelpers::convert_thousands_sep(in, ru_sep); } #endif // TEST_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp index 
2a70741d2a0fa..6b6570576a082 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp @@ -9,13 +9,14 @@ // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd -// XFAIL: LIBCXX-FREEBSD-FIXME - // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ru_RU.UTF-8 // REQUIRES: locale.zh_CN.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP} +// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP} + // // class moneypunct_byname @@ -27,6 +28,7 @@ #include #include "test_macros.h" +#include "locale_helpers.h" #include "platform_support.h" // locale name macros class Fnf @@ -110,17 +112,10 @@ int main(int, char**) Fnt f(LOCALE_fr_FR_UTF_8, 1); assert(f.thousands_sep() == ' '); } - // The below tests work around GLIBC's use of U202F as mon_thousands_sep. + #ifndef TEST_HAS_NO_WIDE_CHARACTERS -#if defined(_CS_GNU_LIBC_VERSION) - const wchar_t fr_sep = glibc_version_less_than("2.27") ? L' ' : L'\u202F'; -#elif defined(_WIN32) - const wchar_t fr_sep = L'\u00A0'; -#elif defined(_AIX) - const wchar_t fr_sep = L'\u202F'; -#else - const wchar_t fr_sep = L' '; -#endif + const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP); + { Fwf f(LOCALE_fr_FR_UTF_8, 1); assert(f.thousands_sep() == fr_sep); @@ -140,19 +135,8 @@ int main(int, char**) assert(f.thousands_sep() == sep); } #ifndef TEST_HAS_NO_WIDE_CHARACTERS - // The below tests work around GLIBC's use of U00A0 as mon_thousands_sep - // and U002E as mon_decimal_point. - // TODO: Fix thousands_sep for 'char'. - // related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006 -# if defined(_CS_GNU_LIBC_VERSION) - // FIXME libc++ specifically works around \u00A0 by translating it into - // a regular space. - const wchar_t wsep = glibc_version_less_than("2.27") ? L'\u00A0' : L'\u202F'; -# elif defined(_WIN32) || defined(_AIX) - const wchar_t wsep = L'\u00A0'; -# else - const wchar_t wsep = L' '; -# endif + const wchar_t wsep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP); + { Fwf f(LOCALE_ru_RU_UTF_8, 1); assert(f.thousands_sep() == wsep); diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp index 850352b3bc1ec..ccecd85f2ff87 100644 --- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp @@ -14,6 +14,8 @@ // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DFR_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP} + // // template class numpunct_byname; @@ -25,6 +27,7 @@ #include #include "test_macros.h" +#include "locale_helpers.h" #include "platform_support.h" // locale name macros int main(int, char**) @@ -74,18 +77,11 @@ int main(int, char**) } #ifndef TEST_HAS_NO_WIDE_CHARACTERS { -#if defined(_CS_GNU_LIBC_VERSION) - const wchar_t wsep = glibc_version_less_than("2.27") ? 
L' ' : L'\u202f'; -# elif defined(_AIX) - const wchar_t wsep = L'\u202F'; -# elif defined(_WIN32) - const wchar_t wsep = L'\u00A0'; -# else - const wchar_t wsep = L','; -# endif - typedef wchar_t C; - const std::numpunct& np = std::use_facet >(l); - assert(np.thousands_sep() == wsep); + const wchar_t wsep = LocaleHelpers::thousands_sep_or_default(FR_THOU_SEP); + + typedef wchar_t C; + const std::numpunct& np = std::use_facet >(l); + assert(np.thousands_sep() == wsep); } #endif // TEST_HAS_NO_WIDE_CHARACTERS } diff --git a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp index aecb96b58719e..ebf907a49c43e 100644 --- a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp +++ b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp @@ -16,6 +16,9 @@ // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DFR_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP} +// ADDITIONAL_COMPILE_FLAGS: -DFR_DEC_POINT=%{LOCALE_CONV_FR_FR_UTF_8_DECIMAL_POINT} + // // template> class duration; @@ -33,6 +36,7 @@ #include #include "make_string.h" +#include "locale_helpers.h" #include "platform_support.h" // locale name macros #include "test_macros.h" @@ -88,21 +92,11 @@ static void test_values() { assert(stream_fr_FR_locale(1'000.123456s) == SV("1 000,1235s")); #endif } else { -#ifdef _WIN32 - assert(stream_fr_FR_locale(-1'000'000s) == SV("-1\u00A0000\u00A0000s")); - assert(stream_fr_FR_locale(1'000'000s) == SV("1\u00A0000\u00A0000s")); - assert(stream_fr_FR_locale(-1'000.123456s) == SV("-1\u00A0000,1235s")); - assert(stream_fr_FR_locale(1'000.123456s) == SV("1\u00A0000,1235s")); -#elif defined(__APPLE__) - assert(stream_fr_FR_locale(-1'000'000s) == SV("-1000000s")); - assert(stream_fr_FR_locale(1'000'000s) == SV("1000000s")); - assert(stream_fr_FR_locale(-1'000.123456s) == SV("-1000,1235s")); - assert(stream_fr_FR_locale(1'000.123456s) == SV("1000,1235s")); -#else - assert(stream_fr_FR_locale(-1'000'000s) == SV("-1\u202f000\u202f000s")); - assert(stream_fr_FR_locale(1'000'000s) == SV("1\u202f000\u202f000s")); - assert(stream_fr_FR_locale(-1'000.123456s) == SV("-1\u202f000,1235s")); - assert(stream_fr_FR_locale(1'000.123456s) == SV("1\u202f000,1235s")); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + assert(stream_fr_FR_locale(-1'000'000s) == L"-1" FR_THOU_SEP "000" FR_THOU_SEP "000s"); + assert(stream_fr_FR_locale(1'000'000s) == L"1" FR_THOU_SEP "000" FR_THOU_SEP "000s"); + assert(stream_fr_FR_locale(-1'000.123456s) == L"-1" FR_THOU_SEP "000" FR_DEC_POINT "1235s"); + assert(stream_fr_FR_locale(1'000.123456s) == L"1" FR_THOU_SEP "000" FR_DEC_POINT "1235s"); #endif } diff --git a/libcxx/test/support/locale_helpers.h b/libcxx/test/support/locale_helpers.h index 3eb24ebf28f52..946c2fed0f3a5 100644 --- a/libcxx/test/support/locale_helpers.h +++ b/libcxx/test/support/locale_helpers.h @@ -41,37 +41,6 @@ std::wstring convert_thousands_sep(std::wstring const& in, wchar_t sep) { return out; } -// GLIBC 2.27 and newer use U+202F NARROW NO-BREAK SPACE as a thousands separator. -// This function converts the spaces in string inputs to U+202F if need -// be. FreeBSD's locale data also uses U+202F, since 2018. -// Windows uses U+00A0 NO-BREAK SPACE. 
-std::wstring convert_thousands_sep_fr_FR(std::wstring const& in) { -#if defined(_CS_GNU_LIBC_VERSION) - if (glibc_version_less_than("2.27")) - return in; - else - return convert_thousands_sep(in, L'\u202F'); -#elif defined(__FreeBSD__) - return convert_thousands_sep(in, L'\u202F'); -#elif defined(_WIN32) - return convert_thousands_sep(in, L'\u00A0'); -#else - return in; -#endif -} - -// GLIBC 2.27 uses U+202F NARROW NO-BREAK SPACE as a thousands separator. -// FreeBSD, AIX and Windows use U+00A0 NO-BREAK SPACE. -std::wstring convert_thousands_sep_ru_RU(std::wstring const& in) { -#if defined(TEST_HAS_GLIBC) - return convert_thousands_sep(in, L'\u202F'); -# elif defined(__FreeBSD__) || defined(_WIN32) || defined(_AIX) - return convert_thousands_sep(in, L'\u00A0'); -# else - return in; -# endif -} - std::wstring negate_en_US(std::wstring s) { #if defined(_WIN32) return L"(" + s + L")"; @@ -80,6 +49,12 @@ std::wstring negate_en_US(std::wstring s) { #endif } +wchar_t thousands_sep_or_default(std::wstring s) { return !s.empty() ? s[0] : L','; } + +wchar_t mon_thousands_sep_or_default(std::wstring s) { return thousands_sep_or_default(s); } + +wchar_t decimal_point_or_default(std::wstring s) { return !s.empty() ? s[0] : L'.'; } + #endif // TEST_HAS_NO_WIDE_CHARACTERS std::string negate_en_US(std::string s) { diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index e4b413deff4db..a83dcd16b16f8 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -425,6 +425,10 @@ def _mingwSupportsModules(cfg): "fr_CA.ISO8859-1": ["fr_CA.ISO8859-1", "French_Canada.1252"], "cs_CZ.ISO8859-2": ["cs_CZ.ISO8859-2", "Czech_Czech Republic.1250"], } +provide_locale_conversions = { + "fr_FR.UTF-8": ["decimal_point", "mon_thousands_sep", "thousands_sep"], + "ru_RU.UTF-8": ["mon_thousands_sep"], +} for locale, alts in locales.items(): # Note: Using alts directly in the lambda body here will bind it to the value at the # end of the loop. Assigning it to a default argument works around this issue. @@ -432,10 +436,95 @@ def _mingwSupportsModules(cfg): Feature( name="locale.{}".format(locale), when=lambda cfg, alts=alts: hasAnyLocale(cfg, alts), - ) + actions=lambda cfg, locale=locale, alts=alts: _getLocaleFlagsAction( + cfg, locale, alts, provide_locale_conversions[locale] + ) + if locale in provide_locale_conversions + and "_LIBCPP_HAS_NO_WIDE_CHARACTERS" not in compilerMacros(cfg) + else [], + ), ) +# Provide environment locale conversions through substitutions to avoid platform specific +# maintenance. +def _getLocaleFlagsAction(cfg, locale, alts, members): + alts_list = ",".join([f'"{l}"' for l in alts]) + get_member_list = ",".join([f"lc->{m}" for m in members]) + + localeconv_info = programOutput( + cfg, + r""" + #if defined(_WIN32) && !defined(_CRT_SECURE_NO_WARNINGS) + #define _CRT_SECURE_NO_WARNINGS + #endif + #include + #include + #include + #include + + // Print each requested locale conversion member on separate lines. + int main() { + const char* locales[] = { %s }; + for (int loc_i = 0; loc_i < %d; ++loc_i) { + if (!setlocale(LC_ALL, locales[loc_i])) { + continue; // Choose first locale name that is recognized. 
+ } + + lconv* lc = localeconv(); + const char* members[] = { %s }; + for (size_t m_i = 0; m_i < %d; ++m_i) { + if (!members[m_i]) { + printf("\n"); // member value is an empty string + continue; + } + + size_t len = mbstowcs(nullptr, members[m_i], 0); + if (len == static_cast(-1)) { + fprintf(stderr, "mbstowcs failed unexpectedly\n"); + return 1; + } + // Include room for null terminator. Use malloc as these features + // are also used by lit configs that don't use -lc++ (libunwind tests). + wchar_t* dst = (wchar_t*)malloc((len + 1) * sizeof(wchar_t)); + size_t ret = mbstowcs(dst, members[m_i], len + 1); + if (ret == static_cast(-1)) { + fprintf(stderr, "mbstowcs failed unexpectedly\n"); + free(dst); + return 1; + } + + for (size_t i = 0; i < len; ++i) { + if (dst[i] > 0x7F) { + printf("\\u%%04x", dst[i]); + } else { + // c++03 does not allow basic ascii-range characters in UCNs + printf("%%c", (char)dst[i]); + } + } + printf("\n"); + free(dst); + } + return 0; + } + + return 1; + } + """ + % (alts_list, len(alts), get_member_list, len(members)), + ) + valid_define_name = re.sub(r"[.-]", "_", locale).upper() + return [ + # Provide locale conversion through a substitution. + # Example: %{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP} = L"\u202f" + AddSubstitution( + f"%{{LOCALE_CONV_{valid_define_name}_{member.upper()}}}", + lambda cfg, value=value: f"'L\"{value}\"'", + ) + for member, value in zip(members, localeconv_info.split("\n")) + ] + + # Add features representing the target platform name: darwin, linux, windows, etc... DEFAULT_FEATURES += [ Feature(name="darwin", when=lambda cfg: "__APPLE__" in compilerMacros(cfg)), From b8e10ca59b6ad9d0c2828b418b5a17391ae6cda8 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 17 Mar 2025 22:13:51 -0400 Subject: [PATCH 60/83] [libc++] Fix check for _LIBCPP_HAS_NO_WIDE_CHARACTERS in features.py (#131675) The patch that added the new locale Lit features was created before we switched to a 0-1 macro for _LIBCPP_HAS_WIDE_CHARACTERS, leading to that patch referring to the obsolete _LIBCPP_HAS_NO_WIDE_CHARACTERS macro that is never defined nowadays. (cherry picked from commit 297f6d9f6b215bd7f58cf500b979b94dedbba7bb) --- libcxx/utils/libcxx/test/features.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index a83dcd16b16f8..10fc4b0afde6b 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -440,7 +440,8 @@ def _mingwSupportsModules(cfg): cfg, locale, alts, provide_locale_conversions[locale] ) if locale in provide_locale_conversions - and "_LIBCPP_HAS_NO_WIDE_CHARACTERS" not in compilerMacros(cfg) + and ("_LIBCPP_HAS_WIDE_CHARACTERS" not in compilerMacros(cfg) or + compilerMacros(cfg)["_LIBCPP_HAS_WIDE_CHARACTERS"] == "1") else [], ), ) From 337beb73abfe05c2db1158f211042eb8763165ea Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 7 May 2025 16:09:40 +0200 Subject: [PATCH 61/83] [libc++] Add _LIBCPP_NO_UNIQUE_ADDRESS to flat_{,multi}map::value_compare (#137594) This breaks the ABI of `flat_{,multi}map::value_compare`, but this type has only been introduced in LLVM 20, so it should be very unlikely that we break anybody if we back-port this now. 
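A minimal standalone C++20 sketch of the layout effect (illustrative
names only, not the libc++ sources):

    #include <utility>

    struct Less {                        // stand-in for an empty key_compare
      bool operator()(int a, int b) const { return a < b; }
    };

    class ValueCompare {                 // mirrors flat_map::value_compare
      [[no_unique_address]] Less comp_;  // plain member before this change
    public:
      bool operator()(const std::pair<int, int>& x,
                      const std::pair<int, int>& y) const {
        return comp_(x.first, y.first);  // compare keys only
      }
    };

The attribute only matters when the comparator type is empty; in that
case the stored comparator no longer occupies a distinct byte, so
value_compare can be treated as empty for layout purposes when nested
elsewhere. That is the ABI-affecting part, which the message above
argues is low-risk because the type is new in LLVM 20.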
(cherry picked from commit ed0aa9961caa177098e9b7e69e98034d676f192e) --- libcxx/include/__flat_map/flat_map.h | 2 +- libcxx/include/__flat_map/flat_multimap.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/include/__flat_map/flat_map.h b/libcxx/include/__flat_map/flat_map.h index a0594ed9dc411..9cc39c0a1e067 100644 --- a/libcxx/include/__flat_map/flat_map.h +++ b/libcxx/include/__flat_map/flat_map.h @@ -113,7 +113,7 @@ class flat_map { class value_compare { private: - key_compare __comp_; + _LIBCPP_NO_UNIQUE_ADDRESS key_compare __comp_; _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : __comp_(__c) {} friend flat_map; diff --git a/libcxx/include/__flat_map/flat_multimap.h b/libcxx/include/__flat_map/flat_multimap.h index ea77fb5d79bd2..15fcd7995ad0a 100644 --- a/libcxx/include/__flat_map/flat_multimap.h +++ b/libcxx/include/__flat_map/flat_multimap.h @@ -115,7 +115,7 @@ class flat_multimap { class value_compare { private: - key_compare __comp_; + _LIBCPP_NO_UNIQUE_ADDRESS key_compare __comp_; _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : __comp_(__c) {} friend flat_multimap; From 6fa0cdf3720b8b9103f31009c089b56a9a176653 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 3 Jun 2025 09:51:37 -0700 Subject: [PATCH 62/83] release/20.x: [clang] Don't evaluate the initializer of constexpr-unknown parameters. (#142498) Backport 97885213bd4507b204b050c3cd570e365d21cc7d --- clang/lib/AST/ExprConstant.cpp | 7 ++++++- clang/test/SemaCXX/constant-expression-p2280r4.cpp | 12 ++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e0746f4532245..209b269122a8e 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3525,7 +3525,12 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E, // should begin within the evaluation of E // Used to be C++20 [expr.const]p5.12.2: // ... its lifetime began within the evaluation of E; - if (isa(VD) && !AllowConstexprUnknown) { + if (isa(VD)) { + if (AllowConstexprUnknown) { + Result = &Info.CurrentCall->createConstexprUnknownAPValues(VD, Base); + return true; + } + // Assume parameters of a potential constant expression are usable in // constant expressions. 
if (!Info.checkingPotentialConstantExpression() || diff --git a/clang/test/SemaCXX/constant-expression-p2280r4.cpp b/clang/test/SemaCXX/constant-expression-p2280r4.cpp index 87beeb4d3dc84..dbaebb81b93e8 100644 --- a/clang/test/SemaCXX/constant-expression-p2280r4.cpp +++ b/clang/test/SemaCXX/constant-expression-p2280r4.cpp @@ -200,3 +200,15 @@ int f() { return !get_value(); // contextually convert the function call result to bool } } + +namespace param_reference { + constexpr int arbitrary = -12345; + constexpr void f(const int &x = arbitrary) { // expected-note {{declared here}} + constexpr const int &v1 = x; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{reference to 'x' is not a constant expression}} + constexpr const int &v2 = (x, arbitrary); // expected-warning {{left operand of comma operator has no effect}} + constexpr int v3 = x; // expected-error {{must be initialized by a constant expression}} + static_assert(x==arbitrary); // expected-error {{static assertion expression is not an integral constant expression}} + static_assert(&x - &x == 0); + } +} From c4f257cb74b54c271bbff1649b71c40e8d8c50d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 10 Jun 2025 10:23:19 +0300 Subject: [PATCH 63/83] [llvm-rc] Allow ALT on non-virtkey accelerators (#143374) While https://learn.microsoft.com/en-us/windows/win32/menurc/accelerators-resource specifies that ALT only applies to virtkeys, this doesn't seem to be the case in reality. https://learn.microsoft.com/en-us/windows/win32/menurc/using-keyboard-accelerators contains an example that uses this combination: "B", ID_ACCEL5, ALT ; ALT_SHIFT+B Also Microsoft also includes such cases in their repo of test cases: https://github.com/microsoft/Windows-classic-samples/blob/263dd514ad215d0a40d1ec44b4df84b30ec11dcf/Samples/Win7Samples/begin/sdkdiff/sdkdiff.rc#L161-L164 Also MS rc.exe doesn't warn/error about this. However if applying SHIFT or CONTROL on a non-virtkey accelerator, MS rc.exe does produce this warning: warning RC4203 : SHIFT or CONTROL used without VIRTKEY Hence, keep the checks for SHIFT and CONTROL, but remove the checks for ALT, which seems to have been incorrect. This fixes one aspect of https://github.com/llvm/llvm-project/issues/143157. 
(cherry picked from commit 77347d6513de6a6f5dee8ade76e0a0ad1552c12b) --- .../llvm-rc/Inputs/tag-accelerators-ascii-alt.rc | 4 ---- .../test/tools/llvm-rc/Inputs/tag-accelerators.rc | 1 + llvm/test/tools/llvm-rc/tag-accelerators.test | 15 +++++---------- llvm/tools/llvm-rc/ResourceFileWriter.cpp | 4 ++-- 4 files changed, 8 insertions(+), 16 deletions(-) delete mode 100644 llvm/test/tools/llvm-rc/Inputs/tag-accelerators-ascii-alt.rc diff --git a/llvm/test/tools/llvm-rc/Inputs/tag-accelerators-ascii-alt.rc b/llvm/test/tools/llvm-rc/Inputs/tag-accelerators-ascii-alt.rc deleted file mode 100644 index 363263bfe4cf2..0000000000000 --- a/llvm/test/tools/llvm-rc/Inputs/tag-accelerators-ascii-alt.rc +++ /dev/null @@ -1,4 +0,0 @@ -2 ACCELERATORS { - "A", 15, ASCII, ALT -} - diff --git a/llvm/test/tools/llvm-rc/Inputs/tag-accelerators.rc b/llvm/test/tools/llvm-rc/Inputs/tag-accelerators.rc index 90e7f926cc087..bcfc35bdeab68 100644 --- a/llvm/test/tools/llvm-rc/Inputs/tag-accelerators.rc +++ b/llvm/test/tools/llvm-rc/Inputs/tag-accelerators.rc @@ -110,5 +110,6 @@ LANGUAGE 5, 1 "7", 71, VIRTKEY, NOINVERT, CONTROL, SHIFT, ALT "^j", 72, ASCII "^j", 73, ASCII, NOINVERT + "A", 15, ASCII, ALT } diff --git a/llvm/test/tools/llvm-rc/tag-accelerators.test b/llvm/test/tools/llvm-rc/tag-accelerators.test index 336727f617687..4f44aebc75011 100644 --- a/llvm/test/tools/llvm-rc/tag-accelerators.test +++ b/llvm/test/tools/llvm-rc/tag-accelerators.test @@ -37,7 +37,7 @@ ; ACCELERATORS-NEXT: Version (major): 0 ; ACCELERATORS-NEXT: Version (minor): 0 ; ACCELERATORS-NEXT: Characteristics: 0 -; ACCELERATORS-NEXT: Data size: 592 +; ACCELERATORS-NEXT: Data size: 600 ; ACCELERATORS-NEXT: Data: ( ; ACCELERATORS-NEXT: 0000: 00002A00 00000000 01002A00 01000000 |..*.......*.....| ; ACCELERATORS-NEXT: 0010: 02002A00 02000000 03002A00 03000000 |..*.......*.....| @@ -75,7 +75,8 @@ ; ACCELERATORS-NEXT: 0210: 15003700 42000000 0F003700 43000000 |..7.B.....7.C...| ; ACCELERATORS-NEXT: 0220: 1B003700 44000000 17003700 45000000 |..7.D.....7.E...| ; ACCELERATORS-NEXT: 0230: 1D003700 46000000 1F003700 47000000 |..7.F.....7.G...| -; ACCELERATORS-NEXT: 0240: 00000A00 48000000 82000A00 49000000 |....H.......I...| +; ACCELERATORS-NEXT: 0240: 00000A00 48000000 02000A00 49000000 |....H.......I...| +; ACCELERATORS-NEXT: 0250: 90004100 0F000000 |..A.....| ; ACCELERATORS-NEXT: ) @@ -94,19 +95,13 @@ ; RUN: not llvm-rc -no-preprocess /FO %t -- %p/Inputs/tag-accelerators-ascii-control.rc 2>&1 | FileCheck %s --check-prefix ASCII2 ; ASCII2: llvm-rc: Error in ACCELERATORS statement (ID 2): -; ASCII2-NEXT: Accelerator ID 15: Can only apply ALT, SHIFT or CONTROL to VIRTKEY accelerators +; ASCII2-NEXT: Accelerator ID 15: Can only apply SHIFT or CONTROL to VIRTKEY accelerators ; RUN: not llvm-rc -no-preprocess /FO %t -- %p/Inputs/tag-accelerators-ascii-shift.rc 2>&1 | FileCheck %s --check-prefix ASCII3 ; ASCII3: llvm-rc: Error in ACCELERATORS statement (ID 2): -; ASCII3-NEXT: Accelerator ID 15: Can only apply ALT, SHIFT or CONTROL to VIRTKEY accelerators - - -; RUN: not llvm-rc -no-preprocess /FO %t -- %p/Inputs/tag-accelerators-ascii-alt.rc 2>&1 | FileCheck %s --check-prefix ASCII4 - -; ASCII4: llvm-rc: Error in ACCELERATORS statement (ID 2): -; ASCII4-NEXT: Accelerator ID 15: Can only apply ALT, SHIFT or CONTROL to VIRTKEY accelerators +; ASCII3-NEXT: Accelerator ID 15: Can only apply SHIFT or CONTROL to VIRTKEY accelerators ; RUN: not llvm-rc -no-preprocess /FO %t -- %p/Inputs/tag-accelerators-bad-key-id.rc 2>&1 | FileCheck %s --check-prefix BADKEYID 
diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.cpp b/llvm/tools/llvm-rc/ResourceFileWriter.cpp index 85b59532bb83b..35c0768e33322 100644 --- a/llvm/tools/llvm-rc/ResourceFileWriter.cpp +++ b/llvm/tools/llvm-rc/ResourceFileWriter.cpp @@ -631,8 +631,8 @@ Error ResourceFileWriter::writeSingleAccelerator( if (IsASCII && IsVirtKey) return createAccError("Accelerator can't be both ASCII and VIRTKEY"); - if (!IsVirtKey && (Obj.Flags & (Opt::ALT | Opt::SHIFT | Opt::CONTROL))) - return createAccError("Can only apply ALT, SHIFT or CONTROL to VIRTKEY" + if (!IsVirtKey && (Obj.Flags & (Opt::SHIFT | Opt::CONTROL))) + return createAccError("Can only apply SHIFT or CONTROL to VIRTKEY" " accelerators"); if (Obj.Event.isInt()) { From 02aec86e4d0d1740fd6ca5a01b3154938682910d Mon Sep 17 00:00:00 2001 From: fleeting-xx Date: Thu, 5 Jun 2025 20:33:11 -0500 Subject: [PATCH 64/83] [clangd] [Modules] Fix to correctly handle module dependencies (#142828) This is a re-application of llvm/llvm-project#142090 without the unit test changes. A subsequent PR will follow that adds a unit test for module dependencies. - Fix dangling string references in the return value of getAllRequiredModules() - Change a couple of calls in getOrBuildModuleFile() to use the loop variable instead of the ModuleName parameter. --- clang-tools-extra/clangd/ModulesBuilder.cpp | 18 ++-- clang-tools-extra/clangd/ProjectModules.h | 2 +- .../clangd/ScanningProjectModules.cpp | 6 +- .../clangd/test/module_dependencies.test | 96 +++++++++++++++++++ 4 files changed, 110 insertions(+), 12 deletions(-) create mode 100644 clang-tools-extra/clangd/test/module_dependencies.test diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp index bee31fe51555e..2d2f0f6374486 100644 --- a/clang-tools-extra/clangd/ModulesBuilder.cpp +++ b/clang-tools-extra/clangd/ModulesBuilder.cpp @@ -360,9 +360,9 @@ void ModuleFileCache::remove(StringRef ModuleName) { /// Collect the directly and indirectly required module names for \param /// ModuleName in topological order. The \param ModuleName is guaranteed to /// be the last element in \param ModuleNames. -llvm::SmallVector getAllRequiredModules(ProjectModules &MDB, +llvm::SmallVector getAllRequiredModules(ProjectModules &MDB, StringRef ModuleName) { - llvm::SmallVector ModuleNames; + llvm::SmallVector ModuleNames; llvm::StringSet<> ModuleNamesSet; auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void { @@ -373,7 +373,7 @@ llvm::SmallVector getAllRequiredModules(ProjectModules &MDB, if (ModuleNamesSet.insert(RequiredModuleName).second) Visitor(RequiredModuleName, Visitor); - ModuleNames.push_back(ModuleName); + ModuleNames.push_back(ModuleName.str()); }; VisitDeps(ModuleName, VisitDeps); @@ -418,13 +418,13 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( // Get Required modules in topological order. 
auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName); for (llvm::StringRef ReqModuleName : ReqModuleNames) { - if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName)) + if (BuiltModuleFiles.isModuleUnitBuilt(ReqModuleName)) continue; if (auto Cached = Cache.getModule(ReqModuleName)) { if (IsModuleFileUpToDate(Cached->getModuleFilePath(), BuiltModuleFiles, TFS.view(std::nullopt))) { - log("Reusing module {0} from {1}", ModuleName, + log("Reusing module {0} from {1}", ReqModuleName, Cached->getModuleFilePath()); BuiltModuleFiles.addModuleFile(std::move(Cached)); continue; @@ -432,14 +432,16 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( Cache.remove(ReqModuleName); } + std::string ReqFileName = + MDB.getSourceForModuleName(ReqModuleName); llvm::Expected MF = buildModuleFile( - ModuleName, ModuleUnitFileName, getCDB(), TFS, BuiltModuleFiles); + ReqModuleName, ReqFileName, getCDB(), TFS, BuiltModuleFiles); if (llvm::Error Err = MF.takeError()) return Err; - log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath()); + log("Built module {0} to {1}", ReqModuleName, MF->getModuleFilePath()); auto BuiltModuleFile = std::make_shared(std::move(*MF)); - Cache.add(ModuleName, BuiltModuleFile); + Cache.add(ReqModuleName, BuiltModuleFile); BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile)); } diff --git a/clang-tools-extra/clangd/ProjectModules.h b/clang-tools-extra/clangd/ProjectModules.h index 48d52ac9deb89..5296508e0584d 100644 --- a/clang-tools-extra/clangd/ProjectModules.h +++ b/clang-tools-extra/clangd/ProjectModules.h @@ -42,7 +42,7 @@ class ProjectModules { llvm::unique_function; virtual std::vector getRequiredModules(PathRef File) = 0; - virtual PathRef + virtual std::string getSourceForModuleName(llvm::StringRef ModuleName, PathRef RequiredSrcFile = PathRef()) = 0; diff --git a/clang-tools-extra/clangd/ScanningProjectModules.cpp b/clang-tools-extra/clangd/ScanningProjectModules.cpp index e4dc11c1c2895..859aba3673dc4 100644 --- a/clang-tools-extra/clangd/ScanningProjectModules.cpp +++ b/clang-tools-extra/clangd/ScanningProjectModules.cpp @@ -66,7 +66,7 @@ class ModuleDependencyScanner { /// /// TODO: We should handle the case that there are multiple source files /// declaring the same module. - PathRef getSourceForModuleName(llvm::StringRef ModuleName) const; + std::string getSourceForModuleName(llvm::StringRef ModuleName) const; /// Return the direct required modules. Indirect required modules are not /// included. @@ -140,7 +140,7 @@ void ModuleDependencyScanner::globalScan( GlobalScanned = true; } -PathRef ModuleDependencyScanner::getSourceForModuleName( +std::string ModuleDependencyScanner::getSourceForModuleName( llvm::StringRef ModuleName) const { assert( GlobalScanned && @@ -189,7 +189,7 @@ class ScanningAllProjectModules : public ProjectModules { /// RequiredSourceFile is not used intentionally. See the comments of /// ModuleDependencyScanner for detail. - PathRef + std::string getSourceForModuleName(llvm::StringRef ModuleName, PathRef RequiredSourceFile = PathRef()) override { Scanner.globalScan(Mangler); diff --git a/clang-tools-extra/clangd/test/module_dependencies.test b/clang-tools-extra/clangd/test/module_dependencies.test new file mode 100644 index 0000000000000..1023b2363c9fa --- /dev/null +++ b/clang-tools-extra/clangd/test/module_dependencies.test @@ -0,0 +1,96 @@ +# A smoke test to check that a simple dependency chain for modules can work. +# +# FIXME: This fails on the Windows ARM64 build server. 
Not entirely sure why as it has been tested on +# an ARM64 Windows VM and appears to work there. +# UNSUPPORTED: host=aarch64-pc-windows-msvc +# +# RUN: rm -fr %t +# RUN: mkdir -p %t +# RUN: split-file %s %t +# +# RUN: sed -e "s|DIR|%/t|g" %t/compile_commands.json.tmpl > %t/compile_commands.json.tmp +# RUN: sed -e "s|CLANG_CC|%clang|g" %t/compile_commands.json.tmp > %t/compile_commands.json +# RUN: sed -e "s|DIR|%/t|g" %t/definition.jsonrpc.tmpl > %t/definition.jsonrpc.tmp +# +# On Windows, we need the URI in didOpen to look like "uri":"file:///C:/..." +# (with the extra slash in the front), so we add it here. +# RUN: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' %/t/definition.jsonrpc.tmp > %/t/definition.jsonrpc +# +# RUN: clangd -experimental-modules-support -lit-test < %t/definition.jsonrpc \ +# RUN: | FileCheck -strict-whitespace %t/definition.jsonrpc + +#--- A-frag.cppm +export module A:frag; +export void printA() {} + +#--- A.cppm +export module A; +export import :frag; + +#--- Use.cpp +import A; +void foo() { + print +} + +#--- compile_commands.json.tmpl +[ + { + "directory": "DIR", + "command": "CLANG_CC -fprebuilt-module-path=DIR -std=c++20 -o DIR/main.cpp.o -c DIR/Use.cpp", + "file": "DIR/Use.cpp" + }, + { + "directory": "DIR", + "command": "CLANG_CC -std=c++20 DIR/A.cppm --precompile -o DIR/A.pcm", + "file": "DIR/A.cppm" + }, + { + "directory": "DIR", + "command": "CLANG_CC -std=c++20 DIR/A-frag.cppm --precompile -o DIR/A-frag.pcm", + "file": "DIR/A-frag.cppm" + } +] + +#--- definition.jsonrpc.tmpl +{ + "jsonrpc": "2.0", + "id": 0, + "method": "initialize", + "params": { + "processId": 123, + "rootPath": "clangd", + "capabilities": { + "textDocument": { + "completion": { + "completionItem": { + "snippetSupport": true + } + } + } + }, + "trace": "off" + } +} +--- +{ + "jsonrpc": "2.0", + "method": "textDocument/didOpen", + "params": { + "textDocument": { + "uri": "file://DIR/Use.cpp", + "languageId": "cpp", + "version": 1, + "text": "import A;\nvoid foo() {\n print\n}\n" + } + } +} + +# CHECK: "message"{{.*}}printA{{.*}}(fix available) + +--- +{"jsonrpc":"2.0","id":1,"method":"textDocument/completion","params":{"textDocument":{"uri":"file://DIR/Use.cpp"},"context":{"triggerKind":1},"position":{"line":2,"character":6}}} +--- +{"jsonrpc":"2.0","id":2,"method":"shutdown"} +--- +{"jsonrpc":"2.0","method":"exit"} From 199e02a3643318a16a0a7fdcc677ac55cf769fdb Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 10 Jun 2025 13:04:51 +0200 Subject: [PATCH 65/83] Disable clangd/test/module_dependencies.test on Windows The test fails (sometimes); see discussion on https://github.com/llvm/llvm-project/pull/142828 --- clang-tools-extra/clangd/test/module_dependencies.test | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/test/module_dependencies.test b/clang-tools-extra/clangd/test/module_dependencies.test index 1023b2363c9fa..79306a73da435 100644 --- a/clang-tools-extra/clangd/test/module_dependencies.test +++ b/clang-tools-extra/clangd/test/module_dependencies.test @@ -1,8 +1,7 @@ # A smoke test to check that a simple dependency chain for modules can work. # -# FIXME: This fails on the Windows ARM64 build server. Not entirely sure why as it has been tested on -# an ARM64 Windows VM and appears to work there. 
-# UNSUPPORTED: host=aarch64-pc-windows-msvc +# FIXME: The test fails on Windows; see comments on https://github.com/llvm/llvm-project/pull/142828 +# UNSUPPORTED: system-windows # # RUN: rm -fr %t # RUN: mkdir -p %t From 9ba132be8eea545845cb22344ace56cdd43637d5 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Mon, 2 Jun 2025 20:14:28 +0530 Subject: [PATCH 66/83] [clan-reply] Backport PTU error recovery to 20.x This cherry-picks 3b4c51bb3243a02526313c51207a674139b67a00 and beffd1509af7b12eeab0d5ae85b2f8322e039287 to 20.x. Which are: [clang-repl] Fix error recovery while PTU cleanup (#127467) [clang][Interpreter] Disable part of lambda test on Windows The latter commit avoids a test failure seen in Windows builds. On main, I turned off one of the RUN lines for Windows, but reviewers on the cherry-pick preferred UNSUPPORTED to disable the whole test. So I have used UNSUPPORTED in this version for 20.x. --- clang/lib/Interpreter/IncrementalParser.cpp | 2 +- clang/test/Interpreter/lambda.cpp | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp index e43cea1baf43a..1d223e230669c 100644 --- a/clang/lib/Interpreter/IncrementalParser.cpp +++ b/clang/lib/Interpreter/IncrementalParser.cpp @@ -175,7 +175,7 @@ void IncrementalParser::CleanUpPTU(TranslationUnitDecl *MostRecentTU) { // FIXME: We should de-allocate MostRecentTU for (Decl *D : MostRecentTU->decls()) { auto *ND = dyn_cast(D); - if (!ND) + if (!ND || ND->getDeclName().isEmpty()) continue; // Check if we need to clean up the IdResolver chain. if (ND->getDeclName().getFETokenInfo() && !D->getLangOpts().ObjC && diff --git a/clang/test/Interpreter/lambda.cpp b/clang/test/Interpreter/lambda.cpp index df75274a050b2..fee6c73bf95cb 100644 --- a/clang/test/Interpreter/lambda.cpp +++ b/clang/test/Interpreter/lambda.cpp @@ -1,7 +1,11 @@ // REQUIRES: host-supports-jit // UNSUPPORTED: system-aix +// At -O2, somehow "x = 42" appears first when piped into FileCheck, +// see https://github.com/llvm/llvm-project/issues/143547. +// UNSUPPORTED: system-windows // RUN: cat %s | clang-repl | FileCheck %s -// RUN: cat %s | clang-repl -Xcc -O2 | FileCheck %s +// RUN: cat %s | clang-repl -Xcc -Xclang -Xcc -verify -Xcc -O2 | FileCheck %s + extern "C" int printf(const char *, ...); auto l1 = []() { printf("ONE\n"); return 42; }; @@ -14,4 +18,14 @@ auto r2 = l2(); auto r3 = l2(); // CHECK: TWO -%quit +// Verify non-local lambda capture error is correctly reported +int x = 42; + +// expected-error {{non-local lambda expression cannot have a capture-default}} +auto capture = [&]() { return x * 2; }; + +// Ensure interpreter continues and x is still valid +printf("x = %d\n", x); +// CHECK: x = 42 + +%quit \ No newline at end of file From 6146a88f60492b520a36f8f8f3231e15f3cc6082 Mon Sep 17 00:00:00 2001 From: Ami-zhang Date: Thu, 12 Jun 2025 20:11:14 +0800 Subject: [PATCH 67/83] [LoongArch] Fix '-mno-lsx' option not disabling LASX feature (#143821) When '-march' with LASX feature and '-mno-lsx' options are used together, '-mno-lsx' fails to disable LASX, leaving 'HasFeatureLASX=true' and causing incorrect '__loongarch_sx/asx=1' macro definition. 
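
A quick way to observe the bug is a translation unit that only checks
the predefined macros (the file name is invented; the driver line
mirrors the RUN lines added to init-loongarch.c below):

```cpp
// check.cpp, compiled with:
//   clang --target=loongarch64 -march=la464 -mno-lsx -c check.cpp
// Before this fix the #error fires, because the LASX feature (and with
// it the __loongarch_asx / __loongarch_sx macros) survived -mno-lsx.
#if defined(__loongarch_asx) || defined(__loongarch_sx)
#error "LSX/LASX macros are still defined despite -mno-lsx"
#endif

int main() { return 0; }
```
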
Fixes https://github.com/loongson-community/discussions/issues/95 (cherry picked from commit 2ecbfc0beb42abbbd2c3d28bfd576b38c44a5b46) --- clang/lib/Driver/ToolChains/Arch/LoongArch.cpp | 1 + clang/test/Preprocessor/init-loongarch.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index 0575a1ebef3a6..1666253db54cb 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -253,6 +253,7 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, Features.push_back("+lsx"); } else /*-mno-lsx*/ { Features.push_back("-lsx"); + Features.push_back("-lasx"); } } diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c index ac461b371162f..71a266b8a9157 100644 --- a/clang/test/Preprocessor/init-loongarch.c +++ b/clang/test/Preprocessor/init-loongarch.c @@ -946,6 +946,10 @@ // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s // RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -march=la464 -mno-lsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mno-lsx -march=la464 -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s // MNO-LSX-NOT: #define __loongarch_asx // MNO-LSX-NOT: #define __loongarch_simd_width // MNO-LSX-NOT: #define __loongarch_sx From b83658b7e2c876a627a06c56196eff06b2441c9d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 7 Jul 2025 16:43:58 -0700 Subject: [PATCH 68/83] Bump version to 20.1.8 --- cmake/Modules/LLVMVersion.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index 1887043b7612a..34900b999a4ae 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 7) + set(LLVM_VERSION_PATCH 8) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) From 5ac3ce8196886f245cdaa58e87036e19e7857081 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 2 Jul 2025 05:26:30 +0200 Subject: [PATCH 69/83] [WebAssembly] Fix inline assembly with vector types (#146574) This commit fixes using inline assembly with v128 results. Previously this failed with an internal assertion about a failure to legalize a `CopyFromReg` where the source register was typed `v8f16`. It looks like the type used for the destination register was whatever was listed first in the `def V128 : WebAssemblyRegClass` listing, so the types were shuffled around to have a default-supported type. A small test was added as well which failed to generate previously and should now pass in generation. This test passed on LLVM 18 additionally and regressed by accident in #93228 which was first included in LLVM 19. 
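
A rough C-level counterpart of the IR test added below (an untested
sketch; the function and file names are invented, and it assumes
clang's <wasm_simd128.h> together with GCC-style inline asm):

```cpp
// repro.cpp, built with: clang --target=wasm32 -msimd128 -c repro.cpp
#include <wasm_simd128.h>

// Inline assembly whose result is a v128 value. Before this change,
// selecting the resulting CopyFromReg asserted because v8f16 was the
// first (default) type listed for the V128 register class.
v128_t load_via_asm(const void *p) {
  v128_t v;
  __asm__("local.get %1\n"
          "v128.load 0\n"
          "local.set %0"
          : "=r"(v)
          : "r"(p));
  return v;
}
```
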
(cherry picked from commit a8a9a7f95a695c02bdf3d5821d1c62cc8e08c2ff) --- .../lib/Target/WebAssembly/WebAssemblyRegisterInfo.td | 4 ++-- llvm/test/CodeGen/WebAssembly/inline-asm.ll | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td index 17889dacc868c..31a33c1e7365b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -64,8 +64,8 @@ def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32, I32_0)>; def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64, I64_0)>; def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>; def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>; -def V128 : WebAssemblyRegClass<[v8f16, v4f32, v2f64, v2i64, v4i32, v16i8, - v8i16], +def V128 : WebAssemblyRegClass<[v2i64, v4i32, v16i8, v8i16, + v8f16, v4f32, v2f64], 128, (add V128_0)>; def FUNCREF : WebAssemblyRegClass<[funcref], 0, (add FUNCREF_0)>; def EXTERNREF : WebAssemblyRegClass<[externref], 0, (add EXTERNREF_0)>; diff --git a/llvm/test/CodeGen/WebAssembly/inline-asm.ll b/llvm/test/CodeGen/WebAssembly/inline-asm.ll index 4462cfb7aa0c4..c378fd953a555 100644 --- a/llvm/test/CodeGen/WebAssembly/inline-asm.ll +++ b/llvm/test/CodeGen/WebAssembly/inline-asm.ll @@ -129,7 +129,18 @@ entry: ret i32 %ret } +; CHECK-LABEL: v128_load +; CHECK: local.get 0 +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: local.set 1 +define <4 x i32> @v128_load(ptr %v) #1 { +entry: + %0 = tail call <4 x i32> asm "local.get $1\0Av128.load 0\0Alocal.set $0", "=r,r"(ptr %v) + ret <4 x i32> %0 +} + attributes #0 = { nounwind } +attributes #1 = { "target-features"="+simd128" } !0 = !{i32 47} !1 = !{i32 145} From 5532d5b745e4c89b1c0fd0a4e67529d13e1a282b Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Thu, 26 Jun 2025 13:26:33 +0100 Subject: [PATCH 70/83] [AArch64] Ensure the LR is preserved if we must call __arm_get_current_vg (#145760) Fixes #145635 (cherry picked from commit af7166a3f126ce4e4d2a05eccc1358bd0427cf0f) --- .../Target/AArch64/AArch64FrameLowering.cpp | 10 +++- .../AArch64/sme-must-save-lr-for-vg.ll | 49 +++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index d3abd79b85a75..74b80438a28b2 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3792,6 +3792,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, CSStackSize += SpillSize; } + // Save number of saved regs, so we can easily update CSStackSize later to + // account for any additional 64-bit GPR saves. Note: After this point + // only 64-bit GPRs can be added to SavedRegs. + unsigned NumSavedRegs = SavedRegs.count(); + // Increase the callee-saved stack size if the function has streaming mode // changes, as we will need to spill the value of the VG register. // For locally streaming functions, we spill both the streaming and @@ -3811,8 +3816,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (AFI->hasStackHazardSlotIndex()) CSStackSize += getStackHazardSize(MF); - // Save number of saved regs, so we can easily update CSStackSize later. 
- unsigned NumSavedRegs = SavedRegs.count(); + // If we must call __arm_get_current_vg in the prologue preserve the LR. + if (requiresSaveVG(MF) && !Subtarget.hasSVE()) + SavedRegs.set(AArch64::LR); // The frame record needs to be created by saving the appropriate registers uint64_t EstimatedStackSize = MFI.estimateStackSize(MF); diff --git a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll new file mode 100644 index 0000000000000..69f603458670c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -O0 < %s | FileCheck %s + +; Example of locally streaming function that (at -O0) must preserve the LR (X30) +; before calling __arm_get_current_vg. +define void @foo() "aarch64_pstate_sm_body" { +; CHECK-LABEL: foo: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: lsr x9, x9, #3 +; CHECK-NEXT: str x9, [sp, #72] // 8-byte Folded Spill +; CHECK-NEXT: bl __arm_get_current_vg +; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 +; CHECK-NEXT: smstart sm +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: ret + ret void +} From 65ce78f338cf44aee7ecfa23686851f89e036b25 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Thu, 26 Jun 2025 18:42:25 +0800 Subject: [PATCH 71/83] [LoongArch] Pre-commit test for fixing xvshuf instructions. NFC For this test, the `xvshuf.d` instruction should not be generated. This will be fixed later. 
(cherry picked from commit a19ddff980136835fead07b346bd83e9211124a0) --- .../LoongArch/lasx/ir-instruction/fix-xvshuf.ll | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll new file mode 100644 index 0000000000000..21067031cb7bb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; Fix https://github.com/llvm/llvm-project/issues/137000. + +define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: shufflevector_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) +; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0) +; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 +; CHECK-NEXT: xvori.b $xr0, $xr2, 0 +; CHECK-NEXT: ret +entry: + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} From da18fb9f04ceee5e78b19c93ae755800b586e958 Mon Sep 17 00:00:00 2001 From: ZhaoQi Date: Fri, 27 Jun 2025 10:29:32 +0800 Subject: [PATCH 72/83] [LoongArch] Fix xvshuf instructions lowering (#145868) Fix https://github.com/llvm/llvm-project/issues/137000. (cherry picked from commit 30e519e1ad185701eb9593f6c727c808d7590d1b) --- .../LoongArch/LoongArchISelLowering.cpp | 2 +- .../lasx/ir-instruction/fix-xvshuf.ll | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 4ed3c3cf92e3e..98b7a1126e560 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -1209,7 +1209,7 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef Mask, if (*it < 0) // UNDEF MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64)); else if ((*it >= 0 && *it < HalfSize) || - (*it >= MaskSize && *it <= MaskSize + HalfSize)) { + (*it >= MaskSize && *it < MaskSize + HalfSize)) { int M = *it < HalfSize ? 
*it : *it - HalfSize; MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64)); } else diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll index 21067031cb7bb..f3bec11810e9b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll @@ -6,9 +6,22 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-LABEL: shufflevector_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) -; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0) -; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: movgr2fr.d $fa2, $a0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 2 +; CHECK-NEXT: movgr2fr.d $fa3, $a0 +; CHECK-NEXT: movfr2gr.d $a0, $fa2 +; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 0 +; CHECK-NEXT: movfr2gr.d $a0, $fa3 +; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 1 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 3 +; CHECK-NEXT: movgr2fr.d $fa1, $a0 +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 2 +; CHECK-NEXT: movfr2gr.d $a0, $fa1 +; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 3 ; CHECK-NEXT: xvori.b $xr0, $xr2, 0 ; CHECK-NEXT: ret entry: From b21155f97a0a9682fdea0cb074c8f4687a4cbd35 Mon Sep 17 00:00:00 2001 From: Weining Lu Date: Sat, 7 Jun 2025 15:10:24 +0800 Subject: [PATCH 73/83] [LoongArch] Precommit test case to show bug in LoongArchISelDagToDag The optimization level should not be restored into O2. (cherry picked from commit fcc82cfa9394b2bd4380acdcf0e2854caee5a47a) --- llvm/test/CodeGen/LoongArch/isel-optnone.ll | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/isel-optnone.ll diff --git a/llvm/test/CodeGen/LoongArch/isel-optnone.ll b/llvm/test/CodeGen/LoongArch/isel-optnone.ll new file mode 100644 index 0000000000000..d44f1405d0c18 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/isel-optnone.ll @@ -0,0 +1,13 @@ +; REQUIRES: asserts +; RUN: llc %s -O0 -mtriple=loongarch64 -o /dev/null -debug-only=isel 2>&1 | FileCheck %s + +define void @fooOptnone() #0 { +; CHECK: Changing optimization level for Function fooOptnone +; CHECK: Before: -O2 ; After: -O0 + +; CHECK: Restoring optimization level for Function fooOptnone +; CHECK: Before: -O0 ; After: -O2 + ret void +} + +attributes #0 = { nounwind optnone noinline } From 1daceb20611fe04313cb024aa427793039267f8c Mon Sep 17 00:00:00 2001 From: Weining Lu Date: Sat, 7 Jun 2025 11:45:39 +0800 Subject: [PATCH 74/83] [LoongArch] Pass OptLevel to LoongArchDAGToDAGISel correctly Like many other targets did. And see RISCV for similar fix. 
Fix https://github.com/llvm/llvm-project/issues/143239 (cherry picked from commit 90a52f4942961a5c32afc69d69470c6b7e5bcb8a) --- llvm/lib/Target/LoongArch/LoongArch.h | 3 ++- llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp | 10 ++++++---- llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h | 8 +++++--- llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp | 2 +- llvm/test/CodeGen/LoongArch/O0-pipeline.ll | 8 -------- llvm/test/CodeGen/LoongArch/isel-optnone.ll | 7 ++----- llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll | 1 + 7 files changed, 17 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h index db60523738880..6635c57ff0476 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.h +++ b/llvm/lib/Target/LoongArch/LoongArch.h @@ -35,7 +35,8 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, FunctionPass *createLoongArchDeadRegisterDefinitionsPass(); FunctionPass *createLoongArchExpandAtomicPseudoPass(); -FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM); +FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM, + CodeGenOptLevel OptLevel); FunctionPass *createLoongArchMergeBaseOffsetOptPass(); FunctionPass *createLoongArchOptWInstrsPass(); FunctionPass *createLoongArchPreRAExpandPseudoPass(); diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp index cb0fb9bc9c7f9..7169cdc9a2bf9 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp @@ -25,8 +25,9 @@ using namespace llvm; char LoongArchDAGToDAGISelLegacy::ID; LoongArchDAGToDAGISelLegacy::LoongArchDAGToDAGISelLegacy( - LoongArchTargetMachine &TM) - : SelectionDAGISelLegacy(ID, std::make_unique(TM)) {} + LoongArchTargetMachine &TM, CodeGenOptLevel OptLevel) + : SelectionDAGISelLegacy( + ID, std::make_unique(TM, OptLevel)) {} INITIALIZE_PASS(LoongArchDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false) @@ -456,6 +457,7 @@ bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, // This pass converts a legalized DAG into a LoongArch-specific DAG, ready // for instruction scheduling. 
-FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) { - return new LoongArchDAGToDAGISelLegacy(TM); +FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM, + CodeGenOptLevel OptLevel) { + return new LoongArchDAGToDAGISelLegacy(TM, OptLevel); } diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h index 8a7eba418d804..2e6bc9951e9e7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h @@ -26,8 +26,9 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel { public: LoongArchDAGToDAGISel() = delete; - explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM) - : SelectionDAGISel(TM) {} + explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM, + CodeGenOptLevel OptLevel) + : SelectionDAGISel(TM, OptLevel) {} bool runOnMachineFunction(MachineFunction &MF) override { Subtarget = &MF.getSubtarget(); @@ -71,7 +72,8 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel { class LoongArchDAGToDAGISelLegacy : public SelectionDAGISelLegacy { public: static char ID; - explicit LoongArchDAGToDAGISelLegacy(LoongArchTargetMachine &TM); + explicit LoongArchDAGToDAGISelLegacy(LoongArchTargetMachine &TM, + CodeGenOptLevel OptLevel); }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 62b08be5435cd..27f97b2cebb0c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -188,7 +188,7 @@ void LoongArchPassConfig::addCodeGenPrepare() { } bool LoongArchPassConfig::addInstSelector() { - addPass(createLoongArchISelDag(getLoongArchTargetMachine())); + addPass(createLoongArchISelDag(getLoongArchTargetMachine(), getOptLevel())); return false; } diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll index 24bd4c75a9821..73d0bda895de0 100644 --- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll @@ -34,15 +34,7 @@ ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Assignment Tracking Analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: LoongArch DAG->DAG Pattern Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: Local Stack Slot Allocation diff --git a/llvm/test/CodeGen/LoongArch/isel-optnone.ll b/llvm/test/CodeGen/LoongArch/isel-optnone.ll index d44f1405d0c18..4d2528a3148ac 100644 --- a/llvm/test/CodeGen/LoongArch/isel-optnone.ll +++ b/llvm/test/CodeGen/LoongArch/isel-optnone.ll @@ -2,11 +2,8 @@ ; RUN: llc %s -O0 -mtriple=loongarch64 -o /dev/null -debug-only=isel 2>&1 | FileCheck %s define void @fooOptnone() #0 { -; CHECK: Changing optimization level for Function fooOptnone -; CHECK: Before: -O2 ; After: -O0 - -; CHECK: Restoring optimization level for Function fooOptnone -; CHECK: Before: -O0 ; After: -O2 +; CHECK-NOT: Changing optimization level for Function fooOptnone +; CHECK-NOT: Restoring optimization level for Function fooOptnone ret 
void } diff --git a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll index 08534e307e4e0..c1b1c1f7568bb 100644 --- a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll +++ b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll @@ -39,6 +39,7 @@ define dso_local ptr @f(i32 noundef signext %i) "frame-pointer"="all" { ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .LBB0_3: # %if.end ; CHECK-NEXT: ld.d $a0, $fp, -48 # 8-byte Folded Reload +; CHECK-NEXT: addi.w $a0, $a0, 0 ; CHECK-NEXT: ori $a1, $zero, 1 ; CHECK-NEXT: bne $a0, $a1, .LBB0_6 ; CHECK-NEXT: b .LBB0_4 From 9af763f038f7cf74335cf74eadc5eea4b2026f44 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Fri, 27 Jun 2025 16:56:17 -0400 Subject: [PATCH 75/83] [gtest] Fix building on OpenBSD/sparc64 (#145225) Cherry pick a patch from 1.15.0 Add missing include for raise(3) https://github.com/google/googletest/commit/7f036c5563af7d0329f20e8bb42effb04629f0c0 (cherry picked from commit 68239b76f139e44d24f3949383e3fd4bf389e1c9) --- third-party/unittest/googletest/src/gtest.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/third-party/unittest/googletest/src/gtest.cc b/third-party/unittest/googletest/src/gtest.cc index 30a5cc3f83a7e..37d380a789831 100644 --- a/third-party/unittest/googletest/src/gtest.cc +++ b/third-party/unittest/googletest/src/gtest.cc @@ -43,6 +43,7 @@ #include #include // NOLINT #include +#include #include #include #include From 0de59a293f7a7b941a17c1222d8ec5a3a0cad9ae Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Tue, 18 Mar 2025 13:04:23 +0100 Subject: [PATCH 76/83] [X86] Ignore NSW when DstSVT is i32 (#131755) We don't have PACKSS for i64->i32. Fixes: https://godbolt.org/z/qb8nxnPbK, which was introduced by ddd2f57b (cherry picked from commit 3d631914677b58a5479b310f480ac76e27d41e7e) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +- llvm/test/CodeGen/X86/vector-trunc-nowrap.ll | 88 ++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4413fbb77f415..12c40b501f627 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20889,7 +20889,8 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT, return SDValue(); unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits; - if (Flags.hasNoSignedWrap() || MinSignBits < NumSignBits) { + if ((Flags.hasNoSignedWrap() && DstSVT != MVT::i32) || + MinSignBits < NumSignBits) { PackOpcode = X86ISD::PACKSS; return In; } diff --git a/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll index 2b8eedfbbdc9c..863f30e03d2d6 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll @@ -1592,3 +1592,91 @@ entry: %1 = bitcast <8 x i8> %0 to i64 ret i64 %1 } + +define void @foo(<4 x i64> %a, <4 x i64> %b, ptr %p) "min-legal-vector-width"="256" "prefer-vector-width"="256" { +; SSE-LABEL: foo: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE-NEXT: movaps %xmm2, 16(%rdi) +; SSE-NEXT: movaps %xmm0, (%rdi) +; SSE-NEXT: retq +; +; AVX1-LABEL: foo: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX1-NEXT: 
vmovaps %xmm1, 16(%rdi) +; AVX1-NEXT: vmovaps %xmm0, (%rdi) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: foo: +; AVX2-SLOW: # %bb.0: # %entry +; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] +; AVX2-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX2-SLOW-NEXT: vmovaps %xmm1, 16(%rdi) +; AVX2-SLOW-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-SLOW-NEXT: vzeroupper +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-ALL-LABEL: foo: +; AVX2-FAST-ALL: # %bb.0: # %entry +; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 +; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-FAST-ALL-NEXT: vmovaps %xmm1, 16(%rdi) +; AVX2-FAST-ALL-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-FAST-ALL-NEXT: vzeroupper +; AVX2-FAST-ALL-NEXT: retq +; +; AVX2-FAST-PERLANE-LABEL: foo: +; AVX2-FAST-PERLANE: # %bb.0: # %entry +; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] +; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm1, 16(%rdi) +; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-FAST-PERLANE-NEXT: vzeroupper +; AVX2-FAST-PERLANE-NEXT: retq +; +; AVX512F-LABEL: foo: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: foo: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovqd %ymm1, 16(%rdi) +; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: foo: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: foo: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovqd %ymm1, 16(%rdi) +; AVX512BWVL-NEXT: vpmovqd %ymm0, (%rdi) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> + %1 = trunc nsw <8 x i64> %0 to <8 x i32> + store <8 x i32> %1, ptr %p, align 16 + ret void +} From ce455b382c08425d4ed44c03c8129a6f150decb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Rodr=C3=ADguez=20Troiti=C3=B1o?= Date: Mon, 16 Jun 2025 12:06:25 -0700 Subject: [PATCH 77/83] [objcopy][MachO] Revert special handling of encryptable binaries (#144058) Code originally added in #120995 and later corrected in #130517 but apparently still not correct according to #141494 and rust-lang/rust#141913. Revert the special handling because the test written in #120995 and #130517 still passes without those changes. Kept the test and improved it with a `__DATA` section to keep the current behaviour checked in case other changes modify the behaviour and break this edge case. 
(cherry picked from commit a0662ceba83cf8782da4047b8ee6d175591f168f) --- llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp | 8 - llvm/lib/ObjCopy/MachO/MachOObject.cpp | 4 - llvm/lib/ObjCopy/MachO/MachOObject.h | 3 - llvm/lib/ObjCopy/MachO/MachOReader.cpp | 4 - .../MachO/strip-with-encryption-info.test | 156 ++++++++++++------ 5 files changed, 106 insertions(+), 69 deletions(-) diff --git a/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp index 8ecd669e67178..93bc6631e64c8 100644 --- a/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp @@ -116,10 +116,6 @@ uint64_t MachOLayoutBuilder::layoutSegments() { const bool IsObjectFile = O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0; - // If we are emitting an encryptable binary, our load commands must have a - // separate (non-encrypted) page to themselves. - bool RequiresFirstSectionOutsideFirstPage = - O.EncryptionInfoCommandIndex.has_value(); for (LoadCommand &LC : O.LoadCommands) { auto &MLC = LC.MachOLoadCommand; StringRef Segname; @@ -173,10 +169,6 @@ uint64_t MachOLayoutBuilder::layoutSegments() { if (!Sec->hasValidOffset()) { Sec->Offset = 0; } else { - if (RequiresFirstSectionOutsideFirstPage) { - SectOffset = alignToPowerOf2(SectOffset, PageSize); - RequiresFirstSectionOutsideFirstPage = false; - } Sec->Offset = SegOffset + SectOffset; Sec->Size = Sec->Content.size(); SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size); diff --git a/llvm/lib/ObjCopy/MachO/MachOObject.cpp b/llvm/lib/ObjCopy/MachO/MachOObject.cpp index e0819d89d24ff..8d2c02dc37c99 100644 --- a/llvm/lib/ObjCopy/MachO/MachOObject.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOObject.cpp @@ -98,10 +98,6 @@ void Object::updateLoadCommandIndexes() { case MachO::LC_DYLD_EXPORTS_TRIE: ExportsTrieCommandIndex = Index; break; - case MachO::LC_ENCRYPTION_INFO: - case MachO::LC_ENCRYPTION_INFO_64: - EncryptionInfoCommandIndex = Index; - break; } } } diff --git a/llvm/lib/ObjCopy/MachO/MachOObject.h b/llvm/lib/ObjCopy/MachO/MachOObject.h index 79eb0133c2802..a454c4f502fd6 100644 --- a/llvm/lib/ObjCopy/MachO/MachOObject.h +++ b/llvm/lib/ObjCopy/MachO/MachOObject.h @@ -341,9 +341,6 @@ struct Object { /// The index of the LC_SEGMENT or LC_SEGMENT_64 load command /// corresponding to the __TEXT segment. std::optional TextSegmentCommandIndex; - /// The index of the LC_ENCRYPTION_INFO or LC_ENCRYPTION_INFO_64 load command - /// if present. 
- std::optional EncryptionInfoCommandIndex; BumpPtrAllocator Alloc; StringSaver NewSectionsContents; diff --git a/llvm/lib/ObjCopy/MachO/MachOReader.cpp b/llvm/lib/ObjCopy/MachO/MachOReader.cpp index ef0e0262f9395..2b344f36d8e78 100644 --- a/llvm/lib/ObjCopy/MachO/MachOReader.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOReader.cpp @@ -184,10 +184,6 @@ Error MachOReader::readLoadCommands(Object &O) const { case MachO::LC_DYLD_CHAINED_FIXUPS: O.ChainedFixupsCommandIndex = O.LoadCommands.size(); break; - case MachO::LC_ENCRYPTION_INFO: - case MachO::LC_ENCRYPTION_INFO_64: - O.EncryptionInfoCommandIndex = O.LoadCommands.size(); - break; } #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ case MachO::LCName: \ diff --git a/llvm/test/tools/llvm-objcopy/MachO/strip-with-encryption-info.test b/llvm/test/tools/llvm-objcopy/MachO/strip-with-encryption-info.test index 2b2bd670613de..d6f6fe10d88c2 100644 --- a/llvm/test/tools/llvm-objcopy/MachO/strip-with-encryption-info.test +++ b/llvm/test/tools/llvm-objcopy/MachO/strip-with-encryption-info.test @@ -16,7 +16,11 @@ # CHECK: fileoff: 0 # The YAML below is the following code +# ``` +# static int foo = 12345; +# int bar = 4567; # int main(int argc, char **argv) { return 0; } +# ``` # Compiled on macOS against the macOS SDK and passing `-Wl,-encryptable` # Contents are removed, since they are not important for the test. We need a # small text segment (smaller than a page). @@ -26,8 +30,8 @@ FileHeader: cputype: 0x100000C cpusubtype: 0x0 filetype: 0x2 - ncmds: 15 - sizeofcmds: 696 + ncmds: 18 + sizeofcmds: 920 flags: 0x200085 reserved: 0x0 LoadCommands: @@ -69,7 +73,7 @@ LoadCommands: - sectname: __unwind_info segname: __TEXT addr: 0x100004020 - size: 4152 + size: 88 offset: 0x4020 align: 2 reloff: 0x0 @@ -79,37 +83,61 @@ LoadCommands: reserved2: 0x0 reserved3: 0x0 - cmd: LC_SEGMENT_64 - cmdsize: 72 - segname: __LINKEDIT + cmdsize: 152 + segname: __DATA vmaddr: 4295000064 - vmsize: 592 + vmsize: 16384 fileoff: 32768 - filesize: 592 + filesize: 16384 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 0 + Sections: + - sectname: __data + segname: __DATA + addr: 0x100008000 + size: 4 + offset: 0x8000 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4295016448 + vmsize: 16384 + fileoff: 49152 + filesize: 768 maxprot: 1 initprot: 1 nsects: 0 flags: 0 - cmd: LC_DYLD_CHAINED_FIXUPS cmdsize: 16 - dataoff: 32768 - datasize: 48 + dataoff: 49152 + datasize: 56 - cmd: LC_DYLD_EXPORTS_TRIE cmdsize: 16 - dataoff: 32816 - datasize: 48 + dataoff: 49208 + datasize: 64 - cmd: LC_SYMTAB cmdsize: 24 - symoff: 32872 - nsyms: 2 - stroff: 32904 - strsize: 32 + symoff: 49280 + nsyms: 3 + stroff: 49328 + strsize: 40 - cmd: LC_DYSYMTAB cmdsize: 80 ilocalsym: 0 nlocalsym: 0 iextdefsym: 0 - nextdefsym: 2 - iundefsym: 2 + nextdefsym: 3 + iundefsym: 3 nundefsym: 0 tocoff: 0 ntoc: 0 @@ -123,12 +151,6 @@ LoadCommands: nextrel: 0 locreloff: 0 nlocrel: 0 - - cmd: LC_ENCRYPTION_INFO_64 - cmdsize: 24 - cryptoff: 16384 - cryptsize: 16384 - cryptid: 0 - pad: 0 - cmd: LC_LOAD_DYLINKER cmdsize: 32 name: 12 @@ -136,32 +158,50 @@ LoadCommands: ZeroPadBytes: 7 - cmd: LC_UUID cmdsize: 24 - uuid: 4C4C4447-5555-3144-A18A-01E9EB7E7D92 + uuid: ADDA943C-657A-3A49-9580-168E17A40FFB - cmd: LC_BUILD_VERSION cmdsize: 32 platform: 1 minos: 983040 - sdk: 983552 + sdk: 984320 ntools: 1 Tools: - - tool: 4 - version: 1310720 + - tool: 3 + version: 76481537 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 
+ version: 0 - cmd: LC_MAIN cmdsize: 24 entryoff: 16384 stacksize: 0 + - cmd: LC_ENCRYPTION_INFO_64 + cmdsize: 24 + cryptoff: 16384 + cryptsize: 16384 + cryptid: 0 + pad: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 88539136 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 - cmd: LC_FUNCTION_STARTS cmdsize: 16 - dataoff: 32864 + dataoff: 49272 datasize: 8 - cmd: LC_DATA_IN_CODE cmdsize: 16 - dataoff: 32872 + dataoff: 49280 datasize: 0 - cmd: LC_CODE_SIGNATURE cmdsize: 16 - dataoff: 32944 - datasize: 416 + dataoff: 49376 + datasize: 544 LinkEditData: ExportTrie: TerminalSize: 0 @@ -173,51 +213,67 @@ LinkEditData: ImportName: '' Children: - TerminalSize: 0 - NodeOffset: 5 + NodeOffset: 25 Name: _ Flags: 0x0 Address: 0x0 Other: 0x0 ImportName: '' Children: + - TerminalSize: 2 + NodeOffset: 9 + Name: _mh_execute_header + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' - TerminalSize: 4 - NodeOffset: 33 - Name: main + NodeOffset: 13 + Name: bar Flags: 0x0 - Address: 0x4000 + Address: 0x8000 Other: 0x0 ImportName: '' - - TerminalSize: 2 - NodeOffset: 39 - Name: _mh_execute_header + - TerminalSize: 4 + NodeOffset: 19 + Name: main Flags: 0x0 - Address: 0x0 + Address: 0x4000 Other: 0x0 ImportName: '' NameList: - n_strx: 2 n_type: 0xF n_sect: 1 + n_desc: 16 + n_value: 4294967296 + - n_strx: 22 + n_type: 0xF + n_sect: 3 n_desc: 0 - n_value: 4294983680 - - n_strx: 8 + n_value: 4295000064 + - n_strx: 27 n_type: 0xF n_sect: 1 - n_desc: 16 - n_value: 4294967296 + n_desc: 0 + n_value: 4294983680 StringTable: - ' ' - - _main - __mh_execute_header + - _bar + - _main + - '' + - '' + - '' - '' - '' - '' - '' FunctionStarts: [ 0x4000 ] - ChainedFixups: [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x30, 0x0, - 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ] + ChainedFixups: [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x34, 0x0, + 0x0, 0x0, 0x34, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ] ... - From fa792cd4c630b1b85a599f19204d1f36e0fa3a41 Mon Sep 17 00:00:00 2001 From: dianqk Date: Wed, 25 Jun 2025 18:39:36 +0800 Subject: [PATCH 78/83] [AsmPrinter] Always emit global equivalents if there is non-global uses (#145648) A case found from https://github.com/rust-lang/rust/issues/142752: https://llvm.godbolt.org/z/T7ce9saWh. 
We should emit `@bar_0` for the following code:

```llvm
target triple = "x86_64-unknown-linux-gnu"

@rel_0 = private unnamed_addr constant [1 x i32] [
  i32 trunc (i64 sub (i64 ptrtoint (ptr @bar_0 to i64), i64 ptrtoint (ptr @rel_0 to i64)) to i32)]
@bar_0 = internal unnamed_addr constant ptr @foo_0, align 8
@foo_0 = external global ptr, align 8

define void @foo(ptr %arg0) {
  store ptr @bar_0, ptr %arg0, align 8
  ret void
}
```

(cherry picked from commit 630d55cce45f8b409367914ef372047c8c43c511)
---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 24 ++++++++++++-------
 llvm/test/MC/X86/gotpcrel-non-globals.ll   | 36 ++++++++++++++++++++++
 2 files changed, 53 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/MC/X86/gotpcrel-non-globals.ll

diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e77abf429e6b4..c8f567e5f4195 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2139,16 +2139,20 @@ void AsmPrinter::emitFunctionBody() {
 }
 
 /// Compute the number of Global Variables that uses a Constant.
-static unsigned getNumGlobalVariableUses(const Constant *C) {
-  if (!C)
+static unsigned getNumGlobalVariableUses(const Constant *C,
+                                         bool &HasNonGlobalUsers) {
+  if (!C) {
+    HasNonGlobalUsers = true;
     return 0;
+  }
 
   if (isa<GlobalVariable>(C))
     return 1;
 
   unsigned NumUses = 0;
   for (const auto *CU : C->users())
-    NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU));
+    NumUses +=
+        getNumGlobalVariableUses(dyn_cast<Constant>(CU), HasNonGlobalUsers);
 
   return NumUses;
 }
@@ -2159,7 +2163,8 @@ static unsigned getNumGlobalVariableUses(const Constant *C) {
 /// candidates are skipped and are emitted later in case at least one cstexpr
 /// isn't replaced by a PC relative GOT entry access.
 static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
-                                     unsigned &NumGOTEquivUsers) {
+                                     unsigned &NumGOTEquivUsers,
+                                     bool &HasNonGlobalUsers) {
   // Global GOT equivalents are unnamed private globals with a constant
   // pointer initializer to another global symbol. They must point to a
   // GlobalVariable or Function, i.e., as GlobalValue.
@@ -2171,7 +2176,8 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
   // To be a got equivalent, at least one of its users need to be a constant
   // expression used by another global variable.
   for (const auto *U : GV->users())
-    NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U));
+    NumGOTEquivUsers +=
+        getNumGlobalVariableUses(dyn_cast<Constant>(U), HasNonGlobalUsers);
 
   return NumGOTEquivUsers > 0;
 }
@@ -2189,9 +2195,13 @@ void AsmPrinter::computeGlobalGOTEquivs(Module &M) {
 
   for (const auto &G : M.globals()) {
     unsigned NumGOTEquivUsers = 0;
-    if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers))
+    bool HasNonGlobalUsers = false;
+    if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers, HasNonGlobalUsers))
       continue;
-
+    // If non-global variables use it, we still need to emit it.
+    // Add 1 here, then emit it in `emitGlobalGOTEquivs`.
+    if (HasNonGlobalUsers)
+      NumGOTEquivUsers += 1;
     const MCSymbol *GOTEquivSym = getSymbol(&G);
     GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers);
   }
diff --git a/llvm/test/MC/X86/gotpcrel-non-globals.ll b/llvm/test/MC/X86/gotpcrel-non-globals.ll
new file mode 100644
index 0000000000000..222d2d73ff728
--- /dev/null
+++ b/llvm/test/MC/X86/gotpcrel-non-globals.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that we emit the `@bar_*` symbols, and that we don't emit multiple symbols.
+
+; CHECK-LABEL: .Lrel_0:
+; CHECK: .long foo_0@GOTPCREL+0
+; CHECK-LABEL: .Lrel_1_failed:
+; CHECK: .long bar_1-foo_0
+; CHECK-LABEL: .Lrel_2:
+; CHECK: .long foo_2@GOTPCREL+0
+
+; CHECK: bar_0:
+; CHECK: bar_1:
+; CHECK: bar_2_indirect:
+
+@rel_0 = private unnamed_addr constant [1 x i32] [
+  i32 trunc (i64 sub (i64 ptrtoint (ptr @bar_0 to i64), i64 ptrtoint (ptr @rel_0 to i64)) to i32)]
+@rel_1_failed = private unnamed_addr constant [1 x i32] [
+  i32 trunc (i64 sub (i64 ptrtoint (ptr @bar_1 to i64), i64 ptrtoint (ptr @foo_0 to i64)) to i32)]
+@rel_2 = private unnamed_addr constant [1 x i32] [
+  i32 trunc (i64 sub (i64 ptrtoint (ptr @bar_2_indirect to i64), i64 ptrtoint (ptr @rel_2 to i64)) to i32)]
+@bar_0 = internal unnamed_addr constant ptr @foo_0, align 8
+@bar_1 = internal unnamed_addr constant ptr @foo_1, align 8
+@bar_2_indirect = internal unnamed_addr constant ptr @foo_2, align 8
+@foo_0 = external global ptr, align 8
+@foo_1 = external global ptr, align 8
+@foo_2 = external global ptr, align 8
+
+define void @foo(ptr %arg0, ptr %arg1) {
+  store ptr @bar_0, ptr %arg0, align 8
+  store ptr @bar_1, ptr %arg1, align 8
+  store ptr getelementptr (i8, ptr @bar_2_indirect, i32 1), ptr %arg1, align 8
+  ret void
+}
From 0c9f909b7976dc4a1643bf708484dc63725ecb21 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Wed, 18 Jun 2025 12:58:17 +0100
Subject: [PATCH 79/83] [AArch64][SME] Fix restoring callee-saves from FP with
 hazard padding (#143371)

Currently, when hazard-padding is enabled, a (fixed-size) hazard slot is
placed in the CS area, just after the frame record. The size of this
slot is part of the "CalleeSaveBaseToFrameRecordOffset". The SVE
epilogue emission code assumed this offset was always zero and
incorrectly set the stack pointer, resulting in all SVE registers being
reloaded from incorrect offsets.

```
| prev_lr                           |
| prev_fp                           |
| (a.k.a. "frame record")           |
|-----------------------------------| <- fp(=x29)
|                                   |
|-----------------------------------| <- callee-saved base
|                                   |
| callee-saved fp/simd/SVE regs     |
|                                   |
|-----------------------------------| <- SVE callee-save base
```

i.e. in the above diagram, the code assumed `fp == callee-saved base`.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |   37 +-
 llvm/test/CodeGen/AArch64/stack-hazard.ll     | 1173 +++++++++++++++++
 2 files changed, 1198 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 74b80438a28b2..ce947951b26ef 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2501,20 +2501,33 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
 
   // Deallocate the SVE area.
   if (SVEStackSize) {
-    // If we have stack realignment or variable sized objects on the stack,
-    // restore the stack pointer from the frame pointer prior to SVE CSR
-    // restoration.
-    if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
-      if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
-        // Set SP to start of SVE callee-save area from which they can
-        // be reloaded. The code below will deallocate the stack space
-        // space by moving FP -> SP.
-        emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
-                        StackOffset::getScalable(-CalleeSavedSize), TII,
+    int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
+    // If we have stack realignment or variable-sized objects we must use the
+    // FP to restore SVE callee saves (as there is an unknown amount of
+    // data/padding between the SP and SVE CS area).
+    Register BaseForSVEDealloc =
+        (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
+                                                              : AArch64::SP;
+    if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
+      Register CalleeSaveBase = AArch64::FP;
+      if (int64_t CalleeSaveBaseOffset =
+              AFI->getCalleeSaveBaseToFrameRecordOffset()) {
+        // If we have a non-zero offset to the non-SVE CS base we need to
+        // compute the base address by subtracting the offset in a temporary
+        // register first (to avoid briefly deallocating the SVE CS).
+        CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
+            &AArch64::GPR64RegClass);
+        emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
+                        StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
                         MachineInstr::FrameDestroy);
       }
-    } else {
-      if (AFI->getSVECalleeSavedStackSize()) {
+      // The code below will deallocate the stack space by moving the
+      // SP to the start of the SVE callee-save area.
+      emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
+                      StackOffset::getScalable(-SVECalleeSavedSize), TII,
+                      MachineInstr::FrameDestroy);
+    } else if (BaseForSVEDealloc == AArch64::SP) {
+      if (SVECalleeSavedSize) {
         // Deallocate the non-SVE locals first before we can deallocate (and
         // restore callee saves) from the SVE area.
         emitFrameOffset(
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll
index df4918493edf8..fcedcb8e24222 100644
--- a/llvm/test/CodeGen/AArch64/stack-hazard.ll
+++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll
@@ -3152,3 +3152,1176 @@ entry:
   call void @bar(ptr noundef nonnull %b)
   ret i32 0
 }
+
+
+define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
+; CHECK0-LABEL: svecc_call_dynamic_alloca:
+; CHECK0:       // %bb.0: // %entry
+; CHECK0-NEXT:    stp x29, x30, [sp, #-64]!
// 16-byte Folded Spill +; CHECK0-NEXT: .cfi_def_cfa_offset 64 +; CHECK0-NEXT: cntd x9 +; CHECK0-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill +; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w20, -16 +; CHECK0-NEXT: .cfi_offset w26, -24 +; CHECK0-NEXT: .cfi_offset w27, -32 +; CHECK0-NEXT: .cfi_offset w28, -40 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; 
CHECK0-NEXT: mov w9, w0 +; CHECK0-NEXT: mov x8, sp +; CHECK0-NEXT: mov w2, w1 +; CHECK0-NEXT: add x9, x9, #15 +; CHECK0-NEXT: mov x19, sp +; CHECK0-NEXT: and x9, x9, #0x1fffffff0 +; CHECK0-NEXT: sub x8, x8, x9 +; CHECK0-NEXT: mov sp, x8 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: bl __arm_sme_state +; CHECK0-NEXT: and x20, x0, #0x1 +; CHECK0-NEXT: .cfi_offset vg, -48 +; CHECK0-NEXT: tbz w20, #0, .LBB35_2 +; CHECK0-NEXT: // %bb.1: // %entry +; CHECK0-NEXT: smstop sm +; CHECK0-NEXT: .LBB35_2: // %entry +; CHECK0-NEXT: mov x0, x8 +; CHECK0-NEXT: mov w1, #45 // =0x2d +; CHECK0-NEXT: bl memset +; CHECK0-NEXT: tbz w20, #0, .LBB35_4 +; CHECK0-NEXT: // %bb.3: // %entry +; CHECK0-NEXT: smstart sm +; CHECK0-NEXT: .LBB35_4: // %entry +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: .cfi_restore vg +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: .cfi_restore z8 +; CHECK0-NEXT: .cfi_restore z9 +; CHECK0-NEXT: .cfi_restore z10 +; CHECK0-NEXT: .cfi_restore z11 +; CHECK0-NEXT: .cfi_restore z12 +; CHECK0-NEXT: .cfi_restore z13 +; CHECK0-NEXT: .cfi_restore z14 +; CHECK0-NEXT: .cfi_restore z15 +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: .cfi_def_cfa wsp, 64 +; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload +; CHECK0-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: .cfi_def_cfa_offset 0 +; CHECK0-NEXT: .cfi_restore w19 +; CHECK0-NEXT: .cfi_restore w20 +; CHECK0-NEXT: .cfi_restore w26 +; CHECK0-NEXT: .cfi_restore w27 +; CHECK0-NEXT: .cfi_restore 
w28 +; CHECK0-NEXT: .cfi_restore w30 +; CHECK0-NEXT: .cfi_restore w29 +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_dynamic_alloca: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 128 +; CHECK64-NEXT: cntd x9 +; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -8 +; CHECK64-NEXT: .cfi_offset w20, -16 +; CHECK64-NEXT: .cfi_offset w26, -24 +; CHECK64-NEXT: .cfi_offset w27, -32 +; CHECK64-NEXT: .cfi_offset w28, -40 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 
0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: sub sp, sp, #64 +; CHECK64-NEXT: mov w9, w0 +; CHECK64-NEXT: mov x8, sp +; CHECK64-NEXT: mov w2, w1 +; CHECK64-NEXT: add x9, x9, #15 +; CHECK64-NEXT: mov x19, sp +; CHECK64-NEXT: and x9, x9, #0x1fffffff0 +; CHECK64-NEXT: sub x8, x8, x9 +; CHECK64-NEXT: mov sp, x8 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: bl __arm_sme_state +; CHECK64-NEXT: and x20, x0, #0x1 +; CHECK64-NEXT: .cfi_offset vg, -48 +; CHECK64-NEXT: tbz w20, #0, .LBB35_2 +; CHECK64-NEXT: // %bb.1: // %entry +; CHECK64-NEXT: smstop sm +; CHECK64-NEXT: .LBB35_2: // %entry +; CHECK64-NEXT: mov x0, x8 +; CHECK64-NEXT: mov w1, #45 // =0x2d +; CHECK64-NEXT: bl memset +; CHECK64-NEXT: tbz w20, #0, .LBB35_4 +; CHECK64-NEXT: // %bb.3: // %entry +; CHECK64-NEXT: smstart sm +; CHECK64-NEXT: .LBB35_4: // %entry +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: .cfi_restore vg +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: .cfi_restore z8 +; CHECK64-NEXT: .cfi_restore z9 +; CHECK64-NEXT: .cfi_restore z10 +; CHECK64-NEXT: .cfi_restore z11 +; CHECK64-NEXT: .cfi_restore z12 +; CHECK64-NEXT: .cfi_restore z13 +; CHECK64-NEXT: .cfi_restore z14 +; CHECK64-NEXT: .cfi_restore z15 +; CHECK64-NEXT: sub sp, x29, 
#64 +; CHECK64-NEXT: .cfi_def_cfa wsp, 128 +; CHECK64-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK64-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload +; CHECK64-NEXT: ldp x27, x26, [sp, #96] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 0 +; CHECK64-NEXT: .cfi_restore w19 +; CHECK64-NEXT: .cfi_restore w20 +; CHECK64-NEXT: .cfi_restore w26 +; CHECK64-NEXT: .cfi_restore w27 +; CHECK64-NEXT: .cfi_restore w28 +; CHECK64-NEXT: .cfi_restore w30 +; CHECK64-NEXT: .cfi_restore w29 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_dynamic_alloca: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 1088 +; CHECK1024-NEXT: cntd x9 +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x20, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1080] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -8 +; CHECK1024-NEXT: .cfi_offset w20, -16 +; CHECK1024-NEXT: .cfi_offset w26, -24 +; CHECK1024-NEXT: .cfi_offset w27, -32 +; CHECK1024-NEXT: .cfi_offset w28, -40 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul 
vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: sub sp, sp, #1024 +; CHECK1024-NEXT: mov w9, w0 +; CHECK1024-NEXT: mov x8, sp +; CHECK1024-NEXT: mov w2, w1 +; CHECK1024-NEXT: add x9, x9, #15 +; CHECK1024-NEXT: mov x19, sp +; CHECK1024-NEXT: and x9, x9, #0x1fffffff0 +; CHECK1024-NEXT: sub x8, x8, x9 +; CHECK1024-NEXT: mov sp, x8 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: bl __arm_sme_state +; CHECK1024-NEXT: and x20, x0, #0x1 +; CHECK1024-NEXT: .cfi_offset vg, -48 +; CHECK1024-NEXT: tbz w20, #0, .LBB35_2 +; CHECK1024-NEXT: // %bb.1: // %entry +; CHECK1024-NEXT: smstop sm +; CHECK1024-NEXT: .LBB35_2: // %entry +; CHECK1024-NEXT: mov x0, x8 +; CHECK1024-NEXT: mov w1, #45 // =0x2d +; CHECK1024-NEXT: bl memset +; CHECK1024-NEXT: tbz w20, #0, .LBB35_4 +; CHECK1024-NEXT: // %bb.3: // %entry +; CHECK1024-NEXT: smstart sm +; CHECK1024-NEXT: .LBB35_4: // %entry +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: .cfi_restore vg +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul 
vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: .cfi_restore z8 +; CHECK1024-NEXT: .cfi_restore z9 +; CHECK1024-NEXT: .cfi_restore z10 +; CHECK1024-NEXT: .cfi_restore z11 +; CHECK1024-NEXT: .cfi_restore z12 +; CHECK1024-NEXT: .cfi_restore z13 +; CHECK1024-NEXT: .cfi_restore z14 +; CHECK1024-NEXT: .cfi_restore z15 +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088 +; CHECK1024-NEXT: ldr x19, [sp, #1080] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x20, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-NEXT: .cfi_restore w19 +; CHECK1024-NEXT: .cfi_restore w20 +; CHECK1024-NEXT: .cfi_restore w26 +; CHECK1024-NEXT: .cfi_restore w27 +; CHECK1024-NEXT: .cfi_restore w28 +; CHECK1024-NEXT: .cfi_restore w30 +; CHECK1024-NEXT: .cfi_restore w29 +; CHECK1024-NEXT: ret +entry: + %ptr = alloca i8, i32 %P1 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + %call = call ptr @memset(ptr noundef nonnull %ptr, i32 noundef 45, i32 noundef %P2) + ret i32 -396142473 +} + + +define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { +; CHECK0-LABEL: svecc_call_realign: +; CHECK0: // %bb.0: // %entry +; CHECK0-NEXT: stp x29, x30, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK0-NEXT: .cfi_def_cfa_offset 64 +; CHECK0-NEXT: cntd x9 +; CHECK0-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w26, -16 +; CHECK0-NEXT: .cfi_offset w27, -24 +; CHECK0-NEXT: .cfi_offset w28, -32 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; CHECK0-NEXT: sub x9, sp, #1024 +; CHECK0-NEXT: 
and sp, x9, #0xffffffffffffffe0 +; CHECK0-NEXT: mov w2, w1 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: bl __arm_sme_state +; CHECK0-NEXT: and x19, x0, #0x1 +; CHECK0-NEXT: .cfi_offset vg, -48 +; CHECK0-NEXT: tbz w19, #0, .LBB36_2 +; CHECK0-NEXT: // %bb.1: // %entry +; CHECK0-NEXT: smstop sm +; CHECK0-NEXT: .LBB36_2: // %entry +; CHECK0-NEXT: mov x0, sp +; CHECK0-NEXT: mov w1, #45 // =0x2d +; CHECK0-NEXT: bl memset +; CHECK0-NEXT: tbz w19, #0, .LBB36_4 +; CHECK0-NEXT: // %bb.3: // %entry +; CHECK0-NEXT: smstart sm +; CHECK0-NEXT: .LBB36_4: // %entry +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: .cfi_restore vg +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: .cfi_restore z8 +; CHECK0-NEXT: .cfi_restore z9 +; CHECK0-NEXT: .cfi_restore z10 +; CHECK0-NEXT: .cfi_restore z11 +; CHECK0-NEXT: .cfi_restore z12 +; CHECK0-NEXT: .cfi_restore z13 +; CHECK0-NEXT: .cfi_restore z14 +; CHECK0-NEXT: .cfi_restore z15 +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: .cfi_def_cfa wsp, 64 +; CHECK0-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: .cfi_def_cfa_offset 0 +; CHECK0-NEXT: .cfi_restore w19 +; CHECK0-NEXT: .cfi_restore w26 +; CHECK0-NEXT: .cfi_restore w27 +; CHECK0-NEXT: .cfi_restore w28 +; CHECK0-NEXT: .cfi_restore w30 +; CHECK0-NEXT: .cfi_restore w29 +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_realign: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 128 +; CHECK64-NEXT: cntd x9 +; 
CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -16 +; CHECK64-NEXT: .cfi_offset w26, -24 +; CHECK64-NEXT: .cfi_offset w27, -32 +; CHECK64-NEXT: .cfi_offset w28, -40 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 
0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: sub x9, sp, #1088 +; CHECK64-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK64-NEXT: mov w2, w1 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: bl __arm_sme_state +; CHECK64-NEXT: and x19, x0, #0x1 +; CHECK64-NEXT: .cfi_offset vg, -48 +; CHECK64-NEXT: tbz w19, #0, .LBB36_2 +; CHECK64-NEXT: // %bb.1: // %entry +; CHECK64-NEXT: smstop sm +; CHECK64-NEXT: .LBB36_2: // %entry +; CHECK64-NEXT: mov x0, sp +; CHECK64-NEXT: mov w1, #45 // =0x2d +; CHECK64-NEXT: bl memset +; CHECK64-NEXT: tbz w19, #0, .LBB36_4 +; CHECK64-NEXT: // %bb.3: // %entry +; CHECK64-NEXT: smstart sm +; CHECK64-NEXT: .LBB36_4: // %entry +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: .cfi_restore vg +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: .cfi_restore z8 +; CHECK64-NEXT: .cfi_restore z9 +; CHECK64-NEXT: .cfi_restore z10 +; CHECK64-NEXT: .cfi_restore z11 +; CHECK64-NEXT: .cfi_restore z12 +; CHECK64-NEXT: .cfi_restore z13 +; CHECK64-NEXT: .cfi_restore z14 +; CHECK64-NEXT: .cfi_restore z15 +; CHECK64-NEXT: sub sp, x29, #64 +; CHECK64-NEXT: .cfi_def_cfa wsp, 128 +; CHECK64-NEXT: ldp x26, x19, [sp, #104] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x28, x27, [sp, #88] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 0 +; CHECK64-NEXT: .cfi_restore w19 +; CHECK64-NEXT: .cfi_restore w26 +; CHECK64-NEXT: .cfi_restore w27 +; CHECK64-NEXT: .cfi_restore w28 +; CHECK64-NEXT: .cfi_restore w30 +; 
CHECK64-NEXT: .cfi_restore w29 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_realign: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 1088 +; CHECK1024-NEXT: cntd x9 +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -16 +; CHECK1024-NEXT: .cfi_offset w26, -24 +; CHECK1024-NEXT: .cfi_offset w27, -32 +; CHECK1024-NEXT: .cfi_offset w28, -40 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: 
.cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: sub x9, sp, #2048 +; CHECK1024-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK1024-NEXT: mov w2, w1 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: bl __arm_sme_state +; CHECK1024-NEXT: and x19, x0, #0x1 +; CHECK1024-NEXT: .cfi_offset vg, -48 +; CHECK1024-NEXT: tbz w19, #0, .LBB36_2 +; CHECK1024-NEXT: // %bb.1: // %entry +; CHECK1024-NEXT: smstop sm +; CHECK1024-NEXT: .LBB36_2: // %entry +; CHECK1024-NEXT: mov x0, sp +; CHECK1024-NEXT: mov w1, #45 // =0x2d +; CHECK1024-NEXT: bl memset +; CHECK1024-NEXT: tbz w19, #0, .LBB36_4 +; CHECK1024-NEXT: // %bb.3: // %entry +; CHECK1024-NEXT: smstart sm +; CHECK1024-NEXT: .LBB36_4: // %entry +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: .cfi_restore vg +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: .cfi_restore z8 +; CHECK1024-NEXT: .cfi_restore z9 +; CHECK1024-NEXT: .cfi_restore z10 +; CHECK1024-NEXT: .cfi_restore z11 +; 
CHECK1024-NEXT: .cfi_restore z12 +; CHECK1024-NEXT: .cfi_restore z13 +; CHECK1024-NEXT: .cfi_restore z14 +; CHECK1024-NEXT: .cfi_restore z15 +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088 +; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-NEXT: .cfi_restore w19 +; CHECK1024-NEXT: .cfi_restore w26 +; CHECK1024-NEXT: .cfi_restore w27 +; CHECK1024-NEXT: .cfi_restore w28 +; CHECK1024-NEXT: .cfi_restore w30 +; CHECK1024-NEXT: .cfi_restore w29 +; CHECK1024-NEXT: ret +entry: + %ptr = alloca i8, i32 1000, align 32 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + %call = call ptr @memset(ptr noundef nonnull %ptr, i32 noundef 45, i32 noundef %P2) + ret i32 -396142473 +} + + +define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { +; CHECK0-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK0: // %bb.0: // %entry +; CHECK0-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill +; CHECK0-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: sub sp, 
sp, #48 +; CHECK0-NEXT: addvl sp, sp, #-1 +; CHECK0-NEXT: mov x19, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w20, -16 +; CHECK0-NEXT: .cfi_offset w26, -24 +; CHECK0-NEXT: .cfi_offset w27, -32 +; CHECK0-NEXT: .cfi_offset w28, -48 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; CHECK0-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK0-NEXT: ubfiz x8, x0, #2, #32 +; CHECK0-NEXT: mov x9, sp +; CHECK0-NEXT: add x8, x8, #15 +; CHECK0-NEXT: and x8, x8, #0x7fffffff0 +; CHECK0-NEXT: sub x20, x9, x8 +; CHECK0-NEXT: mov sp, x20 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: add x0, x19, #8 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: addvl x0, x29, #-19 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: mov x0, x20 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload 
+; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK0-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x26, x20, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill +; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: sub sp, sp, #112 +; CHECK64-NEXT: addvl sp, sp, #-1 +; CHECK64-NEXT: mov x19, sp +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -16 +; CHECK64-NEXT: .cfi_offset w20, -24 +; CHECK64-NEXT: .cfi_offset w26, -32 +; CHECK64-NEXT: .cfi_offset w27, -40 +; CHECK64-NEXT: .cfi_offset w28, -48 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 
0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK64-NEXT: ubfiz x8, x0, #2, #32 +; CHECK64-NEXT: mov x9, sp +; CHECK64-NEXT: add x8, x8, #15 +; CHECK64-NEXT: and x8, x8, #0x7fffffff0 +; CHECK64-NEXT: sub x20, x9, x8 +; CHECK64-NEXT: mov sp, x20 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: add x0, x19, #8 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: sub x0, x29, #64 +; CHECK64-NEXT: addvl x0, x0, #-19 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: mov x0, x20 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: sub sp, x29, #64 +; CHECK64-NEXT: ldp 
x20, x19, [sp, #104] // 16-byte Folded Reload +; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK64-NEXT: ldp x27, x26, [sp, #88] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x30, x28, [sp, #72] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x20, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: sub sp, sp, #1072 +; CHECK1024-NEXT: addvl sp, sp, #-1 +; CHECK1024-NEXT: mov x19, sp +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -16 +; CHECK1024-NEXT: .cfi_offset w20, -24 +; CHECK1024-NEXT: .cfi_offset w26, -32 +; CHECK1024-NEXT: .cfi_offset w27, -40 +; CHECK1024-NEXT: .cfi_offset w28, -48 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // 
$d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK1024-NEXT: ubfiz x8, x0, #2, #32 +; CHECK1024-NEXT: mov x9, sp +; CHECK1024-NEXT: add x8, x8, #15 +; CHECK1024-NEXT: and x8, x8, #0x7fffffff0 +; CHECK1024-NEXT: sub x20, x9, x8 +; CHECK1024-NEXT: mov sp, x20 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: add x0, x19, #8 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: sub x0, x29, #1024 +; CHECK1024-NEXT: addvl x0, x0, #-19 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: mov x0, x20 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded 
Reload +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x20, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: ret +entry: + %a = alloca i32, i32 10 + %b = alloca + %c = alloca i32, i32 %P1, align 4 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + call void @bar(ptr noundef nonnull %a) + call void @bar(ptr noundef nonnull %b) + call void @bar(ptr noundef nonnull %c) + ret i32 -396142473 +} From 6fb913d3e2ec82ac3b351f9652c311cd12088a8f Mon Sep 17 00:00:00 2001 From: dianqk Date: Sat, 28 Jun 2025 06:42:42 +0800 Subject: [PATCH 80/83] [RelLookupTableConverter] Drop unnamed_addr for GVs in entries to avoid generating GOTPCREL relocations (#146068) The entry in a relative lookup table is a global variable with a constant offset, such as `@gv`, `GEP @gv, 1`, and so on. We cannot only consider the case of a trivial global variable. This PR handles all cases using the existing `IsConstantOffsetFromGlobal` function. (cherry picked from commit c43282ab69d7ff1b64f8ef5c84eab46e57553075) --- .../Utils/RelLookupTableConverter.cpp | 37 ++++++++------- .../RelLookupTableConverter/unnamed_addr.ll | 45 +++++++++++++++++++ 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp index 4486cba2bf6c0..fe1fe391f5982 100644 --- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp +++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp @@ -67,6 +67,20 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { if (!ElemType->isPointerTy() || DL.getPointerTypeSizeInBits(ElemType) != 64) return false; + SmallVector GVOps; + Triple TT(M.getTargetTriple()); + // FIXME: This should be removed in the future. + bool ShouldDropUnnamedAddr = + // Drop unnamed_addr to avoid matching pattern in + // `handleIndirectSymViaGOTPCRel`, which generates GOTPCREL relocations + // not supported by the GNU linker and LLD versions below 18 on aarch64. + TT.isAArch64() + // Apple's ld64 (and ld-prime on Xcode 15.2) miscompile something on + // x86_64-apple-darwin. See + // https://github.com/rust-lang/rust/issues/140686 and + // https://github.com/rust-lang/rust/issues/141306. + || (TT.isX86() && TT.isOSDarwin()); + for (const Use &Op : Array->operands()) { Constant *ConstOp = cast(&Op); GlobalValue *GVOp; @@ -86,8 +100,15 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { !GlovalVarOp->isDSOLocal() || !GlovalVarOp->isImplicitDSOLocal()) return false; + + if (ShouldDropUnnamedAddr) + GVOps.push_back(GlovalVarOp); } + if (ShouldDropUnnamedAddr) + for (auto *GVOp : GVOps) + GVOp->setUnnamedAddr(GlobalValue::UnnamedAddr::None); + return true; } @@ -109,24 +130,8 @@ static GlobalVariable *createRelLookupTable(Function &Func, uint64_t Idx = 0; SmallVector RelLookupTableContents(NumElts); - Triple TT(M.getTargetTriple()); - // FIXME: This should be removed in the future. 
- bool ShouldDropUnnamedAddr = - // Drop unnamed_addr to avoid matching pattern in - // `handleIndirectSymViaGOTPCRel`, which generates GOTPCREL relocations - // not supported by the GNU linker and LLD versions below 18 on aarch64. - TT.isAArch64() - // Apple's ld64 (and ld-prime on Xcode 15.2) miscompile something on - // x86_64-apple-darwin. See - // https://github.com/rust-lang/rust/issues/140686 and - // https://github.com/rust-lang/rust/issues/141306. - || (TT.isX86() && TT.isOSDarwin()); - for (Use &Operand : LookupTableArr->operands()) { Constant *Element = cast(Operand); - if (ShouldDropUnnamedAddr) - if (auto *GlobalElement = dyn_cast(Element)) - GlobalElement->setUnnamedAddr(GlobalValue::UnnamedAddr::None); Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext()); Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy); Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy); diff --git a/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll b/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll index 78b8a4aa126c9..322c38d090fe1 100644 --- a/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll +++ b/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll @@ -20,6 +20,14 @@ @y3 = internal unnamed_addr constant ptr @x0 @load_relative_2.table = private unnamed_addr constant [4 x ptr] [ptr @y3, ptr @y2, ptr @y1, ptr @y0] +@b0 = private unnamed_addr constant [8 x i8] c"00000000" +@b1 = private unnamed_addr constant [8 x i8] c"11111111" +@b2 = private unnamed_addr constant [8 x i8] c"22222222" +@load_relative_3.table = private unnamed_addr constant [3 x ptr] [ + ptr getelementptr inbounds (i8, ptr @b0, i64 8), + ptr getelementptr inbounds (i8, ptr @b1, i64 8), + ptr getelementptr inbounds (i8, ptr @b2, i64 8)] + ;. ; x86_64-apple-darwin: @a0 = private constant i32 0 ; x86_64-apple-darwin: @a1 = private constant i32 1 @@ -34,6 +42,10 @@ ; x86_64-apple-darwin: @y2 = internal constant ptr @x1 ; x86_64-apple-darwin: @y3 = internal constant ptr @x0 ; x86_64-apple-darwin: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +; x86_64-apple-darwin: @b0 = private constant [8 x i8] c"00000000" +; x86_64-apple-darwin: @b1 = private constant [8 x i8] c"11111111" +; x86_64-apple-darwin: @b2 = private constant [8 x i8] c"22222222" +; x86_64-apple-darwin: @load_relative_3.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b0, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b1, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b2, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32)], align 4 ;. 
; aarch64: @a0 = private constant i32 0 ; aarch64: @a1 = private constant i32 1 @@ -48,6 +60,10 @@ ; aarch64: @y2 = internal constant ptr @x1 ; aarch64: @y3 = internal constant ptr @x0 ; aarch64: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +; aarch64: @b0 = private constant [8 x i8] c"00000000" +; aarch64: @b1 = private constant [8 x i8] c"11111111" +; aarch64: @b2 = private constant [8 x i8] c"22222222" +; aarch64: @load_relative_3.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b0, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b1, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b2, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32)], align 4 ;. ; x86_64: @a0 = private unnamed_addr constant i32 0 ; x86_64: @a1 = private unnamed_addr constant i32 1 @@ -62,6 +78,10 @@ ; x86_64: @y2 = internal unnamed_addr constant ptr @x1 ; x86_64: @y3 = internal unnamed_addr constant ptr @x0 ; x86_64: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +; x86_64: @b0 = private unnamed_addr constant [8 x i8] c"00000000" +; x86_64: @b1 = private unnamed_addr constant [8 x i8] c"11111111" +; x86_64: @b2 = private unnamed_addr constant [8 x i8] c"22222222" +; x86_64: @load_relative_3.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b0, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b1, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b2, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32)], align 4 ;. 
define ptr @load_relative_1(i64 %offset) { ; x86_64-apple-darwin-LABEL: define ptr @load_relative_1( @@ -110,6 +130,31 @@ define ptr @load_relative_2(i64 %offset) { %load = load ptr, ptr %gep ret ptr %load } + +define ptr @load_relative_3(i64 %offset) { +; x86_64-apple-darwin-LABEL: define ptr @load_relative_3( +; x86_64-apple-darwin-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-apple-darwin-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-apple-darwin-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_3.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-apple-darwin-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; aarch64-LABEL: define ptr @load_relative_3( +; aarch64-SAME: i64 [[OFFSET:%.*]]) { +; aarch64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; aarch64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_3.table.rel, i64 [[RELTABLE_SHIFT]]) +; aarch64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; x86_64-LABEL: define ptr @load_relative_3( +; x86_64-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_3.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; + %gep = getelementptr inbounds [3 x ptr], ptr @load_relative_3.table, i64 0, i64 %offset + %load = load ptr, ptr %gep + ret ptr %load +} + ;. ; x86_64-apple-darwin: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } ;. From 25bcf1145fd78e945eda6d9c72bde9b8e294b6b3 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Fri, 18 Apr 2025 09:12:52 -0700 Subject: [PATCH 81/83] [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) We can't assume MBBI is still pointing at MBB if we've already expanded a probe. We need to re-query the MBB from MBBI. Fixes #135206 Co-authored-by: Craig Topper (cherry picked from commit b3d2dc321c5c78b7204696afe07fe6ef3375acfd) --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 8 +- llvm/test/CodeGen/RISCV/pr135206.ll | 84 ++++++++++++++++++++ 2 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/pr135206.ll diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index bb2e5781c34db..6f4c1e16190f4 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -2135,11 +2135,13 @@ TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const { } // Synthesize the probe loop. 
-static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, +static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL, Register TargetReg, bool IsRVV) { assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP"); + MachineBasicBlock &MBB = *MBBI->getParent(); + MachineFunction &MF = *MBB.getParent(); + auto &Subtarget = MF.getSubtarget(); const RISCVInstrInfo *TII = Subtarget.getInstrInfo(); bool IsRV64 = Subtarget.is64Bit(); @@ -2228,7 +2230,7 @@ void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MI->getIterator(); DebugLoc DL = MBB.findDebugLoc(MBBI); Register TargetReg = MI->getOperand(1).getReg(); - emitStackProbeInline(MF, MBB, MBBI, DL, TargetReg, + emitStackProbeInline(MBBI, DL, TargetReg, (MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV)); MBBI->eraseFromParent(); } diff --git a/llvm/test/CodeGen/RISCV/pr135206.ll b/llvm/test/CodeGen/RISCV/pr135206.ll new file mode 100644 index 0000000000000..859179f62d704 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr135206.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple riscv64 < %s -o - | FileCheck %s + +%"buff" = type { [4096 x i64] } + +declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) +declare void @bar() + +define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -2032 +; CHECK-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 2008(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 2000(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s3, 1992(sp) # 8-byte Folded Spill +; CHECK-NEXT: lui a0, 7 +; CHECK-NEXT: sub t1, sp, a0 +; CHECK-NEXT: lui t2, 1 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: sub sp, sp, t2 +; CHECK-NEXT: sd zero, 0(sp) +; CHECK-NEXT: bne sp, t1, .LBB0_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: addi sp, sp, -2048 +; CHECK-NEXT: addi sp, sp, -96 +; CHECK-NEXT: csrr t1, vlenb +; CHECK-NEXT: lui t2, 1 +; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: sub sp, sp, t2 +; CHECK-NEXT: sd zero, 0(sp) +; CHECK-NEXT: sub t1, t1, t2 +; CHECK-NEXT: bge t1, t2, .LBB0_3 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: sub sp, sp, t1 +; CHECK-NEXT: li a0, 86 +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: addi s1, sp, 32 +; CHECK-NEXT: addi s2, sp, 16 +; CHECK-NEXT: lui a1, 353637 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addiw a0, a0, 32 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addiw a0, a1, 1622 +; CHECK-NEXT: vse8.v v8, (s0) +; CHECK-NEXT: vse8.v v8, (s1) +; CHECK-NEXT: vse8.v v8, (s2) +; CHECK-NEXT: slli a1, a0, 32 +; CHECK-NEXT: add s3, a0, a1 +; CHECK-NEXT: sd s3, 64(sp) +; CHECK-NEXT: call bar +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addiw a0, a0, 32 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vse8.v v8, (s0) +; CHECK-NEXT: vse8.v v8, (s1) +; CHECK-NEXT: vse8.v v8, (s2) +; CHECK-NEXT: sd s3, 64(sp) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add sp, sp, a1 +; CHECK-NEXT: lui a1, 8 +; CHECK-NEXT: addiw a1, a1, -1952 +; CHECK-NEXT: add 
sp, sp, a1 +; CHECK-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 2008(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s2, 2000(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s3, 1992(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 2032 +; CHECK-NEXT: ret + %1 = alloca %"buff", align 8 + call void @llvm.memset.p0.i64(ptr %1, i8 86, i64 56, i1 false) + call void @bar() + call void @llvm.memset.p0.i64(ptr %1, i8 86, i64 56, i1 false) + ret i1 false +} + From df43f93388b7587c9843838a237dd57a9bd19b52 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 6 Jun 2025 17:50:16 +0200 Subject: [PATCH 82/83] [PhaseOrdering] Add test for #139050 (NFC) (cherry picked from commit cef5a3155bab9a2db5389f782471d56f1dd15b61) --- .../PhaseOrdering/X86/vector-reductions.ll | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index 254136b0b841a..f8450766037b2 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -325,3 +325,53 @@ cleanup: %retval.0 = phi i1 [ false, %if.then ], [ true, %if.end ] ret i1 %retval.0 } + +; From https://github.com/llvm/llvm-project/issues/139050. +; FIXME: This should be vectorized. +define i8 @masked_min_reduction(ptr %data, ptr %mask) { +; CHECK-LABEL: @masked_min_reduction( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[DATA:%.*]] = getelementptr i8, ptr [[DATA1:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[DATA]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MASK:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[M:%.*]] = load i8, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[M]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[VAL]]) +; CHECK-NEXT: [[TMP21]] = select i1 [[COND]], i8 [[TMP0]], i8 [[ACC]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP20]], label [[EXIT:%.*]], label [[VECTOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: ret i8 [[TMP21]] +; +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %next, %loop ] + %acc = phi i8 [ 255, %entry ], [ %acc_next, %loop ] + + %ptr_i = getelementptr i8, ptr %data, i64 %i + %val = load i8, ptr %ptr_i, align 1 + + %mask_ptr = getelementptr i8, ptr %mask, i64 %i + %m = load i8, ptr %mask_ptr, align 1 + %cond = icmp eq i8 %m, 0 + + ; Use select to implement masking + %masked_val = select i1 %cond, i8 %val, i8 255 + + ; min reduction + %acc_next = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) + + %next = add i64 %i, 1 + %cmp = icmp ult i64 %next, 1024 + br i1 %cmp, label %loop, label %exit + +exit: + ret i8 %acc_next +} From 87f0227cb60147a26a1eeb4fb06e3b505e9c7261 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Sat, 14 Jun 2025 09:32:54 +0300 Subject: [PATCH 83/83] [InstCombine] Avoid folding `select(umin(X, Y), X)` with min/max values in false arm (#143020) Fixes https://github.com/llvm/llvm-project/issues/139050. 
This patch adds a check to avoid folding min/max reduction into select, which may block loop vectorization. The issue is that the following snippet: ``` declare i8 @llvm.umin.i8(i8, i8) define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) { ; CHECK-LABEL: @masked_min_fold_bug( ; CHECK: %cond = icmp eq i8 %mask, 0 ; CHECK: %masked_val = select i1 %cond, i8 %val, i8 255 ; CHECK: call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) ; %cond = icmp eq i8 %mask, 0 %masked_val = select i1 %cond, i8 %val, i8 255 %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) ret i8 %res } ``` is being optimized to the following code, which can not be vectorized later. ``` declare i8 @llvm.umin.i8(i8, i8) #0 define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) { %cond = icmp eq i8 %mask, 0 %1 = call i8 @llvm.umin.i8(i8 %acc, i8 %val) %res = select i1 %cond, i8 %1, i8 %acc ret i8 %res } attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ``` Expected: ``` declare i8 @llvm.umin.i8(i8, i8) #0 define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) { %cond = icmp eq i8 %mask, 0 %masked_val = select i1 %cond, i8 %val, i8 -1 %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) ret i8 %res } attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ``` https://godbolt.org/z/cYMheKE5r (cherry picked from commit 07fa6d1d90c714fa269529c3e5004a063d814c4a) --- .../InstCombine/InstructionCombining.cpp | 9 ++++ llvm/test/Transforms/InstCombine/select.ll | 47 +++++++++++++++++ .../PhaseOrdering/X86/vector-reductions.ll | 50 ++++++++++++++----- 3 files changed, 94 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index a64c188575e6c..0f5e867877da2 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1697,6 +1697,15 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, if (SI->getType()->isIntOrIntVectorTy(1)) return nullptr; + // Avoid breaking min/max reduction pattern, + // which is necessary for vectorization later. + if (isa(&Op)) + for (Value *IntrinOp : Op.operands()) + if (auto *PN = dyn_cast(IntrinOp)) + for (Value *PhiOp : PN->operands()) + if (PhiOp == &Op) + return nullptr; + // Test if a FCmpInst instruction is used exclusively by a select as // part of a minimum or maximum operation. If so, refrain from doing // any other folding. 
This helps out other analyses which understand diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 3c3111492fc68..e8a32cb1697a5 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -4901,3 +4901,50 @@ define i32 @src_simplify_2x_at_once_and(i32 %x, i32 %y) { %cond = select i1 %and0, i32 %sub, i32 %xor ret i32 %cond } + +define void @no_fold_masked_min_loop(ptr nocapture readonly %vals, ptr nocapture readonly %masks, ptr nocapture %out, i64 %n) { +; CHECK-LABEL: @no_fold_masked_min_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[RES:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[VAL_PTR:%.*]] = getelementptr inbounds i8, ptr [[VALS:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[MASK_PTR:%.*]] = getelementptr inbounds i8, ptr [[MASKS:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[VAL_PTR]], align 1 +; CHECK-NEXT: [[MASK:%.*]] = load i8, ptr [[MASK_PTR]], align 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK]], 0 +; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND]], i8 [[VAL]], i8 -1 +; CHECK-NEXT: [[RES]] = call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[MASKED_VAL]]) +; CHECK-NEXT: [[NEXT_INDEX]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXT_INDEX]], [[N:%.*]] +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: store i8 [[RES]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %index = phi i64 [0, %entry], [%next_index, %loop] + %acc = phi i8 [255, %entry], [%res, %loop] + + %val_ptr = getelementptr inbounds i8, ptr %vals, i64 %index + %mask_ptr = getelementptr inbounds i8, ptr %masks, i64 %index + + %val = load i8, ptr %val_ptr, align 1 + %mask = load i8, ptr %mask_ptr, align 1 + + %cond = icmp eq i8 %mask, 0 + %masked_val = select i1 %cond, i8 %val, i8 -1 + %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) + + %next_index = add i64 %index, 1 + %done = icmp eq i64 %next_index, %n + br i1 %done, label %exit, label %loop + +exit: + store i8 %res, ptr %out, align 1 + ret void +} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index f8450766037b2..2ec48a8637dae 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -326,26 +326,52 @@ cleanup: ret i1 %retval.0 } -; From https://github.com/llvm/llvm-project/issues/139050. -; FIXME: This should be vectorized. 
define i8 @masked_min_reduction(ptr %data, ptr %mask) { ; CHECK-LABEL: @masked_min_reduction( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: loop: +; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[DATA:%.*]] = getelementptr i8, ptr [[DATA1:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[DATA]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DATA]], i64 32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DATA]], i64 64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DATA]], i64 96 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[DATA]], align 1 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <32 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MASK:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[M:%.*]] = load i8, ptr [[TMP7]], align 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[M]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[VAL]]) -; CHECK-NEXT: [[TMP21]] = select i1 [[COND]], i8 [[TMP0]], i8 [[ACC]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP7]], i64 32 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP7]], i64 64 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP7]], i64 96 +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <32 x i8>, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <32 x i8>, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <32 x i8>, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]]) +; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]]) +; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]]) +; CHECK-NEXT: [[TMP19]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> 
[[VEC_PHI3]], <32 x i8> [[TMP15]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP20]], label [[EXIT:%.*]], label [[VECTOR_BODY]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[TMP16]], <32 x i8> [[TMP17]]) +; CHECK-NEXT: [[RDX_MINMAX11:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[RDX_MINMAX]], <32 x i8> [[TMP18]]) +; CHECK-NEXT: [[RDX_MINMAX12:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[RDX_MINMAX11]], <32 x i8> [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = tail call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> [[RDX_MINMAX12]]) ; CHECK-NEXT: ret i8 [[TMP21]] ; entry: