From ebfae55af454e5b81ca4f532714fd2e589297b8d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 29 Apr 2025 17:24:51 -0700 Subject: [PATCH 01/83] Bump version to 20.1.5 --- cmake/Modules/LLVMVersion.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index 7bb6c66a92e12..c8cc0b8968b05 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 4) + set(LLVM_VERSION_PATCH 5) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) From aecbb2364a7ccee1048447127fdb5865f70b3c6b Mon Sep 17 00:00:00 2001 From: Younan Zhang Date: Fri, 18 Apr 2025 16:27:27 +0800 Subject: [PATCH 02/83] [Clang] Fix the trailing comma regression (#136273) 925e195 introduced a regression since which we started to accept invalid trailing commas in many expression lists where they're not allowed by the grammar. The issue came from the fact that an additional invalid state - previously handled by ParseExpressionList - was overlooked in that patch. Fixes https://github.com/llvm/llvm-project/issues/136254 No release entry because I want to backport it. (cherry picked from commit c7daab259c3281cf8f649583993bad2536febc02) --- clang/lib/Parse/ParseExpr.cpp | 3 --- clang/test/Parser/recovery.cpp | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 0cadede51a9b3..2fab1dfed4a00 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -2237,8 +2237,6 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { if (PP.isCodeCompletionReached() && !CalledSignatureHelp) RunSignatureHelp(); LHS = ExprError(); - } else if (!HasError && HasTrailingComma) { - Diag(Tok, diag::err_expected_expression); } else if (LHS.isInvalid()) { for (auto &E : ArgExprs) Actions.CorrectDelayedTyposInExpr(E); @@ -3738,7 +3736,6 @@ bool Parser::ParseExpressionList(SmallVectorImpl &Exprs, if (Tok.is(tok::r_paren)) { if (HasTrailingComma) *HasTrailingComma = true; - break; } } if (SawError) { diff --git a/clang/test/Parser/recovery.cpp b/clang/test/Parser/recovery.cpp index 2fce67a52c6b6..261f5dc99bad4 100644 --- a/clang/test/Parser/recovery.cpp +++ b/clang/test/Parser/recovery.cpp @@ -222,3 +222,21 @@ void k() { func(1, ); // expected-error {{expected expression}} } } + +namespace GH136254 { + +void call() { + [a(42, )]() {} (); // expected-error {{expected expression}} + + int *b = new int(42, ); // expected-error {{expected expression}} + + struct S { + int c; + + S() : c(42, ) {} // expected-error {{expected expression}} + }; + + int d(42, ); // expected-error {{expected expression}} +} + +} From be4097b6ee5793c33f6731e9cf908e67d627fded Mon Sep 17 00:00:00 2001 From: 3405691582 Date: Mon, 31 Mar 2025 12:17:55 -0400 Subject: [PATCH 03/83] Fix crash lowering stack guard on OpenBSD/aarch64. (#125416) TargetLoweringBase::getIRStackGuard refers to a platform-specific guard variable. Before this change, TargetLoweringBase::getSDagStackGuard only referred to a different variable. This means that SelectionDAGBuilder's getLoadStackGuard does not get memory operands. However, AArch64InstrInfo::expandPostRAPseudo assumes that the passed MachineInstr has nonzero memoperands, causing a segfault. 
We have two possible options here: either disabling the LOAD_STACK_GUARD node entirely in AArch64TargetLowering::useLoadStackGuardNode or just making the platform-specific values match across TargetLoweringBase. Here, we try the latter. (cherry picked from commit c180e249d0013474d502cd779ec65b33cf7e9468) --- llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 9c56912aa6ba0..411f59e714b0e 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1987,6 +1987,9 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const { // Currently only support "standard" __stack_chk_guard. // TODO: add LOAD_STACK_GUARD support. Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { + if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { + return M.getNamedValue("__guard_local"); + } return M.getNamedValue("__stack_chk_guard"); } From a38e1ae2041db6815c482b5194718409ff2e742c Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Tue, 25 Mar 2025 10:09:25 +0000 Subject: [PATCH 04/83] [AArch64][SME2] Don't preserve ZT0 around SME ABI routines (#132722) This caused ZT0 to be preserved around `__arm_tpidr2_save` in functions with "aarch64_new_zt0". The block in which `__arm_tpidr2_save` is called is added by the SMEABIPass and may be reachable in cases where ZA has not been enabled* (so using `str zt0` is invalid). * (when za_save_buffer is null and num_za_save_slices is zero) --- .../AArch64/Utils/AArch64SMEAttributes.h | 3 +- .../AArch64/sme-disable-gisel-fisel.ll | 9 +-- llvm/test/CodeGen/AArch64/sme-zt0-state.ll | 61 +++++++++++++------ 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h index fb093da70c46b..a3ebf764a6e0c 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h @@ -133,7 +133,8 @@ class SMEAttrs { bool hasZT0State() const { return isNewZT0() || sharesZT0(); } bool requiresPreservingZT0(const SMEAttrs &Callee) const { return hasZT0State() && !Callee.sharesZT0() && - !Callee.hasAgnosticZAInterface(); + !Callee.hasAgnosticZAInterface() && + !(Callee.Bitmask & SME_ABI_Routine); } bool requiresDisablingZABeforeCall(const SMEAttrs &Callee) const { return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface() && diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll index 33d08beae2ca7..4a52bf27a7591 100644 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -475,16 +475,12 @@ declare double @zt0_shared_callee(double) "aarch64_inout_zt0" define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline optnone "aarch64_new_zt0" { ; CHECK-COMMON-LABEL: zt0_new_caller_to_zt0_shared_callee: ; CHECK-COMMON: // %bb.0: // %prelude -; CHECK-COMMON-NEXT: sub sp, sp, #80 -; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 ; CHECK-COMMON-NEXT: cbz x8, .LBB13_2 ; CHECK-COMMON-NEXT: b .LBB13_1 ; CHECK-COMMON-NEXT: .LBB13_1: // %save.za -; CHECK-COMMON-NEXT: mov x8, sp -; CHECK-COMMON-NEXT: str zt0, [x8] ; CHECK-COMMON-NEXT: bl __arm_tpidr2_save -; CHECK-COMMON-NEXT: ldr zt0, [x8] ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr ; CHECK-COMMON-NEXT: b .LBB13_2 ; CHECK-COMMON-NEXT: .LBB13_2: // %entry @@ -495,8 +491,7 @@ define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline ; CHECK-COMMON-NEXT: fmov d1, x8 ; CHECK-COMMON-NEXT: fadd d0, d0, d1 ; CHECK-COMMON-NEXT: smstop za -; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-COMMON-NEXT: add sp, sp, #80 +; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ret entry: %call = call double @zt0_shared_callee(double %x) diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll index 312537630e77a..500fff4eb20db 100644 --- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll +++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll @@ -112,7 +112,7 @@ define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_inout_za" "aar ret void; } -; New-ZA Callee +; New-ZT0 Callee ; Expect spill & fill of ZT0 around call ; Expect smstop/smstart za around call @@ -134,6 +134,39 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind { ret void; } +; New-ZT0 Callee + +; Expect commit of lazy-save if ZA is dormant +; Expect smstart ZA & clear ZT0 +; Expect spill & fill of ZT0 around call +; Before return, expect smstop ZA +define void @zt0_new_caller_zt0_new_callee() "aarch64_new_zt0" nounwind { +; CHECK-LABEL: zt0_new_caller_zt0_new_callee: +; CHECK: // %bb.0: // %prelude +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbz x8, .LBB6_2 +; CHECK-NEXT: // %bb.1: // %save.za +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: smstart za +; CHECK-NEXT: zero { zt0 } +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: str zt0, [x19] +; CHECK-NEXT: smstop za +; CHECK-NEXT: bl callee +; CHECK-NEXT: smstart za +; CHECK-NEXT: ldr zt0, [x19] +; CHECK-NEXT: smstop za +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret + call void @callee() "aarch64_new_zt0"; + ret void; +} + ; ; New-ZA Caller ; @@ -144,23 +177,18 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind { define void @zt0_new_caller() "aarch64_new_zt0" nounwind { ; CHECK-LABEL: zt0_new_caller: ; CHECK: // %bb.0: // %prelude -; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-NEXT: cbz x8, .LBB6_2 +; CHECK-NEXT: cbz x8, .LBB7_2 ; CHECK-NEXT: // %bb.1: // %save.za -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str zt0, [x8] ; CHECK-NEXT: bl __arm_tpidr2_save -; CHECK-NEXT: ldr zt0, [x8] ; CHECK-NEXT: msr TPIDR2_EL0, xzr -; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero { zt0 } ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstop za -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret call void @callee() "aarch64_in_zt0"; ret void; @@ -172,24 +200,19 @@ define void @zt0_new_caller() "aarch64_new_zt0" nounwind { define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind { ; CHECK-LABEL: new_za_zt0_caller: ; CHECK: // %bb.0: // %prelude -; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-NEXT: cbz x8, .LBB7_2 +; CHECK-NEXT: cbz x8, .LBB8_2 ; CHECK-NEXT: // %bb.1: // %save.za -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str zt0, [x8] ; CHECK-NEXT: bl __arm_tpidr2_save -; CHECK-NEXT: ldr zt0, [x8] ; CHECK-NEXT: msr TPIDR2_EL0, xzr -; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero {za} ; CHECK-NEXT: zero { zt0 } ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstop za -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret call void @callee() "aarch64_inout_za" "aarch64_in_zt0"; ret void; From 069ef671e0aba3874e690750ad95c59a437b0c34 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 25 Apr 2025 13:33:09 +0100 Subject: [PATCH 05/83] [AArch64][SME] Allow spills of ZT0 around SME ABI routines again (#136726) In #132722 spills of ZT0 were disabled around all SME ABI routines to avoid a case where ZT0 is spilled before ZA is enabled (resulting in a crash). It turns out that the ABI does not promise that routines will preserve ZT0 (however in practice they do), so generally disabling ZT0 spills for ABI routines is not correct. The case where a crash was possible was "aarch64_new_zt0" functions with ZA disabled on entry and a ZT0 spill around __arm_tpidr2_save. In this case, ZT0 will be undefined at the call to __arm_tpidr2_save, so this patch avoids the ZT0 spill by marking the callsite with "aarch64_zt0_undef". This attribute only applies to callsites and marks that at the point the call is made ZT0 is not defined, so does not need preserving. 
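For reference, the rule this patch ends up with can be restated as the following sketch (a simplified paraphrase of SMEAttrs::requiresPreservingZT0 from the diff below, not the exact upstream code):

```
#include "Utils/AArch64SMEAttributes.h"
using namespace llvm;

// Sketch: ZT0 only needs a spill/fill around a call if the caller has ZT0 state
// and the callee neither shares ZT0, has an agnostic-ZA interface, nor is a
// callsite marked "aarch64_zt0_undef".
static bool needsZT0SpillAroundCall(const SMEAttrs &Caller,
                                    const SMEAttrs &Callee) {
  return Caller.hasZT0State() && !Callee.isUndefZT0() && !Callee.sharesZT0() &&
         !Callee.hasAgnosticZAInterface();
}
```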
--- llvm/lib/IR/Verifier.cpp | 3 ++ llvm/lib/Target/AArch64/SMEABIPass.cpp | 16 ++++++-- .../AArch64/Utils/AArch64SMEAttributes.cpp | 2 + .../AArch64/Utils/AArch64SMEAttributes.h | 11 ++--- .../CodeGen/AArch64/sme-new-zt0-function.ll | 14 +++++++ llvm/test/CodeGen/AArch64/sme-zt0-state.ll | 41 +++++++++++++++++-- llvm/test/Verifier/sme-attributes.ll | 3 ++ .../Target/AArch64/SMEAttributesTest.cpp | 30 ++++++++++++++ 8 files changed, 107 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 8432779c107de..551c00a518b8f 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2818,6 +2818,9 @@ void Verifier::visitFunction(const Function &F) { Check(!Attrs.hasAttrSomewhere(Attribute::ElementType), "Attribute 'elementtype' can only be applied to a callsite.", &F); + Check(!Attrs.hasFnAttr("aarch64_zt0_undef"), + "Attribute 'aarch64_zt0_undef' can only be applied to a callsite."); + if (Attrs.hasFnAttr(Attribute::Naked)) for (const Argument &Arg : F.args()) Check(Arg.use_empty(), "cannot use argument of naked function", &Arg); diff --git a/llvm/lib/Target/AArch64/SMEABIPass.cpp b/llvm/lib/Target/AArch64/SMEABIPass.cpp index bb885d86392fe..b6685497e1fd1 100644 --- a/llvm/lib/Target/AArch64/SMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/SMEABIPass.cpp @@ -54,14 +54,22 @@ FunctionPass *llvm::createSMEABIPass() { return new SMEABI(); } //===----------------------------------------------------------------------===// // Utility function to emit a call to __arm_tpidr2_save and clear TPIDR2_EL0. -void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) { +void emitTPIDR2Save(Module *M, IRBuilder<> &Builder, bool ZT0IsUndef = false) { + auto &Ctx = M->getContext(); auto *TPIDR2SaveTy = FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false); - auto Attrs = AttributeList().addFnAttribute(M->getContext(), - "aarch64_pstate_sm_compatible"); + auto Attrs = + AttributeList().addFnAttribute(Ctx, "aarch64_pstate_sm_compatible"); FunctionCallee Callee = M->getOrInsertFunction("__arm_tpidr2_save", TPIDR2SaveTy, Attrs); CallInst *Call = Builder.CreateCall(Callee); + + // If ZT0 is undefined (i.e. we're at the entry of a "new_zt0" function), mark + // that on the __arm_tpidr2_save call. This prevents an unnecessary spill of + // ZT0 that can occur before ZA is enabled. + if (ZT0IsUndef) + Call->addFnAttr(Attribute::get(Ctx, "aarch64_zt0_undef")); + Call->setCallingConv( CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0); @@ -119,7 +127,7 @@ bool SMEABI::updateNewStateFunctions(Module *M, Function *F, // Create a call __arm_tpidr2_save, which commits the lazy save. Builder.SetInsertPoint(&SaveBB->back()); - emitTPIDR2Save(M, Builder); + emitTPIDR2Save(M, Builder, /*ZT0IsUndef=*/FnAttrs.isNewZT0()); // Enable pstate.za at the start of the function. 
Builder.SetInsertPoint(&OrigBB->front()); diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index bf16acd7f8f7e..76d2ac6a601e5 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -75,6 +75,8 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) { Bitmask |= SM_Body; if (Attrs.hasFnAttr("aarch64_za_state_agnostic")) Bitmask |= ZA_State_Agnostic; + if (Attrs.hasFnAttr("aarch64_zt0_undef")) + Bitmask |= ZT0_Undef; if (Attrs.hasFnAttr("aarch64_in_za")) Bitmask |= encodeZAState(StateValue::In); if (Attrs.hasFnAttr("aarch64_out_za")) diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h index a3ebf764a6e0c..1691d4fec8b68 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h @@ -43,9 +43,10 @@ class SMEAttrs { SM_Body = 1 << 2, // aarch64_pstate_sm_body SME_ABI_Routine = 1 << 3, // Used for SME ABI routines to avoid lazy saves ZA_State_Agnostic = 1 << 4, - ZA_Shift = 5, + ZT0_Undef = 1 << 5, // Use to mark ZT0 as undef to avoid spills + ZA_Shift = 6, ZA_Mask = 0b111 << ZA_Shift, - ZT0_Shift = 8, + ZT0_Shift = 9, ZT0_Mask = 0b111 << ZT0_Shift }; @@ -125,6 +126,7 @@ class SMEAttrs { bool isPreservesZT0() const { return decodeZT0State(Bitmask) == StateValue::Preserved; } + bool isUndefZT0() const { return Bitmask & ZT0_Undef; } bool sharesZT0() const { StateValue State = decodeZT0State(Bitmask); return State == StateValue::In || State == StateValue::Out || @@ -132,9 +134,8 @@ class SMEAttrs { } bool hasZT0State() const { return isNewZT0() || sharesZT0(); } bool requiresPreservingZT0(const SMEAttrs &Callee) const { - return hasZT0State() && !Callee.sharesZT0() && - !Callee.hasAgnosticZAInterface() && - !(Callee.Bitmask & SME_ABI_Routine); + return hasZT0State() && !Callee.isUndefZT0() && !Callee.sharesZT0() && + !Callee.hasAgnosticZAInterface(); } bool requiresDisablingZABeforeCall(const SMEAttrs &Callee) const { return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface() && diff --git a/llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll b/llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll new file mode 100644 index 0000000000000..94968ab4fd9ac --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll @@ -0,0 +1,14 @@ +; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s | FileCheck %s + +declare void @callee(); + +define void @private_za() "aarch64_new_zt0" { + call void @callee() + ret void +} + +; CHECK: call aarch64_sme_preservemost_from_x0 void @__arm_tpidr2_save() #[[TPIDR2_SAVE_CALL_ATTR:[0-9]+]] +; CHECK: declare void @__arm_tpidr2_save() #[[TPIDR2_SAVE_DECL_ATTR:[0-9]+]] + +; CHECK: attributes #[[TPIDR2_SAVE_DECL_ATTR]] = { "aarch64_pstate_sm_compatible" } +; CHECK: attributes #[[TPIDR2_SAVE_CALL_ATTR]] = { "aarch64_zt0_undef" } diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll index 500fff4eb20db..7361e850d713e 100644 --- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll +++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll @@ -167,6 +167,39 @@ define void @zt0_new_caller_zt0_new_callee() "aarch64_new_zt0" nounwind { ret void; } +; Expect commit of lazy-save if ZA is dormant +; Expect smstart ZA & clear ZT0 +; No spill & fill of ZT0 around __arm_tpidr2_save +; Expect spill & fill of ZT0 around __arm_sme_state call +; Before return, 
expect smstop ZA +define i64 @zt0_new_caller_abi_routine_callee() "aarch64_new_zt0" nounwind { +; CHECK-LABEL: zt0_new_caller_abi_routine_callee: +; CHECK: // %bb.0: // %prelude +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbz x8, .LBB7_2 +; CHECK-NEXT: // %bb.1: // %save.za +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: smstart za +; CHECK-NEXT: zero { zt0 } +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: str zt0, [x19] +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: ldr zt0, [x19] +; CHECK-NEXT: smstop za +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret + %res = call {i64, i64} @__arm_sme_state() + %res.0 = extractvalue {i64, i64} %res, 0 + ret i64 %res.0 +} + +declare {i64, i64} @__arm_sme_state() + ; ; New-ZA Caller ; @@ -179,11 +212,11 @@ define void @zt0_new_caller() "aarch64_new_zt0" nounwind { ; CHECK: // %bb.0: // %prelude ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-NEXT: cbz x8, .LBB7_2 +; CHECK-NEXT: cbz x8, .LBB8_2 ; CHECK-NEXT: // %bb.1: // %save.za ; CHECK-NEXT: bl __arm_tpidr2_save ; CHECK-NEXT: msr TPIDR2_EL0, xzr -; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero { zt0 } ; CHECK-NEXT: bl callee @@ -202,11 +235,11 @@ define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind { ; CHECK: // %bb.0: // %prelude ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-NEXT: cbz x8, .LBB8_2 +; CHECK-NEXT: cbz x8, .LBB9_2 ; CHECK-NEXT: // %bb.1: // %save.za ; CHECK-NEXT: bl __arm_tpidr2_save ; CHECK-NEXT: msr TPIDR2_EL0, xzr -; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero {za} ; CHECK-NEXT: zero { zt0 } diff --git a/llvm/test/Verifier/sme-attributes.ll b/llvm/test/Verifier/sme-attributes.ll index 4bf5e813daf2f..0ae2b9fd91f52 100644 --- a/llvm/test/Verifier/sme-attributes.ll +++ b/llvm/test/Verifier/sme-attributes.ll @@ -68,3 +68,6 @@ declare void @zt0_inout_out() "aarch64_inout_zt0" "aarch64_out_zt0"; declare void @zt0_inout_agnostic() "aarch64_inout_zt0" "aarch64_za_state_agnostic"; ; CHECK: Attributes 'aarch64_new_zt0', 'aarch64_in_zt0', 'aarch64_out_zt0', 'aarch64_inout_zt0', 'aarch64_preserves_zt0' and 'aarch64_za_state_agnostic' are mutually exclusive + +declare void @zt0_undef_function() "aarch64_zt0_undef"; +; CHECK: Attribute 'aarch64_zt0_undef' can only be applied to a callsite. 
diff --git a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp index 3af5e24168c8c..f8c77fcba19cf 100644 --- a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp +++ b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp @@ -1,6 +1,7 @@ #include "Utils/AArch64SMEAttributes.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Module.h" #include "llvm/Support/SourceMgr.h" @@ -69,6 +70,15 @@ TEST(SMEAttributes, Constructors) { ASSERT_TRUE(SA(*parseIR("declare void @foo() \"aarch64_new_zt0\"") ->getFunction("foo")) .isNewZT0()); + ASSERT_TRUE( + SA(cast((parseIR("declare void @callee()\n" + "define void @foo() {" + "call void @callee() \"aarch64_zt0_undef\"\n" + "ret void\n}") + ->getFunction("foo") + ->begin() + ->front()))) + .isUndefZT0()); // Invalid combinations. EXPECT_DEBUG_DEATH(SA(SA::SM_Enabled | SA::SM_Compatible), @@ -215,6 +225,18 @@ TEST(SMEAttributes, Basics) { ASSERT_FALSE(ZT0_New.hasSharedZAInterface()); ASSERT_TRUE(ZT0_New.hasPrivateZAInterface()); + SA ZT0_Undef = SA(SA::ZT0_Undef | SA::encodeZT0State(SA::StateValue::New)); + ASSERT_TRUE(ZT0_Undef.isNewZT0()); + ASSERT_FALSE(ZT0_Undef.isInZT0()); + ASSERT_FALSE(ZT0_Undef.isOutZT0()); + ASSERT_FALSE(ZT0_Undef.isInOutZT0()); + ASSERT_FALSE(ZT0_Undef.isPreservesZT0()); + ASSERT_FALSE(ZT0_Undef.sharesZT0()); + ASSERT_TRUE(ZT0_Undef.hasZT0State()); + ASSERT_FALSE(ZT0_Undef.hasSharedZAInterface()); + ASSERT_TRUE(ZT0_Undef.hasPrivateZAInterface()); + ASSERT_TRUE(ZT0_Undef.isUndefZT0()); + ASSERT_FALSE(SA(SA::Normal).isInZT0()); ASSERT_FALSE(SA(SA::Normal).isOutZT0()); ASSERT_FALSE(SA(SA::Normal).isInOutZT0()); @@ -285,6 +307,7 @@ TEST(SMEAttributes, Transitions) { SA ZT0_Shared = SA(SA::encodeZT0State(SA::StateValue::In)); SA ZA_ZT0_Shared = SA(SA::encodeZAState(SA::StateValue::In) | SA::encodeZT0State(SA::StateValue::In)); + SA Undef_ZT0 = SA(SA::ZT0_Undef); // Shared ZA -> Private ZA Interface ASSERT_FALSE(ZA_Shared.requiresDisablingZABeforeCall(Private_ZA)); @@ -295,6 +318,13 @@ TEST(SMEAttributes, Transitions) { ASSERT_TRUE(ZT0_Shared.requiresPreservingZT0(Private_ZA)); ASSERT_TRUE(ZT0_Shared.requiresEnablingZAAfterCall(Private_ZA)); + // Shared Undef ZT0 -> Private ZA Interface + // Note: "Undef ZT0" is a callsite attribute that means ZT0 is undefined at + // point the of the call. + ASSERT_TRUE(ZT0_Shared.requiresDisablingZABeforeCall(Undef_ZT0)); + ASSERT_FALSE(ZT0_Shared.requiresPreservingZT0(Undef_ZT0)); + ASSERT_TRUE(ZT0_Shared.requiresEnablingZAAfterCall(Undef_ZT0)); + // Shared ZA & ZT0 -> Private ZA Interface ASSERT_FALSE(ZA_ZT0_Shared.requiresDisablingZABeforeCall(Private_ZA)); ASSERT_TRUE(ZA_ZT0_Shared.requiresPreservingZT0(Private_ZA)); From 8272e451613d8f929e3d9d0d28c3ca1b225b0000 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 19 Mar 2025 12:01:18 -0700 Subject: [PATCH 06/83] [flang] Exempt construct entities from SAVE check for PURE (#131383) A PURE subprogram can't have a local variable with the SAVE attribute. An ASSOCIATE or SELECT TYPE construct entity whose selector is a variable will return true from IsSave(); exclude them from the local variable check. Fixes https://github.com/llvm/llvm-project/issues/131356. 
(cherry picked from commit b99dab25879449cb89c1ebd7b4088163543918e3) --- flang/lib/Semantics/check-declarations.cpp | 5 ++++- flang/test/Semantics/call10.f90 | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 5c26469b9fa24..8da9252133bdc 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -359,7 +359,10 @@ void CheckHelper::Check(const Symbol &symbol) { // are not pertinent to the characteristics of the procedure. // Restrictions on entities in pure procedure interfaces don't need // enforcement. - } else if (!FindCommonBlockContaining(symbol) && IsSaved(symbol)) { + } else if (symbol.has() || + FindCommonBlockContaining(symbol)) { + // can look like they have SAVE but are fine in PURE + } else if (IsSaved(symbol)) { if (IsInitialized(symbol)) { messages_.Say( "A pure subprogram may not initialize a variable"_err_en_US); diff --git a/flang/test/Semantics/call10.f90 b/flang/test/Semantics/call10.f90 index 2d2f57934cd8a..1e186f7b4048a 100644 --- a/flang/test/Semantics/call10.f90 +++ b/flang/test/Semantics/call10.f90 @@ -36,6 +36,8 @@ pure subroutine s05a end subroutine end interface + real :: moduleVar = 1. + contains subroutine impure(x) @@ -117,6 +119,8 @@ pure subroutine s05 ! C1589 !ERROR: A pure subprogram may not initialize a variable real :: v6 = 0. end block + associate (x => moduleVar) ! ok + end associate end subroutine pure subroutine s06 ! C1589 !ERROR: A pure subprogram may not have a variable with the VOLATILE attribute From 6ddf2e5d10f87b9f439316da01906027ddfa5c4a Mon Sep 17 00:00:00 2001 From: Carlos Galvez Date: Tue, 29 Apr 2025 11:13:30 +0200 Subject: [PATCH 07/83] =?UTF-8?q?[clang-tidy]=20Do=20not=20pass=20any=20fi?= =?UTF-8?q?le=20when=20listing=20checks=20in=20run=5Fclang=5Fti=E2=80=A6?= =?UTF-8?q?=20(#137286)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …dy.py Currently, run_clang_tidy.py does not correctly display the list of checks picked up from the top-level .clang-tidy file. The reason for that is that we are passing an empty string as input file. However, that's not how we are supposed to use clang-tidy to list checks. Per https://github.com/llvm/llvm-project/commit/65eccb463df7fe511c813ee6a1794c80d7489ff2, we simply should not pass any file at all - the internal code of clang-tidy will pass a "dummy" file if that's the case and get the .clang-tidy file from the current working directory. 
Fixes #136659 Co-authored-by: Carlos Gálvez (cherry picked from commit 014ab736dc741f24c007f9861e24b31faba0e1e7) --- clang-tools-extra/clang-tidy/tool/run-clang-tidy.py | 7 ++++--- clang-tools-extra/docs/ReleaseNotes.rst | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py index f1b934f7139e9..8741147a4f8a3 100755 --- a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py +++ b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py @@ -87,7 +87,7 @@ def find_compilation_database(path: str) -> str: def get_tidy_invocation( - f: str, + f: Optional[str], clang_tidy_binary: str, checks: str, tmpdir: Optional[str], @@ -147,7 +147,8 @@ def get_tidy_invocation( start.append(f"--warnings-as-errors={warnings_as_errors}") if allow_no_checks: start.append("--allow-no-checks") - start.append(f) + if f: + start.append(f) return start @@ -490,7 +491,7 @@ async def main() -> None: try: invocation = get_tidy_invocation( - "", + None, clang_tidy_binary, args.checks, None, diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 2ab597eb37048..0b2e9c5fabc36 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -190,6 +190,9 @@ Improvements to clang-tidy - Fixed bug in :program:`clang-tidy` by which `HeaderFilterRegex` did not take effect when passed via the `.clang-tidy` file. +- Fixed bug in :program:`run_clang_tidy.py` where the program would not + correctly display the checks enabled by the top-level `.clang-tidy` file. + New checks ^^^^^^^^^^ From 70eed33971d9b83fe81d837588dba64a6413b015 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Wed, 30 Apr 2025 08:22:38 +0100 Subject: [PATCH 08/83] [InstCombine] Do not combine shuffle+bitcast if the bitcast is eliminable. (#135769) If we are attempting to combine shuffle+bitcast but the bitcast is pairable with a subsequent bitcast, we should not fold the shuffle as doing so can block further simplifications. The motivation for this is a long-standing regression affecting SIMDe on AArch64, introduced indirectly by the AlwaysInliner (1a2e77cf). Some reproducers: * https://godbolt.org/z/53qx18s6M * https://godbolt.org/z/o5e43h5M7 (cherry picked from commit c91c3f930cfc75eb4e8b623ecd59c807863aa6c0) --- .../InstCombine/InstCombineVectorOps.cpp | 16 ++++++--- .../InstCombine/shufflevec-bitcast.ll | 35 +++++++++++++++++++ 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 6860a7cd07b78..118d2d4be828f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -3029,10 +3029,18 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SmallVector BCs; DenseMap NewBCs; for (User *U : SVI.users()) - if (BitCastInst *BC = dyn_cast(U)) - if (!BC->use_empty()) - // Only visit bitcasts that weren't previously handled. - BCs.push_back(BC); + if (BitCastInst *BC = dyn_cast(U)) { + // Only visit bitcasts that weren't previously handled. + if (BC->use_empty()) + continue; + // Prefer to combine bitcasts of bitcasts before attempting this fold. 
+ if (BC->hasOneUse()) { + auto *BC2 = dyn_cast(BC->user_back()); + if (BC2 && isEliminableCastPair(BC, BC2)) + continue; + } + BCs.push_back(BC); + } for (BitCastInst *BC : BCs) { unsigned BegIdx = Mask.front(); Type *TgtTy = BC->getDestTy(); diff --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll index f20077243273c..877dd1eefbae4 100644 --- a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll +++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll @@ -235,3 +235,38 @@ define <3 x i4> @shuf_bitcast_wrong_size(<2 x i8> %v, i8 %x) { %r = shufflevector <4 x i4> %b, <4 x i4> undef, <3 x i32> ret <3 x i4> %r } + +; Negative test - chain of bitcasts. + +define <16 x i8> @shuf_bitcast_chain(<8 x i32> %v) { +; CHECK-LABEL: @shuf_bitcast_chain( +; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[C:%.*]] = bitcast <4 x i32> [[S]] to <16 x i8> +; CHECK-NEXT: ret <16 x i8> [[C]] +; + %s = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> + %a = bitcast <4 x i32> %s to <2 x i64> + %b = bitcast <2 x i64> %a to i128 + %c = bitcast i128 %b to <16 x i8> + ret <16 x i8> %c +} + +; Same as above, but showing why it's not feasable to implement the reverse +; fold in VectorCombine (see #136998). + +define <4 x i32> @shuf_bitcast_chain_2(<8 x i32> %v) { +; CHECK-LABEL: @shuf_bitcast_chain_2( +; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[S0]], [[S1]] +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %s0 = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> + %s1 = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> + %b0 = bitcast <4 x i32> %s0 to i128 + %b1 = bitcast <4 x i32> %s1 to i128 + %c0 = bitcast i128 %b0 to <4 x i32> + %c1 = bitcast i128 %b1 to <4 x i32> + %r = or <4 x i32> %c0, %c1 + ret <4 x i32> %r +} From 009f3c10d1c1be330bf420b97bdd9c5236b93923 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Mon, 7 Apr 2025 11:34:24 -0400 Subject: [PATCH 09/83] [LLD][COFF] Don't dllimport from static libraries (#134443) This reverts commit 6a1bdd9 and re-instate behavior that matches what MSVC link.exe does, that is, error out when trying to dllimport a symbol from a static library. A hint is now displayed in stdout, mentioning that we should rather dllimport the symbol from a import library. Fixes https://github.com/llvm/llvm-project/issues/131807 --- lld/COFF/Driver.cpp | 6 ++-- lld/COFF/SymbolTable.cpp | 25 ++++++++------ lld/COFF/SymbolTable.h | 5 +-- .../COFF/imports-static-lib-indirect.test | 26 +++++++++++++++ lld/test/COFF/imports-static-lib.test | 33 +++++++++++++++++++ lld/test/COFF/undefined_lazy.test | 26 --------------- 6 files changed, 77 insertions(+), 44 deletions(-) create mode 100644 lld/test/COFF/imports-static-lib-indirect.test create mode 100644 lld/test/COFF/imports-static-lib.test delete mode 100644 lld/test/COFF/undefined_lazy.test diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index ac3ac57bd17f4..f50ca529df4d7 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -2639,10 +2639,8 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { createECExportThunks(); // Resolve remaining undefined symbols and warn about imported locals. 
- ctx.forEachSymtab([&](SymbolTable &symtab) { - while (symtab.resolveRemainingUndefines()) - run(); - }); + ctx.forEachSymtab( + [&](SymbolTable &symtab) { symtab.resolveRemainingUndefines(); }); if (errorCount()) return; diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 307bd4a0c9411..a146e5211736e 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -214,7 +214,8 @@ struct UndefinedDiag { std::vector files; }; -static void reportUndefinedSymbol(COFFLinkerContext &ctx, +static void reportUndefinedSymbol(SymbolTable *symTab, + COFFLinkerContext &ctx, const UndefinedDiag &undefDiag) { auto diag = errorOrWarn(ctx); diag << "undefined symbol: " << undefDiag.sym; @@ -232,6 +233,17 @@ static void reportUndefinedSymbol(COFFLinkerContext &ctx, } if (numDisplayedRefs < numRefs) diag << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times"; + + // Hints + StringRef name = undefDiag.sym->getName(); + if (name.consume_front("__imp_")) { + Symbol *imp = symTab->find(name); + if (imp && imp->isLazy()) { + diag << "\nNOTE: a relevant symbol '" << imp->getName() + << "' is available in " << toString(imp->getFile()) + << " but cannot be used because it is not an import library."; + } + } } void SymbolTable::loadMinGWSymbols() { @@ -402,7 +414,7 @@ void SymbolTable::reportProblemSymbols( processFile(file, file->getSymbols()); for (const UndefinedDiag &undefDiag : undefDiags) - reportUndefinedSymbol(ctx, undefDiag); + reportUndefinedSymbol(this, ctx, undefDiag); } void SymbolTable::reportUnresolvable() { @@ -432,11 +444,10 @@ void SymbolTable::reportUnresolvable() { reportProblemSymbols(undefs, /*localImports=*/nullptr, true); } -bool SymbolTable::resolveRemainingUndefines() { +void SymbolTable::resolveRemainingUndefines() { llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols"); SmallPtrSet undefs; DenseMap localImports; - bool foundLazy = false; for (auto &i : symMap) { Symbol *sym = i.second; @@ -481,11 +492,6 @@ bool SymbolTable::resolveRemainingUndefines() { imp = findLocalSym(*mangledName); } } - if (imp && imp->isLazy()) { - forceLazy(imp); - foundLazy = true; - continue; - } if (imp && isa(imp)) { auto *d = cast(imp); replaceSymbol(sym, ctx, name, d); @@ -513,7 +519,6 @@ bool SymbolTable::resolveRemainingUndefines() { reportProblemSymbols( undefs, ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false); - return foundLazy; } std::pair SymbolTable::insert(StringRef name) { diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index ff6e8487f0734..2916c23d95c87 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -58,10 +58,7 @@ class SymbolTable { // Try to resolve any undefined symbols and update the symbol table // accordingly, then print an error message for any remaining undefined // symbols and warn about imported local symbols. - // Returns whether more files might need to be linked in to resolve lazy - // symbols, in which case the caller is expected to call the function again - // after linking those files. - bool resolveRemainingUndefines(); + void resolveRemainingUndefines(); // Load lazy objects that are needed for MinGW automatic import and for // doing stdcall fixups. 
diff --git a/lld/test/COFF/imports-static-lib-indirect.test b/lld/test/COFF/imports-static-lib-indirect.test new file mode 100644 index 0000000000000..beda0d7a31afd --- /dev/null +++ b/lld/test/COFF/imports-static-lib-indirect.test @@ -0,0 +1,26 @@ +# REQUIRES: x86 + +# Pulling in on both a dllimport symbol and a static symbol should only warn. +# RUN: split-file %s %t.dir +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/other.s -o %t.other.obj +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/main.s -o %t.main.obj +# RUN: llvm-lib %t.other.obj -out:%t.other.lib +# RUN: lld-link %t.other.lib %t.main.obj -out:%t.dll -dll 2>&1 | FileCheck %s + +CHECK: warning: {{.*}} locally defined symbol imported: foo {{.*}} [LNK4217] + +#--- other.s +.text +.globl other +.globl foo +other: + ret +foo: + ret +#--- main.s +.text +.global _DllMainCRTStartup +_DllMainCRTStartup: + call *other(%rip) + call *__imp_foo(%rip) + ret diff --git a/lld/test/COFF/imports-static-lib.test b/lld/test/COFF/imports-static-lib.test new file mode 100644 index 0000000000000..8e9525dab5284 --- /dev/null +++ b/lld/test/COFF/imports-static-lib.test @@ -0,0 +1,33 @@ +# REQUIRES: x86 + +# Ensure that we don't import dllimport symbols from static (non-import) libraries +# RUN: split-file %s %t.dir +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/foo.s -o %t.foo.obj +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/main.s -o %t.main.obj +# RUN: llvm-lib %t.foo.obj -out:%t.foo.lib +# RUN: not lld-link %t.foo.lib %t.main.obj -out:%t.dll -dll 2>&1 | FileCheck %s + +CHECK: error: undefined symbol: __declspec(dllimport) foo +CHECK: NOTE: a relevant symbol 'foo' is available in {{.*}}.foo.lib but cannot be used because it is not an import library. + +# Now do the same thing, but import the symbol from a import library. +# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/foo_dll_main.s -o %t.foo_dll_main.obj +# RUN: lld-link /out:%t.dll /dll %t.foo.obj %t.foo_dll_main.obj /export:foo /implib:%t.foo.imp.lib +# RUN: lld-link %t.main.obj %t.foo.imp.lib -out:%t.exe -dll + +#--- foo.s +.text +.globl foo +foo: + ret +#--- foo_dll_main.s +.text +.global _DllMainCRTStartup +_DllMainCRTStartup: + ret +#--- main.s +.text +.global _DllMainCRTStartup +_DllMainCRTStartup: + call *__imp_foo(%rip) + ret diff --git a/lld/test/COFF/undefined_lazy.test b/lld/test/COFF/undefined_lazy.test deleted file mode 100644 index ed5cd358b5cd9..0000000000000 --- a/lld/test/COFF/undefined_lazy.test +++ /dev/null @@ -1,26 +0,0 @@ -# REQUIRES: x86 - -# RUN: split-file %s %t.dir -# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/foo.s -o %t.foo.obj -# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/bar.s -o %t.bar.obj -# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/qux.s -o %t.qux.obj -# RUN: llvm-lib %t.foo.obj -out:%t.foo.lib -# RUN: llvm-lib %t.bar.obj -out:%t.bar.lib -# RUN: lld-link %t.foo.lib %t.bar.lib %t.qux.obj -out:%t.dll -dll -# -#--- foo.s -.text -.globl foo -foo: - call bar -#--- bar.s -.text -.globl bar -bar: - ret -#--- qux.s -.text -.global _DllMainCRTStartup -_DllMainCRTStartup: - call *__imp_foo(%rip) - ret From 961ce35e29574eebfa82c447ac7fef8e499f53b9 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 5 May 2025 16:33:41 -0500 Subject: [PATCH 10/83] [OpenMP] Add pre sm_70 load hack back in (#138589) Summary: Different ordering modes aren't supported for an atomic load, so we just do an add of zero as the same thing. 
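Conceptually (a minimal standard-C++ sketch of the equivalence, not the DeviceRTL code itself):

```
#include <atomic>

// fetch_add(0) returns the value held before the (no-op) add, with the
// requested memory order, so it can stand in for an atomic load where a plain
// scoped load is unsupported.
static int loadViaAdd(std::atomic<int> &A, std::memory_order MO) {
  return A.fetch_add(0, MO);
}
```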
It's less efficient, but it works. Fixes https://github.com/llvm/llvm-project/issues/138560 (cherry picked from commit dfcb8cb2a92c9f72ddde5ea08dadf2f640197d32) --- offload/DeviceRTL/include/Synchronization.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h index 5a789441b9d35..c510fbf0774c2 100644 --- a/offload/DeviceRTL/include/Synchronization.h +++ b/offload/DeviceRTL/include/Synchronization.h @@ -61,7 +61,11 @@ V add(Ty *Address, V Val, atomic::OrderingTy Ordering, template > V load(Ty *Address, atomic::OrderingTy Ordering, MemScopeTy MemScope = MemScopeTy::device) { +#ifdef __NVPTX__ + return __scoped_atomic_fetch_add(Address, V(0), Ordering, MemScope); +#else return __scoped_atomic_load_n(Address, Ordering, MemScope); +#endif } template > From 2386c377db4ff35129d1dc6a618ea13252493ca4 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 6 May 2025 14:19:47 +0200 Subject: [PATCH 11/83] [BasicAA] Gracefully handle large LocationSize (#138528) If the LocationSize is larger than the index space of the pointer type, bail out instead of triggering an APInt assertion. Fixes the issue reported at https://github.com/llvm/llvm-project/pull/119365#issuecomment-2849874894. (cherry picked from commit 027b2038140f309467585298f9cb10d6b37411e7) --- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 8 +++++--- llvm/test/Analysis/BasicAA/size-overflow.ll | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Analysis/BasicAA/size-overflow.ll diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index b2a3f3390e000..06e8eb7072917 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1245,8 +1245,11 @@ AliasResult BasicAAResult::aliasGEP( if (V1Size.isScalable() || V2Size.isScalable()) return AliasResult::MayAlias; - // We need to know both acess sizes for all the following heuristics. - if (!V1Size.hasValue() || !V2Size.hasValue()) + // We need to know both access sizes for all the following heuristics. Don't + // try to reason about sizes larger than the index space. + unsigned BW = DecompGEP1.Offset.getBitWidth(); + if (!V1Size.hasValue() || !V2Size.hasValue() || + !isUIntN(BW, V1Size.getValue()) || !isUIntN(BW, V2Size.getValue())) return AliasResult::MayAlias; APInt GCD; @@ -1301,7 +1304,6 @@ AliasResult BasicAAResult::aliasGEP( // Compute ranges of potentially accessed bytes for both accesses. If the // interseciton is empty, there can be no overlap. - unsigned BW = OffsetRange.getBitWidth(); ConstantRange Range1 = OffsetRange.add( ConstantRange(APInt(BW, 0), APInt(BW, V1Size.getValue()))); ConstantRange Range2 = diff --git a/llvm/test/Analysis/BasicAA/size-overflow.ll b/llvm/test/Analysis/BasicAA/size-overflow.ll new file mode 100644 index 0000000000000..2a390d29e472a --- /dev/null +++ b/llvm/test/Analysis/BasicAA/size-overflow.ll @@ -0,0 +1,14 @@ +; RUN: opt -passes=aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s + +target datalayout = "p:32:32" + +; Make sure that using a LocationSize larget than the index space does not +; assert. 
+ +; CHECK: Just Mod: Ptr: i32* %gep <-> call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 4294967296, i1 false) +define void @test(ptr %p, i32 %idx) { + %gep = getelementptr i8, ptr %p, i32 %idx + load i32, ptr %gep + call void @llvm.memset.i64(ptr %p, i8 0, i64 u0x100000000, i1 false) + ret void +} From ae97a56d363f95d382bac5eca5f9b5775b1919c2 Mon Sep 17 00:00:00 2001 From: Ikhlas Ajbar Date: Tue, 6 May 2025 16:47:25 -0500 Subject: [PATCH 12/83] [Hexagon] Add missing patterns to select PFALSE and PTRUE (#138712) Fixes #134659 (cherry picked from commit 57e88993fee30f4441e87df4df061393600b2ada) --- llvm/lib/Target/Hexagon/HexagonPatterns.td | 5 ++++ llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll | 29 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 244f204539c89..acf701b0f3e5d 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -109,7 +109,12 @@ def pfalse: PatFrag<(ops), (HexagonPFALSE)>; def pnot: PatFrag<(ops node:$Pu), (xor node:$Pu, ptrue)>; def: Pat<(v8i1 (HexagonPFALSE)), (C2_tfrrp (A2_tfrsi (i32 0)))>; +def: Pat<(v4i1 (HexagonPFALSE)), (C2_tfrrp (A2_tfrsi (i32 0)))>; +def: Pat<(v2i1 (HexagonPFALSE)), (C2_tfrrp (A2_tfrsi (i32 0)))>; + def: Pat<(v8i1 (HexagonPTRUE)), (C2_tfrrp (A2_tfrsi (i32 -1)))>; +def: Pat<(v4i1 (HexagonPTRUE)), (C2_tfrrp (A2_tfrsi (i32 -1)))>; +def: Pat<(v2i1 (HexagonPTRUE)), (C2_tfrrp (A2_tfrsi (i32 -1)))>; def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru), (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>; diff --git a/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll b/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll new file mode 100644 index 0000000000000..c0904b8b4fdd6 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=hexagon -debug-only=isel 2>&1 < %s - | FileCheck %s + +; CHECK: [[R0:%[0-9]+]]:intregs = A2_tfrsi 0 +; CHECK-NEXT: predregs = C2_tfrrp killed [[R0]]:intregs + +define fastcc i16 @test(ptr %0, { <4 x i32>, <4 x i1> } %1, <4 x i1> %2) { +Entry: + %3 = alloca [16 x i8], i32 0, align 16 + %4 = alloca [16 x i8], i32 0, align 16 + store <4 x i32> , ptr %4, align 16 + store <4 x i32> , ptr %3, align 16 + %5 = load <4 x i32>, ptr %4, align 16 + %6 = load <4 x i32>, ptr %3, align 16 + %7 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> %5, <4 x i32> %6) + %8 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %2) + br i1 %8, label %OverflowFail, label %OverflowOk + +OverflowFail: ; preds = %Entry + store volatile i32 0, ptr null, align 4 + unreachable + +OverflowOk: ; preds = %Entry + %9 = extractvalue { <4 x i32>, <4 x i1> } %7, 0 + store <4 x i32> %9, ptr %0, align 16 + ret i16 0 + } + +declare { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32>, <4 x i32>) #0 +declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>) #0 From 2b34040173f7d0e9b6da246aa6b923611b56fb87 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Mon, 5 May 2025 13:32:33 +0530 Subject: [PATCH 13/83] [clang-repl] Fix destructor for interpreter for the cuda negation case (#138091) Check this error for more context (https://github.com/compiler-research/CppInterOp/actions/runs/14749797085/job/41407625681?pr=491#step:10:531) This fails with ``` * thread #1, name = 'CppInterOpTests', stop reason = signal SIGSEGV: address not mapped to object (fault address: 0x55500356d6d3) * frame #0: 
0x00007fffee41cfe3 libclangCppInterOp.so.21.0gitclang::PragmaNamespace::~PragmaNamespace() + 99 frame #1: 0x00007fffee435666 libclangCppInterOp.so.21.0gitclang::Preprocessor::~Preprocessor() + 3830 frame #2: 0x00007fffee20917a libclangCppInterOp.so.21.0gitstd::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() + 58 frame #3: 0x00007fffee224796 libclangCppInterOp.so.21.0gitclang::CompilerInstance::~CompilerInstance() + 838 frame #4: 0x00007fffee22494d libclangCppInterOp.so.21.0gitclang::CompilerInstance::~CompilerInstance() + 13 frame #5: 0x00007fffed95ec62 libclangCppInterOp.so.21.0gitclang::IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() + 98 frame #6: 0x00007fffed9551b6 libclangCppInterOp.so.21.0gitclang::Interpreter::~Interpreter() + 102 frame #7: 0x00007fffed95598d libclangCppInterOp.so.21.0gitclang::Interpreter::~Interpreter() + 13 frame #8: 0x00007fffed9181e7 libclangCppInterOp.so.21.0gitcompat::createClangInterpreter(std::vector>&) + 2919 ``` Problem : 1) The destructor currently handles no clearance for the DeviceParser and the DeviceAct. We currently only have this https://github.com/llvm/llvm-project/blob/976493822443c52a71ed3c67aaca9a555b20c55d/clang/lib/Interpreter/Interpreter.cpp#L416-L419 2) The ownership for DeviceCI currently is present in IncrementalCudaDeviceParser. But this should be similar to how the combination for hostCI, hostAction and hostParser are managed by the Interpreter. As on master the DeviceAct and DeviceParser are managed by the Interpreter but not DeviceCI. This is problematic because : IncrementalParser holds a Sema& which points into the DeviceCI. On master, DeviceCI is destroyed before the base class ~IncrementalParser() runs, causing Parser::reset() to access a dangling Sema (and as Sema holds a reference to Preprocessor which owns PragmaNamespace) we see this ``` * frame #0: 0x00007fffee41cfe3 libclangCppInterOp.so.21.0gitclang::PragmaNamespace::~PragmaNamespace() + 99 frame #1: 0x00007fffee435666 libclangCppInterOp.so.21.0gitclang::Preprocessor::~Preprocessor() + 3830 ``` (cherry picked from commit 529b6fcb00aabbed17365e5fb3abbc2ae127c967) --- clang/include/clang/Interpreter/Interpreter.h | 3 +++ clang/lib/Interpreter/DeviceOffload.cpp | 8 +++----- clang/lib/Interpreter/DeviceOffload.h | 4 +--- clang/lib/Interpreter/Interpreter.cpp | 9 ++++++++- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index 56213f88b9e30..f8663e3193a18 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -116,6 +116,9 @@ class Interpreter { /// Compiler instance performing the incremental compilation. std::unique_ptr CI; + /// An optional compiler instance for CUDA offloading + std::unique_ptr DeviceCI; + protected: // Derived classes can use an extended interface of the Interpreter. 
Interpreter(std::unique_ptr Instance, llvm::Error &Err, diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp index 7d0125403ea52..05625ddedb72f 100644 --- a/clang/lib/Interpreter/DeviceOffload.cpp +++ b/clang/lib/Interpreter/DeviceOffload.cpp @@ -25,13 +25,12 @@ namespace clang { IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( - std::unique_ptr DeviceInstance, - CompilerInstance &HostInstance, + CompilerInstance &DeviceInstance, CompilerInstance &HostInstance, llvm::IntrusiveRefCntPtr FS, llvm::Error &Err, const std::list &PTUs) - : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS), + : IncrementalParser(DeviceInstance, Err), PTUs(PTUs), VFS(FS), CodeGenOpts(HostInstance.getCodeGenOpts()), - TargetOpts(DeviceInstance->getTargetOpts()) { + TargetOpts(DeviceInstance.getTargetOpts()) { if (Err) return; StringRef Arch = TargetOpts.CPU; @@ -41,7 +40,6 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( llvm::inconvertibleErrorCode())); return; } - DeviceCI = std::move(DeviceInstance); } llvm::Expected IncrementalCUDADeviceParser::GeneratePTX() { diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h index 43645033c4840..0b903e31c6799 100644 --- a/clang/lib/Interpreter/DeviceOffload.h +++ b/clang/lib/Interpreter/DeviceOffload.h @@ -28,8 +28,7 @@ class IncrementalCUDADeviceParser : public IncrementalParser { public: IncrementalCUDADeviceParser( - std::unique_ptr DeviceInstance, - CompilerInstance &HostInstance, + CompilerInstance &DeviceInstance, CompilerInstance &HostInstance, llvm::IntrusiveRefCntPtr VFS, llvm::Error &Err, const std::list &PTUs); @@ -42,7 +41,6 @@ class IncrementalCUDADeviceParser : public IncrementalParser { ~IncrementalCUDADeviceParser(); protected: - std::unique_ptr DeviceCI; int SMVersion; llvm::SmallString<1024> PTXCode; llvm::SmallVector FatbinContent; diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index f91563dd0378c..3b81f9d701b42 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -416,6 +416,10 @@ Interpreter::Interpreter(std::unique_ptr Instance, Interpreter::~Interpreter() { IncrParser.reset(); Act->FinalizeAction(); + if (DeviceParser) + DeviceParser.reset(); + if (DeviceAct) + DeviceAct->FinalizeAction(); if (IncrExecutor) { if (llvm::Error Err = IncrExecutor->cleanUp()) llvm::report_fatal_error( @@ -501,8 +505,11 @@ Interpreter::createWithCUDA(std::unique_ptr CI, DCI->ExecuteAction(*Interp->DeviceAct); + Interp->DeviceCI = std::move(DCI); + auto DeviceParser = std::make_unique( - std::move(DCI), *Interp->getCompilerInstance(), IMVFS, Err, Interp->PTUs); + *Interp->DeviceCI, *Interp->getCompilerInstance(), IMVFS, Err, + Interp->PTUs); if (Err) return std::move(Err); From be087ab35970dffe3e473b5a10266ed356261746 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 5 May 2025 17:19:13 -0400 Subject: [PATCH 14/83] [libc++] Re-introduce _LIBCPP_DISABLE_AVAILABILITY (#134158) The `_LIBCPP_DISABLE_AVAILABILITY` macro was removed in afae1a5f32bb as an intended no-op. It turns out that some projects are making use of that macro to work around a Clang bug with availability annotations that still exists: https://github.com/llvm/llvm-project/issues/134151. Since that Clang bug still hasn't been fixed, I feel that we must sill honor that unfortunate macro until we've figured out how to get rid of it without breaking code. 
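For example, a translation unit relying on the workaround opts out roughly like this (a sketch only, not a recommendation; the macro must be visible before the first libc++ include, typically via -D_LIBCPP_DISABLE_AVAILABILITY on the command line):

```
// Sketch: suppress libc++ availability markup for this TU to work around the
// Clang availability-attribute bug referenced above.
#define _LIBCPP_DISABLE_AVAILABILITY
#include <optional>

std::optional<int> make_value() { return 42; }
```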
(cherry picked from commit 25fc52e655fb4bfd3bb89948d5cbfe011e1b8984) --- libcxx/docs/ReleaseNotes/20.rst | 3 -- libcxx/include/__configuration/availability.h | 8 ++- .../vendor/apple/disable-availability.sh.cpp | 49 +++++++++++++++++++ 3 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 libcxx/test/libcxx/vendor/apple/disable-availability.sh.cpp diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index 06e6e673b5508..f81a573845e6f 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -153,9 +153,6 @@ Deprecations and Removals headers as an extension and only deprecates them. The ``_LIBCPP_DISABLE_DEPRECATION_WARNINGS`` macro can be defined to suppress deprecation for these headers. -- The ``_LIBCPP_DISABLE_AVAILABILITY`` macro that was used to force-disable availability markup has now been removed. - Whether availability markup is used by the library is now solely controlled at configuration-time. - - The pointer safety functions ``declare_reachable``, ``declare_no_pointers``, ``undeclare_no_pointers`` and ``__undeclare_reachable`` have been removed from the library. These functions were never implemented in a non-trivial way, making it very unlikely that any binary depends on them. diff --git a/libcxx/include/__configuration/availability.h b/libcxx/include/__configuration/availability.h index f9e52a690c05c..aa2e75b6f6fe8 100644 --- a/libcxx/include/__configuration/availability.h +++ b/libcxx/include/__configuration/availability.h @@ -69,7 +69,13 @@ // Availability markup is disabled when building the library, or when a non-Clang // compiler is used because only Clang supports the necessary attributes. -#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) +// +// We also allow users to force-disable availability markup via the `_LIBCPP_DISABLE_AVAILABILITY` +// macro because that is the only way to work around a Clang bug related to availability +// attributes: https://github.com/llvm/llvm-project/issues/134151. +// Once that bug has been fixed, we should remove the macro. +#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || \ + !defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_DISABLE_AVAILABILITY) # undef _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS # define _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS 0 #endif diff --git a/libcxx/test/libcxx/vendor/apple/disable-availability.sh.cpp b/libcxx/test/libcxx/vendor/apple/disable-availability.sh.cpp new file mode 100644 index 0000000000000..474b3f83c6044 --- /dev/null +++ b/libcxx/test/libcxx/vendor/apple/disable-availability.sh.cpp @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: stdlib=apple-libc++ + +// This test is dependent on the code generated by the compiler, and it doesn't +// work properly with older AppleClangs. +// UNSUPPORTED: apple-clang-15 + +// This test ensures that we retain a way to disable availability markup on Apple platforms +// in order to work around Clang bug https://github.com/llvm/llvm-project/issues/134151. 
+// +// Once that bug has been fixed or once we've made changes to libc++'s use of availability +// that render that workaround unnecessary, the macro and this test can be removed. +// +// The test works by creating a final linked image that refers to a function marked with +// both an availability attribute and with _LIBCPP_HIDE_FROM_ABI. We then check that this +// generates a weak reference to the function -- without the bug, we'd expect a strong +// reference or no reference at all instead. + +// First, test the test. Make sure that we do (incorrectly) produce a weak definition when we +// don't define _LIBCPP_DISABLE_AVAILABILITY. Otherwise, something may have changed in libc++ +// and this test might not work anymore. +// RUN: %{cxx} %s %{flags} %{compile_flags} %{link_flags} -fvisibility=hidden -fvisibility-inlines-hidden -shared -o %t.1.dylib +// RUN: nm -m %t.1.dylib | c++filt | grep value > %t.1.symbols +// RUN: grep weak %t.1.symbols + +// Now, make sure that 'weak' goes away when we define _LIBCPP_DISABLE_AVAILABILITY. +// In fact, all references to the function might go away, so we just check that we don't emit +// any weak reference. +// RUN: %{cxx} %s %{flags} %{compile_flags} %{link_flags} -fvisibility=hidden -fvisibility-inlines-hidden -D_LIBCPP_DISABLE_AVAILABILITY -shared -o %t.2.dylib +// RUN: nm -m %t.2.dylib | c++filt | grep value > %t.2.symbols +// RUN: not grep weak %t.2.symbols + +#include + +template +struct optional { + T val_; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_INTRODUCED_IN_LLVM_11_ATTRIBUTE T value() const { return val_; } +}; + +using PMF = int (optional::*)() const; +PMF f() { return &optional::value; } From 5429418cb06455d867f24b2c7cf2477357a4418c Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sun, 4 May 2025 20:55:49 +0300 Subject: [PATCH 15/83] [clang] Add support for Debian 14 Forky and Debian 15 Duke (#138460) Futureproofs our single Debian-specific special case for roughly the next 6 years. 
See: https://lists.debian.org/debian-devel-announce/2025/01/msg00004.html (cherry picked from commit 58e6883c8b6e571d6bd774645ee2b6348cfed6ba) --- clang/include/clang/Driver/Distro.h | 4 +++- clang/lib/Driver/Distro.cpp | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index b4d485dac8a26..c544a8c002191 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -39,6 +39,8 @@ class Distro { DebianBullseye, DebianBookworm, DebianTrixie, + DebianForky, + DebianDuke, Exherbo, RHEL5, RHEL6, @@ -128,7 +130,7 @@ class Distro { bool IsOpenSUSE() const { return DistroVal == OpenSUSE; } bool IsDebian() const { - return DistroVal >= DebianLenny && DistroVal <= DebianTrixie; + return DistroVal >= DebianLenny && DistroVal <= DebianDuke; } bool IsUbuntu() const { diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 3cc79535de8da..71ba71fa18379 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -160,6 +160,10 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { return Distro::DebianBookworm; case 13: return Distro::DebianTrixie; + case 14: + return Distro::DebianForky; + case 15: + return Distro::DebianDuke; default: return Distro::UnknownDistro; } @@ -173,6 +177,8 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { .Case("bullseye/sid", Distro::DebianBullseye) .Case("bookworm/sid", Distro::DebianBookworm) .Case("trixie/sid", Distro::DebianTrixie) + .Case("forky/sid", Distro::DebianForky) + .Case("duke/sid", Distro::DebianDuke) .Default(Distro::UnknownDistro); } From a7166c37394612a58bcd237cefbb8fce7424e747 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Fri, 2 May 2025 19:19:39 -0700 Subject: [PATCH 16/83] release/20.x: [clang-format] RemoveParentheses shouldn't remove empty parentheses (#138229) Backport d3506ee573a2aa1403817642ef45f8c0305bb572 --- clang/lib/Format/UnwrappedLineParser.cpp | 3 ++- clang/unittests/Format/FormatTest.cpp | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 2b348c926294e..c3ffabce15ec8 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2581,7 +2581,8 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { if (Prev) { auto OptionalParens = [&] { if (MightBeStmtExpr || MightBeFoldExpr || Line->InMacroBody || - SeenComma || Style.RemoveParentheses == FormatStyle::RPS_Leave) { + SeenComma || Style.RemoveParentheses == FormatStyle::RPS_Leave || + RParen->getPreviousNonComment() == LParen) { return false; } const bool DoubleParens = diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index bf3eff129efd5..49e1fde1d9ccf 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -27895,6 +27895,8 @@ TEST_F(FormatTest, RemoveParentheses) { verifyFormat("return ((... && std::is_convertible_v));", "return (((... 
&& std::is_convertible_v)));", Style); + verifyFormat("MOCK_METHOD(void, Function, (), override);", + "MOCK_METHOD(void, Function, (), (override));", Style); Style.RemoveParentheses = FormatStyle::RPS_ReturnStatement; verifyFormat("#define Return0 return (0);", Style); From d34d5296095b013bfdec511a06a81777c992e1e3 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 11 Apr 2025 20:03:23 +0200 Subject: [PATCH 17/83] Support z17 processor name and scheduler description The recently announced IBM z17 processor implements the architecture already supported as "arch15" in LLVM. This patch adds support for "z17" as an alternate architecture name for arch15. This patch also add the scheduler description for the z17 processor, provided by Jonas Paulsson. Manual backport of https://github.com/llvm/llvm-project/pull/135254 --- clang/lib/Basic/Targets/SystemZ.cpp | 2 +- .../CodeGen/SystemZ/builtins-systemz-bitop.c | 4 +- .../SystemZ/builtins-systemz-vector5-error.c | 2 +- .../SystemZ/builtins-systemz-vector5.c | 2 +- .../SystemZ/builtins-systemz-zvector5-error.c | 2 +- .../SystemZ/builtins-systemz-zvector5.c | 4 +- .../test/CodeGen/SystemZ/systemz-abi-vector.c | 2 + clang/test/CodeGen/SystemZ/systemz-abi.c | 2 + clang/test/Driver/systemz-march.c | 2 + .../Misc/target-invalid-cpu-note/systemz.c | 1 + .../Preprocessor/predefined-arch-macros.c | 3 + .../Target/SystemZ/SystemZISelLowering.cpp | 8 +- llvm/lib/Target/SystemZ/SystemZInstrVector.td | 4 +- llvm/lib/Target/SystemZ/SystemZProcessors.td | 3 +- llvm/lib/Target/SystemZ/SystemZSchedule.td | 1 + llvm/lib/Target/SystemZ/SystemZScheduleZ16.td | 16 +- llvm/lib/Target/SystemZ/SystemZScheduleZ17.td | 1754 +++++++++++++++++ llvm/lib/TargetParser/Host.cpp | 2 +- .../Analysis/CostModel/SystemZ/divrem-reg.ll | 86 +- .../CostModel/SystemZ/i128-cmp-ext-conv.ll | 8 +- .../Analysis/CostModel/SystemZ/int-arith.ll | 10 +- llvm/test/CodeGen/SystemZ/args-12.ll | 2 +- llvm/test/CodeGen/SystemZ/args-13.ll | 2 +- llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll | 2 +- llvm/test/CodeGen/SystemZ/int-abs-03.ll | 4 +- llvm/test/CodeGen/SystemZ/int-add-19.ll | 2 +- llvm/test/CodeGen/SystemZ/int-cmp-64.ll | 4 +- llvm/test/CodeGen/SystemZ/int-conv-15.ll | 4 +- llvm/test/CodeGen/SystemZ/int-div-08.ll | 4 +- llvm/test/CodeGen/SystemZ/int-max-02.ll | 4 +- llvm/test/CodeGen/SystemZ/int-min-02.ll | 4 +- llvm/test/CodeGen/SystemZ/int-mul-14.ll | 4 +- llvm/test/CodeGen/SystemZ/int-mul-15.ll | 4 +- llvm/test/CodeGen/SystemZ/int-mul-16.ll | 4 +- llvm/test/CodeGen/SystemZ/int-neg-04.ll | 4 +- llvm/test/CodeGen/SystemZ/int-sub-12.ll | 2 +- llvm/test/CodeGen/SystemZ/llxa-01.ll | 2 +- llvm/test/CodeGen/SystemZ/llxa-02.ll | 2 +- llvm/test/CodeGen/SystemZ/llxa-03.ll | 2 +- llvm/test/CodeGen/SystemZ/llxa-04.ll | 2 +- llvm/test/CodeGen/SystemZ/llxa-05.ll | 2 +- llvm/test/CodeGen/SystemZ/lxa-01.ll | 2 +- llvm/test/CodeGen/SystemZ/lxa-02.ll | 2 +- llvm/test/CodeGen/SystemZ/lxa-03.ll | 2 +- llvm/test/CodeGen/SystemZ/lxa-04.ll | 2 +- llvm/test/CodeGen/SystemZ/lxa-05.ll | 2 +- llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll | 2 +- llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll | 2 +- llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll | 2 +- llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-cmp-09.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-div-03.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-eval.ll | 58 +- .../test/CodeGen/SystemZ/vec-intrinsics-05.ll | 4 +- llvm/test/CodeGen/SystemZ/vec-mul-06.ll | 4 +- .../{insns-arch15.txt => insns-z17.txt} | 4 +- .../{insn-bad-arch15.s => 
insn-bad-z17.s} | 4 +- .../{insn-good-arch15.s => insn-good-z17.s} | 4 +- llvm/unittests/TargetParser/Host.cpp | 2 +- 59 files changed, 1926 insertions(+), 160 deletions(-) create mode 100644 llvm/lib/Target/SystemZ/SystemZScheduleZ17.td rename llvm/test/MC/Disassembler/SystemZ/{insns-arch15.txt => insns-z17.txt} (99%) rename llvm/test/MC/SystemZ/{insn-bad-arch15.s => insn-bad-z17.s} (98%) rename llvm/test/MC/SystemZ/{insn-good-arch15.s => insn-good-z17.s} (99%) diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp index c836d110d26d5..6326188b3bd18 100644 --- a/clang/lib/Basic/Targets/SystemZ.cpp +++ b/clang/lib/Basic/Targets/SystemZ.cpp @@ -105,7 +105,7 @@ static constexpr ISANameRevision ISARevisions[] = { {{"arch12"}, 12}, {{"z14"}, 12}, {{"arch13"}, 13}, {{"z15"}, 13}, {{"arch14"}, 14}, {{"z16"}, 14}, - {{"arch15"}, 15}, + {{"arch15"}, 15}, {{"z17"}, 15}, }; int SystemZTargetInfo::getISARevision(StringRef Name) const { diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c b/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c index 5b4051c8d6f17..717a7d7ab49e2 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c @@ -1,6 +1,6 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm -x c++ %s -o - | FileCheck %s +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm -x c++ %s -o - | FileCheck %s unsigned long test_bdepg(unsigned long a, unsigned long b) { // CHECK-LABEL: test_bdepg diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c index 3943a15af9d2f..8275b9ddb88a8 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c @@ -1,5 +1,5 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-unknown-unknown \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-unknown-unknown \ // RUN: -Wall -Wno-unused -Werror -fsyntax-only -verify %s typedef __attribute__((vector_size(16))) signed char vec_schar; diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c index c3621819e71f9..b765fa64b33d4 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c @@ -1,5 +1,5 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -flax-vector-conversions=none \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-ibm-linux -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s typedef __attribute__((vector_size(16))) signed char vec_schar; diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c index 9f4844efd6312..79041b923068e 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c @@ -1,5 +1,5 @@ // REQUIRES: systemz-registered-target -// RUN: 
%clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-linux-gnu \ // RUN: -fzvector -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -fsyntax-only -verify %s diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c index 7a29dbf552e0b..6ee9e1ee3a117 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c @@ -1,8 +1,8 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-linux-gnu \ // RUN: -O2 -fzvector -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-linux-gnu \ // RUN: -O2 -fzvector -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -S %s -o - | FileCheck %s --check-prefix=CHECK-ASM diff --git a/clang/test/CodeGen/SystemZ/systemz-abi-vector.c b/clang/test/CodeGen/SystemZ/systemz-abi-vector.c index 1e1926678ec33..e5704709a3a33 100644 --- a/clang/test/CodeGen/SystemZ/systemz-abi-vector.c +++ b/clang/test/CodeGen/SystemZ/systemz-abi-vector.c @@ -18,6 +18,8 @@ // RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch14 \ // RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu z17 \ +// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \ // RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s diff --git a/clang/test/CodeGen/SystemZ/systemz-abi.c b/clang/test/CodeGen/SystemZ/systemz-abi.c index 58081bdc6cc2a..7de425950e9fd 100644 --- a/clang/test/CodeGen/SystemZ/systemz-abi.c +++ b/clang/test/CodeGen/SystemZ/systemz-abi.c @@ -24,6 +24,8 @@ // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch14 \ // RUN: -emit-llvm -o - %s -mfloat-abi soft | FileCheck %s \ // RUN: --check-prefixes=CHECK,SOFT-FLOAT +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu z17 \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD-FLOAT // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \ // RUN: -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD-FLOAT // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \ diff --git a/clang/test/Driver/systemz-march.c b/clang/test/Driver/systemz-march.c index 93a11c6c9c013..8922db9f2d5d6 100644 --- a/clang/test/Driver/systemz-march.c +++ b/clang/test/Driver/systemz-march.c @@ -15,6 +15,7 @@ // RUN: %clang -target s390x -### -S -emit-llvm -march=arch13 %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH13 %s // RUN: %clang -target s390x -### -S -emit-llvm -march=z16 %s 2>&1 | FileCheck --check-prefix=CHECK-Z16 %s // RUN: %clang -target s390x -### -S -emit-llvm -march=arch14 %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH14 %s +// RUN: %clang -target s390x -### -S -emit-llvm -march=z17 %s 2>&1 | FileCheck --check-prefix=CHECK-Z17 %s // RUN: %clang -target s390x -### -S -emit-llvm -march=arch15 %s 2>&1 | FileCheck 
--check-prefix=CHECK-ARCH15 %s // CHECK-Z9: error: unknown target CPU 'z9' @@ -32,6 +33,7 @@ // CHECK-ARCH13: "-target-cpu" "arch13" // CHECK-Z16: "-target-cpu" "z16" // CHECK-ARCH14: "-target-cpu" "arch14" +// CHECK-Z17: "-target-cpu" "z17" // CHECK-ARCH15: "-target-cpu" "arch15" int x; diff --git a/clang/test/Misc/target-invalid-cpu-note/systemz.c b/clang/test/Misc/target-invalid-cpu-note/systemz.c index b70173f5feec2..021c280d53190 100644 --- a/clang/test/Misc/target-invalid-cpu-note/systemz.c +++ b/clang/test/Misc/target-invalid-cpu-note/systemz.c @@ -20,4 +20,5 @@ // CHECK-SAME: {{^}}, arch14 // CHECK-SAME: {{^}}, z16 // CHECK-SAME: {{^}}, arch15 +// CHECK-SAME: {{^}}, z17 // CHECK-SAME: {{$}} diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index f267f1759cdb5..2d17891071aae 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -4394,6 +4394,9 @@ // RUN: %clang -march=arch15 -E -dM %s -o - 2>&1 \ // RUN: -target s390x-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH15 +// RUN: %clang -march=z17 -E -dM %s -o - 2>&1 \ +// RUN: -target s390x-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH15 // CHECK_SYSTEMZ_ARCH15: #define __ARCH__ 15 // CHECK_SYSTEMZ_ARCH15: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 // CHECK_SYSTEMZ_ARCH15: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 2b8269e440e90..049865c81667a 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -254,7 +254,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ROTR, MVT::i128, Expand); setOperationAction(ISD::ROTL, MVT::i128, Expand); - // No special instructions for these before arch15. + // No special instructions for these before z17. if (!Subtarget.hasVectorEnhancements3()) { setOperationAction(ISD::MUL, MVT::i128, Expand); setOperationAction(ISD::MULHS, MVT::i128, Expand); @@ -281,7 +281,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Use VPOPCT and add up partial results. setOperationAction(ISD::CTPOP, MVT::i128, Custom); - // Additional instructions available with arch15. + // Additional instructions available with z17. if (Subtarget.hasVectorEnhancements3()) { setOperationAction(ISD::ABS, MVT::i128, Legal); } @@ -353,7 +353,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); setOperationAction(ISD::CTLZ, MVT::i64, Legal); - // On arch15 we have native support for a 64-bit CTTZ. + // On z17 we have native support for a 64-bit CTTZ. if (Subtarget.hasMiscellaneousExtensions4()) { setOperationAction(ISD::CTTZ, MVT::i32, Promote); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Promote); @@ -4466,7 +4466,7 @@ SDValue SystemZTargetLowering::lowerMULH(SDValue Op, SDLoc DL(Op); SDValue Even, Odd; - // This custom expander is only used on arch15 and later for 64-bit types. + // This custom expander is only used on z17 and later for 64-bit types. 
assert(!is32Bit(VT)); assert(Subtarget.hasMiscellaneousExtensions2()); diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index edd20a5de8c63..a4ece32c79d88 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -1973,7 +1973,7 @@ let Predicates = [FeatureVector] in { (VLEG (VGBM 0), bdxaddr12only:$addr, 1)>; } -// In-register i128 sign-extensions on arch15. +// In-register i128 sign-extensions on z17. let Predicates = [FeatureVectorEnhancements3] in { def : Pat<(i128 (sext_inreg VR128:$x, i8)), (VUPLG (VSEGB VR128:$x))>; def : Pat<(i128 (sext_inreg VR128:$x, i16)), (VUPLG (VSEGH VR128:$x))>; @@ -1993,7 +1993,7 @@ let Predicates = [FeatureVector] in { (VSRAB (VREPG VR128:$x, 1), (VREPIB 64))>; } -// Sign-extensions from GPR to i128 on arch15. +// Sign-extensions from GPR to i128 on z17. let Predicates = [FeatureVectorEnhancements3] in { def : Pat<(i128 (sext_inreg (anyext GR32:$x), i8)), (VUPLG (VLVGP (LGBR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$x, subreg_l32)), diff --git a/llvm/lib/Target/SystemZ/SystemZProcessors.td b/llvm/lib/Target/SystemZ/SystemZProcessors.td index 75b6671dc7723..0827701a48b5a 100644 --- a/llvm/lib/Target/SystemZ/SystemZProcessors.td +++ b/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -41,4 +41,5 @@ def : ProcessorModel<"z15", Z15Model, Arch13SupportedFeatures.List>; def : ProcessorModel<"arch14", Z16Model, Arch14SupportedFeatures.List>; def : ProcessorModel<"z16", Z16Model, Arch14SupportedFeatures.List>; -def : ProcessorModel<"arch15", Z16Model, Arch15SupportedFeatures.List>; +def : ProcessorModel<"arch15", Z17Model, Arch15SupportedFeatures.List>; +def : ProcessorModel<"z17", Z17Model, Arch15SupportedFeatures.List>; diff --git a/llvm/lib/Target/SystemZ/SystemZSchedule.td b/llvm/lib/Target/SystemZ/SystemZSchedule.td index d683cc042e5c9..cc03a71d8a649 100644 --- a/llvm/lib/Target/SystemZ/SystemZSchedule.td +++ b/llvm/lib/Target/SystemZ/SystemZSchedule.td @@ -60,6 +60,7 @@ def VBU : SchedWrite; // Virtual branching unit def MCD : SchedWrite; // Millicode +include "SystemZScheduleZ17.td" include "SystemZScheduleZ16.td" include "SystemZScheduleZ15.td" include "SystemZScheduleZ14.td" diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td index 2c01691707cc3..a9354ea76c72c 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td @@ -1555,12 +1555,12 @@ def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>; def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VSCH(S|D|X)?P$")>; def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VSCSHP$")>; -def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VCSPH")>; -def : InstRW<[WLat2, WLat2, VecXsPm, NormalGr], (instregex "VCLZDP")>; -def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRPR")>; -def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VPKZR")>; -def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZH")>; -def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZL")>; +def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VCSPH$")>; +def : InstRW<[WLat2, WLat2, VecXsPm, NormalGr], (instregex "VCLZDP$")>; +def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRPR$")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VPKZR$")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZH$")>; +def : InstRW<[WLat2, WLat2, VecDFX, 
NormalGr], (instregex "VUPKZL$")>; // -------------------------------- System ---------------------------------- // @@ -1597,8 +1597,8 @@ def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; // System: Breaking-Event-Address-Register Instructions //===----------------------------------------------------------------------===// -def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LBEAR")>; -def : InstRW<[WLat1, LSU2, FXb, GroupAlone], (instregex "STBEAR")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LBEAR$")>; +def : InstRW<[WLat1, LSU2, FXb, GroupAlone], (instregex "STBEAR$")>; //===----------------------------------------------------------------------===// // System: Storage-Key and Real Memory Instructions diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ17.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ17.td new file mode 100644 index 0000000000000..bd52627f636a7 --- /dev/null +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ17.td @@ -0,0 +1,1754 @@ +//--- SystemZScheduleZ17.td - SystemZ Scheduling Definitions ---*- tblgen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Z17 to support instruction +// scheduling and other instruction cost heuristics. +// +// Pseudos expanded right after isel do not need to be modelled here. +// +//===----------------------------------------------------------------------===// + +def Z17Model : SchedMachineModel { + + let UnsupportedFeatures = Arch15UnsupportedFeatures.List; + + let IssueWidth = 6; // Number of instructions decoded per cycle. + let MicroOpBufferSize = 60; // Issue queues + let LoadLatency = 1; // Optimistic load latency. + + let PostRAScheduler = 1; + + // Extra cycles for a mispredicted branch. + let MispredictPenalty = 20; +} + +let SchedModel = Z17Model in { +// These definitions need the SchedModel value. They could be put in a +// subtarget common include file, but it seems the include system in Tablegen +// currently (2016) rejects multiple includes of same file. + +// Decoder grouping rules +let NumMicroOps = 1 in { + def : WriteRes; + def : WriteRes { let BeginGroup = 1; } + def : WriteRes { let EndGroup = 1; } +} +def : WriteRes { + let NumMicroOps = 2; + let BeginGroup = 1; +} +def : WriteRes { + let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes { + let NumMicroOps = 6; + let BeginGroup = 1; + let EndGroup = 1; +} +def : WriteRes { + let NumMicroOps = 9; + let BeginGroup = 1; + let EndGroup = 1; +} + +// Incoming latency removed from the register operand which is used together +// with a memory operand by the instruction. +def : ReadAdvance; + +// LoadLatency (above) is not used for instructions in this file. This is +// instead the role of LSULatency, which is the latency value added to the +// result of loads and instructions with folded memory operands. +def : WriteRes { let Latency = 4; let NumMicroOps = 0; } + +let NumMicroOps = 0 in { + foreach L = 1-30 in + def : WriteRes("WLat"#L), []> { let Latency = L; } +} + +// Execution units. 
+def Z17_FXaUnit : ProcResource<2>; +def Z17_FXbUnit : ProcResource<2>; +def Z17_LSUnit : ProcResource<2>; +def Z17_VecUnit : ProcResource<2>; +def Z17_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } +def Z17_VBUnit : ProcResource<2>; +def Z17_MCD : ProcResource<1>; + +// Subtarget specific definitions of scheduling resources. +let NumMicroOps = 0 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + foreach Num = 2-5 in { let ReleaseAtCycles = [Num] in { + def : WriteRes("FXa"#Num), [Z17_FXaUnit]>; + def : WriteRes("FXb"#Num), [Z17_FXbUnit]>; + def : WriteRes("LSU"#Num), [Z17_LSUnit]>; + def : WriteRes("VecBF"#Num), [Z17_VecUnit]>; + def : WriteRes("VecDF"#Num), [Z17_VecUnit]>; + def : WriteRes("VecDFX"#Num), [Z17_VecUnit]>; + def : WriteRes("VecMul"#Num), [Z17_VecUnit]>; + def : WriteRes("VecStr"#Num), [Z17_VecUnit]>; + def : WriteRes("VecXsPm"#Num), [Z17_VecUnit]>; + }} + + def : WriteRes { let ReleaseAtCycles = [30]; } + def : WriteRes { let ReleaseAtCycles = [20]; } + + def : WriteRes; // Virtual Branching Unit +} + +def : WriteRes { let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; } + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +// Pseudo -> LA / LAY +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>; + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[WLat1, FXb2, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; + 
+//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>; + +// Pseudo -> reg move +def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; + +// Loads +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; +def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. 
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], + (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>; + +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; + +// Store multiple +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>; + 
+//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address ( -> larl ) +def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>; + +// Load (logical) indexed address. +def : InstRW<[WLat2, FXa2, NormalGr], (instregex "(L)?LXA(B|H|F|G|Q)$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>; +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "IC32(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr], + (instregex "ICM(H|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "A(Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AL(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "ALG(F)?$")>; +def : InstRW<[WLat1, 
FXa, NormalGr], (instregex "ALGHSIK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + 
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Combined logical operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NOT(G)?R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MS(R|FI)$")>; +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>; +def : InstRW<[WLat5, FXa2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat5, FXa2, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXa2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; +def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>; +def : InstRW<[WLat9, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>; +def : InstRW<[WLat5, FXa2, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[WLat4LSU, WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSC$")>; +def : InstRW<[WLat4LSU, WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], + 
(instregex "MSGC$")>; +def : InstRW<[WLat4, WLat4, FXa, NormalGr], (instregex "MSRKC$")>; +def : InstRW<[WLat4, WLat4, FXa, NormalGr], (instregex "MSGRKC$")>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; +def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], + (instregex "DSG(F)?$")>; +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; +def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], + (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; +def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)(Opt)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)(Opt)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?(Z)?(Opt)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "C(G|Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CL(Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, 
NormalGr], (instregex "CLH(RL|HSI)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, + GroupAlone3], (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair 
from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; + +// Compare and load +def : InstRW<[WLat30, MCD], (instregex "CAL(G|GF)?$")>; + +// Perform functions with concurrent results +def : InstRW<[WLat30, MCD], (instregex "PFCR$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; +def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], + (instregex "TRT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "KM(C|F|O|CTR|A)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(KIMD|KLMD|KMAC|KDSA)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(PCC|PPNO|PRNO)$")>; + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>; +def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], + (instregex "CVBG$")>; +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], + (instregex "CVB(Y)?$")>; +def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>; + +def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>; +def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>; +def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>; +def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>; +def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>; +def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers 
+//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; + +// Transaction end +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[WLat30, MCD], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Count leading/trailing zeros. +def : InstRW<[WLat3, FXa, NormalGr], (instregex "C(L|T)ZG$")>; + +// Find leftmost one +def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>; + +// Bit deposit and bit extract. 
+def : InstRW<[WLat4, FXa, NormalGr], (instregex "(BDEPG|BEXTG)$")>; + +// String instructions +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "UPT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "NNPA$")>; + +// Execute +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. +def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>; + +// Copy sign +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat7, VecBF4, 
GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], + (instregex "C(F|G)(E|D)BR(A)?$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], + (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "SQEBR$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQDBR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|EE)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXDB$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)EB$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[WLat20, RegReadAdv, VecFPd20, LSU, NormalGr], (instregex "DEB$")>; +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "DDB$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex 
"DEBR$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "DDBR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "(K|C)(E|D)B$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>; +def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>; +def : InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], (instregex "LFAS$")>; +def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. 
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "SQER$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQDR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXD$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)(E|D)$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MAY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MAY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[WLat20, RegReadAdv, VecFPd20, LSU, NormalGr], (instregex "DE$")>; +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "DD$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "DER$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "DDR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons 
+//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "C(E|D)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>; +def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>; +def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>; +def : InstRW<[WLat20, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>; +def : InstRW<[WLat20, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>; +def : InstRW<[WLat20, FXb, VecDF, Cracked], (instregex "CDLGTR$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>; +def : InstRW<[WLat20, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat20, WLat20, FXb, VecDF, Cracked], + (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[WLat20, WLat20, FXb, VecDF2, Cracked], + (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[WLat20, WLat20, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>; +def : InstRW<[WLat20, WLat20, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; +def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; +def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer 
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>; +def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[WLat20, VecDF, NormalGr], (instregex "MDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; +def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>; + + +// --------------------------------- Vector --------------------------------- // + +//===----------------------------------------------------------------------===// +// Vector: Move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Immediate instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>; +def : 
InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Loads +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLE(B|F|G|H)$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], + (instregex "VGE(F|G)$")>; +def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], + (instregex "VLM(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Stores +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; +def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLEBR(H|F|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Selects and permutes +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBLEND(B|F|G|H|Q)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Widening 
and narrowing +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGEM(B|H|F|G|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H|G)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F|G)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|G|W)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H|Q)?$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VD(L)?(F|G|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VEVAL$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H|Q)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F|G|Q)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|G|Q|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H|G|Q)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H|G|Q)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F|G|Q)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|G|Q|W)?$")>; +def : 
InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H|G)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H|G)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>; + +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VR(L)?(F|G|Q)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>; + +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H|Q)?$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H|Q)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H|Q)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H|Q)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H|Q)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H|Q)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H|Q)S$")>; +def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// Conversion and rounding +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>; +def : InstRW<[WLat6, 
VecBF, NormalGr], (instregex "VC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "WFLRX$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "WFIXB$")>; + +// Sign operations +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>; + +// Minimum / maximum +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>; + +// Test data class +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>; + +// Add / subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "WF(A|S)XB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>; +def : InstRW<[WLat20, VecDF, NormalGr], (instregex "WFMXB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[WLat20, VecDF, NormalGr], (instregex "WF(N)?M(A|S)XB$")>; + 
+// Divide / square root +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "WFDSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFDSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>; +def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "WFSQSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "WF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "VF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>; +def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; + +//===----------------------------------------------------------------------===// +// Vector: String instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex 
"VFEE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>; + +//===----------------------------------------------------------------------===// +// NNP assist instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCFN$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLFN(L|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VC(R)?NF$")>; + +//===----------------------------------------------------------------------===// +// Vector: Packed-decimal instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>; +def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>; +def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>; +def : InstRW<[WLat20, WLat20, VecDF, FXb, GroupAlone], + (instregex "VCVB(G|Q)?(Opt)?$")>; +def : InstRW<[WLat15, WLat15, VecDF, FXb, GroupAlone], + (instregex "VCVD(G|Q)?$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>; +def : InstRW<[WLat30, WLat30, VecDF, GroupAlone], (instregex "VM(S)?P$")>; +def : InstRW<[WLat30, WLat30, VecDF, GroupAlone], (instregex "V(D|R)P$")>; +def : InstRW<[WLat30, WLat30, VecDF, GroupAlone], (instregex "VSDP$")>; +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "VSRP(R)?$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)(P|Z)(Opt)?$")>; + +def : InstRW<[WLat20, VecDF, NormalGr], (instregex "VSCH(S|D|X)?P$")>; +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "VSCSHP$")>; +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "VCSPH$")>; +def : InstRW<[WLat2, WLat2, VecXsPm, NormalGr], (instregex "VCLZDP$")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VPKZR$")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZH$")>; +def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZL$")>; + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; +def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?(Y)?$")>; +def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>; +def 
: InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; + +//===----------------------------------------------------------------------===// +// System: Breaking-Event-Address-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LBEAR$")>; +def : InstRW<[WLat1, LSU2, FXb, GroupAlone], (instregex "STBEAR$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; +def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; +def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; +def : InstRW<[WLat30, MCD], (instregex "IRBM$")>; +def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; +def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; +def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RDP(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; +def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>; +def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>; +def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex 
"LASP$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[WLat30, MCD], (instregex "PC$")>; +def : InstRW<[WLat30, MCD], (instregex "PR$")>; +def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RP$")>; +def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; +def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; +def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; +def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "STAP$")>; +def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; +def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; +def : InstRW<[WLat30, MCD], (instregex "PTF$")>; +def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "QPACI$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "SVC$")>; +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>; +def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRACE$")>; +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TRACG$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; +def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; +def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; +def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>; +def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; +def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; +def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPE?I$")>; +def : InstRW<[WLat30, MCD], (instregex "SAL$")>; + +//===----------------------------------------------------------------------===// +// NOPs +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NOP(R)?(Opt)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "J(G)?NOP$")>; +} + diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index fa57ae183bb84..4c60698a63eff 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -428,7 +428,7 @@ StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) { case 9175: case 9176: default: - return HaveVectorSupport? "arch15" : "zEC12"; + return HaveVectorSupport? "z17" : "zEC12"; } } } // end anonymous namespace diff --git a/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll b/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll index 2f13d7e3ef9b1..68ffe5759e135 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s --check-prefixes=CHECK,Z13 -; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=arch15 | FileCheck %s --check-prefixes=CHECK,ARC15 +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z17 | FileCheck %s --check-prefixes=CHECK,Z17 ; Check costs of divisions by register ; @@ -52,9 +52,9 @@ define <2 x i64> @fun4(<2 x i64> %a, <2 x i64> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = sdiv <2 x i64> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; -; ARC15-LABEL: 'fun4' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <2 x i64> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r +; Z17-LABEL: 'fun4' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <2 x i64> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = sdiv <2 x i64> %a, %b ret <2 x i64> %r @@ -65,9 +65,9 @@ define <4 x i32> @fun5(<4 x i32> %a, <4 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = sdiv <4 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; -; ARC15-LABEL: 'fun5' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <4 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r +; Z17-LABEL: 'fun5' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <4 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = sdiv <4 x i32> %a, %b ret <4 x i32> %r @@ -78,9 +78,9 @@ define <2 x i32> @fun6(<2 x 
i32> %a, <2 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = sdiv <2 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; -; ARC15-LABEL: 'fun6' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <2 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r +; Z17-LABEL: 'fun6' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <2 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = sdiv <2 x i32> %a, %b ret <2 x i32> %r @@ -167,9 +167,9 @@ define <2 x i64> @fun15(<2 x i64> %a, <2 x i64> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = udiv <2 x i64> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; -; ARC15-LABEL: 'fun15' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <2 x i64> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r +; Z17-LABEL: 'fun15' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <2 x i64> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = udiv <2 x i64> %a, %b ret <2 x i64> %r @@ -180,9 +180,9 @@ define <4 x i32> @fun16(<4 x i32> %a, <4 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = udiv <4 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; -; ARC15-LABEL: 'fun16' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <4 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r +; Z17-LABEL: 'fun16' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <4 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = udiv <4 x i32> %a, %b ret <4 x i32> %r @@ -193,9 +193,9 @@ define <2 x i32> @fun17(<2 x i32> %a, <2 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = udiv <2 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; -; ARC15-LABEL: 'fun17' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <2 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r +; Z17-LABEL: 'fun17' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <2 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = udiv <2 x i32> %a, %b ret <2 x i32> %r @@ -282,9 +282,9 @@ define <2 x i64> @fun26(<2 x i64> %a, <2 x i64> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = srem <2 x i64> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; -; ARC15-LABEL: 'fun26' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <2 x i64> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r +; Z17-LABEL: 'fun26' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <2 x i64> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> 
%r ; %r = srem <2 x i64> %a, %b ret <2 x i64> %r @@ -295,9 +295,9 @@ define <4 x i32> @fun27(<4 x i32> %a, <4 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = srem <4 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; -; ARC15-LABEL: 'fun27' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <4 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r +; Z17-LABEL: 'fun27' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <4 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = srem <4 x i32> %a, %b ret <4 x i32> %r @@ -308,9 +308,9 @@ define <2 x i32> @fun28(<2 x i32> %a, <2 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = srem <2 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; -; ARC15-LABEL: 'fun28' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <2 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r +; Z17-LABEL: 'fun28' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <2 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = srem <2 x i32> %a, %b ret <2 x i32> %r @@ -397,9 +397,9 @@ define <2 x i64> @fun37(<2 x i64> %a, <2 x i64> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = urem <2 x i64> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; -; ARC15-LABEL: 'fun37' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <2 x i64> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r +; Z17-LABEL: 'fun37' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <2 x i64> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = urem <2 x i64> %a, %b ret <2 x i64> %r @@ -410,9 +410,9 @@ define <4 x i32> @fun38(<4 x i32> %a, <4 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = urem <4 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; -; ARC15-LABEL: 'fun38' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <4 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r +; Z17-LABEL: 'fun38' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <4 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = urem <4 x i32> %a, %b ret <4 x i32> %r @@ -423,9 +423,9 @@ define <2 x i32> @fun39(<2 x i32> %a, <2 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = urem <2 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; -; ARC15-LABEL: 'fun39' -; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <2 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r +; Z17-LABEL: 'fun39' +; Z17-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem 
<2 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = urem <2 x i32> %a, %b ret <2 x i32> %r @@ -473,9 +473,9 @@ define <8 x i64> @fun44(<8 x i64> %a, <8 x i64> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = sdiv <8 x i64> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %r ; -; ARC15-LABEL: 'fun44' -; ARC15-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r = sdiv <8 x i64> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %r +; Z17-LABEL: 'fun44' +; Z17-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r = sdiv <8 x i64> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %r ; %r = sdiv <8 x i64> %a, %b ret <8 x i64> %r @@ -486,9 +486,9 @@ define <8 x i32> @fun45(<8 x i32> %a, <8 x i32> %b) { ; Z13-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = urem <8 x i32> %a, %b ; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %r ; -; ARC15-LABEL: 'fun45' -; ARC15-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r = urem <8 x i32> %a, %b -; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %r +; Z17-LABEL: 'fun45' +; Z17-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r = urem <8 x i32> %a, %b +; Z17-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %r ; %r = urem <8 x i32> %a, %b ret <8 x i32> %r diff --git a/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll b/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll index 105e634cea1ac..ba86c9ab1d702 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll @@ -1,12 +1,12 @@ ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s --check-prefixes=CHECK,Z13 -; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=arch15 | FileCheck %s --check-prefixes=CHECK,ARC15 +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z17 | FileCheck %s --check-prefixes=CHECK,Z17 ; define i128 @fun1(i128 %val1, i128 %val2) { ; CHECK-LABEL: 'fun1' ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2 ; Z13: Cost Model: Found an estimated cost of 5 for instruction: %v128 = sext i1 %cmp to i128 -; ARC15: Cost Model: Found an estimated cost of 0 for instruction: %v128 = sext i1 %cmp to i128 +; Z17: Cost Model: Found an estimated cost of 0 for instruction: %v128 = sext i1 %cmp to i128 %cmp = icmp eq i128 %val1, %val2 %v128 = sext i1 %cmp to i128 ret i128 %v128 @@ -27,7 +27,7 @@ define i128 @fun3(i128 %val1, i128 %val2, ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %add = add i128 %val3, %val4 ; Z13: Cost Model: Found an estimated cost of 4 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add -; ARC15: Cost Model: Found an estimated cost of 1 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add +; Z17: Cost Model: Found an estimated cost of 1 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add %cmp = icmp eq i128 %val1, %val2 %add = add i128 %val3, %val4 %sel = select i1 %cmp, i128 %val3, 
i128 %add @@ -40,7 +40,7 @@ define i64 @fun3_sel64(i128 %val1, i128 %val2, ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ugt i128 %val1, %val2 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %add = add i64 %val3, %val4 ; Z13: Cost Model: Found an estimated cost of 4 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %add -; ARC15: Cost Model: Found an estimated cost of 1 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %add +; Z17: Cost Model: Found an estimated cost of 1 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %add %cmp = icmp ugt i128 %val1, %val2 %add = add i64 %val3, %val4 %sel = select i1 %cmp, i64 %val3, i64 %add diff --git a/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll b/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll index bf5cbfb48a77b..ebeb2df281237 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s -; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=arch15 | FileCheck %s -check-prefix=ARC15 +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z17 | FileCheck %s -check-prefix=Z17 ; ; Note: The scalarized vector instructions costs are not including any ; extracts, due to the undef operands. @@ -132,22 +132,22 @@ define void @mul() { ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = mul <2 x i16> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = mul <2 x i32> undef, undef ; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %res7 = mul <2 x i64> undef, undef -; ARC15: Cost Model: Found an estimated cost of 1 for instruction: %res7 = mul <2 x i64> undef, undef +; Z17: Cost Model: Found an estimated cost of 1 for instruction: %res7 = mul <2 x i64> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <4 x i8> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res9 = mul <4 x i16> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = mul <4 x i32> undef, undef ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %res11 = mul <4 x i64> undef, undef -; ARC15: Cost Model: Found an estimated cost of 2 for instruction: %res11 = mul <4 x i64> undef, undef +; Z17: Cost Model: Found an estimated cost of 2 for instruction: %res11 = mul <4 x i64> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res12 = mul <8 x i8> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = mul <8 x i16> undef, undef ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = mul <8 x i32> undef, undef ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %res15 = mul <8 x i64> undef, undef -; ARC15: Cost Model: Found an estimated cost of 4 for instruction: %res15 = mul <8 x i64> undef, undef +; Z17: Cost Model: Found an estimated cost of 4 for instruction: %res15 = mul <8 x i64> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i8> undef, undef ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = mul <16 x i16> undef, undef ; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = mul <16 x i32> undef, undef ; 
CHECK: Cost Model: Found an estimated cost of 24 for instruction: %res19 = mul <16 x i64> undef, undef -; ARC15: Cost Model: Found an estimated cost of 8 for instruction: %res19 = mul <16 x i64> undef, undef +; Z17: Cost Model: Found an estimated cost of 8 for instruction: %res19 = mul <16 x i64> undef, undef ret void; } diff --git a/llvm/test/CodeGen/SystemZ/args-12.ll b/llvm/test/CodeGen/SystemZ/args-12.ll index f8954eee550f5..472672bbfd5ca 100644 --- a/llvm/test/CodeGen/SystemZ/args-12.ll +++ b/llvm/test/CodeGen/SystemZ/args-12.ll @@ -2,7 +2,7 @@ ; Test the handling of i128 argument values ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare void @bar(i64, i64, i64, i64, i128, i64, i64, i64, i64, i128) diff --git a/llvm/test/CodeGen/SystemZ/args-13.ll b/llvm/test/CodeGen/SystemZ/args-13.ll index d9e986cbb6a4b..29a718901e811 100644 --- a/llvm/test/CodeGen/SystemZ/args-13.ll +++ b/llvm/test/CodeGen/SystemZ/args-13.ll @@ -2,7 +2,7 @@ ; Test incoming i128 arguments. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Do some arithmetic so that we can see the register being used. define void @f1(ptr %r2, i16 %r3, i32 %r4, i64 %r5, i128 %r6) { diff --git a/llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll b/llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll index f5b0aaa243a79..bbd9be463a014 100644 --- a/llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test bit deposit / extract intrinsics ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare i64 @llvm.s390.bdepg(i64, i64) declare i64 @llvm.s390.bextg(i64, i64) diff --git a/llvm/test/CodeGen/SystemZ/int-abs-03.ll b/llvm/test/CodeGen/SystemZ/int-abs-03.ll index 238b2431c9b30..2a8969c27fbc0 100644 --- a/llvm/test/CodeGen/SystemZ/int-abs-03.ll +++ b/llvm/test/CodeGen/SystemZ/int-abs-03.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit absolute value in vector registers on arch15 +; Test 128-bit absolute value in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define i128 @f1(i128 %src) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/int-add-19.ll b/llvm/test/CodeGen/SystemZ/int-add-19.ll index a9bce2c827ff9..f5ef08b4514f9 100644 --- a/llvm/test/CodeGen/SystemZ/int-add-19.ll +++ b/llvm/test/CodeGen/SystemZ/int-add-19.ll @@ -2,7 +2,7 @@ ; Test 128-bit addition in vector registers on z13 and later ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define i128 @f1(i128 %a, i128 %b) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-64.ll b/llvm/test/CodeGen/SystemZ/int-cmp-64.ll index be212ef2a7211..821a57bf30bc1 100644 --- a/llvm/test/CodeGen/SystemZ/int-cmp-64.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-64.ll @@ -1,7 
+1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit comparisons in vector registers on arch15 +; Test 128-bit comparisons in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 -verify-machineinstrs | FileCheck %s ; Equality comparison. define i64 @f1(i128 %value1, i128 %value2, i64 %a, i64 %b) { diff --git a/llvm/test/CodeGen/SystemZ/int-conv-15.ll b/llvm/test/CodeGen/SystemZ/int-conv-15.ll index bea0bb8890315..0d8ee75b10b85 100644 --- a/llvm/test/CodeGen/SystemZ/int-conv-15.ll +++ b/llvm/test/CodeGen/SystemZ/int-conv-15.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit arithmetic in vector registers on arch15 +; Test 128-bit arithmetic in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Sign extension from i64. define i128 @f1(i64 %a) { diff --git a/llvm/test/CodeGen/SystemZ/int-div-08.ll b/llvm/test/CodeGen/SystemZ/int-div-08.ll index a3723c1257974..5838d4913c862 100644 --- a/llvm/test/CodeGen/SystemZ/int-div-08.ll +++ b/llvm/test/CodeGen/SystemZ/int-div-08.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit division and remainder in vector registers on arch15 +; Test 128-bit division and remainder in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Divide signed. define i128 @f1(i128 %a, i128 %b) { diff --git a/llvm/test/CodeGen/SystemZ/int-max-02.ll b/llvm/test/CodeGen/SystemZ/int-max-02.ll index bd5e9593e25e9..5f5188c66065d 100644 --- a/llvm/test/CodeGen/SystemZ/int-max-02.ll +++ b/llvm/test/CodeGen/SystemZ/int-max-02.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test i128 maximum on arch15. +; Test i128 maximum on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Test with slt. define i128 @f1(i128 %val1, i128 %val2) { diff --git a/llvm/test/CodeGen/SystemZ/int-min-02.ll b/llvm/test/CodeGen/SystemZ/int-min-02.ll index e4cdd25fbc006..3066af924fb8e 100644 --- a/llvm/test/CodeGen/SystemZ/int-min-02.ll +++ b/llvm/test/CodeGen/SystemZ/int-min-02.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test i128 minimum on arch15. +; Test i128 minimum on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Test with slt. 
define i128 @f1(i128 %val1, i128 %val2) { diff --git a/llvm/test/CodeGen/SystemZ/int-mul-14.ll b/llvm/test/CodeGen/SystemZ/int-mul-14.ll index e7e0889634d10..6678e90f3bfad 100644 --- a/llvm/test/CodeGen/SystemZ/int-mul-14.ll +++ b/llvm/test/CodeGen/SystemZ/int-mul-14.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit multiplication in vector registers on arch15 +; Test 128-bit multiplication in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Multiplication. define i128 @f1(i128 %a, i128 %b) { diff --git a/llvm/test/CodeGen/SystemZ/int-mul-15.ll b/llvm/test/CodeGen/SystemZ/int-mul-15.ll index a4a0faa0cb0c8..b7d41412d9c5f 100644 --- a/llvm/test/CodeGen/SystemZ/int-mul-15.ll +++ b/llvm/test/CodeGen/SystemZ/int-mul-15.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; Test high-part i64->i128 multiplications on arch15. +; Test high-part i64->i128 multiplications on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Check zero-extended multiplication in which only the high part is used. define i64 @f1(i64 %dummy, i64 %a, i64 %b) { diff --git a/llvm/test/CodeGen/SystemZ/int-mul-16.ll b/llvm/test/CodeGen/SystemZ/int-mul-16.ll index d84ca93e3b12c..772c419dfc8e0 100644 --- a/llvm/test/CodeGen/SystemZ/int-mul-16.ll +++ b/llvm/test/CodeGen/SystemZ/int-mul-16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test high-part i128->i256 multiplications on arch15. +; Test high-part i128->i256 multiplications on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Multiply high signed. 
define i128 @f1(i128 %a, i128 %b) { diff --git a/llvm/test/CodeGen/SystemZ/int-neg-04.ll b/llvm/test/CodeGen/SystemZ/int-neg-04.ll index 05b7b397e735d..a6da2db7d14b4 100644 --- a/llvm/test/CodeGen/SystemZ/int-neg-04.ll +++ b/llvm/test/CodeGen/SystemZ/int-neg-04.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit negation in vector registers on arch15 +; Test 128-bit negation in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define i128 @f1(i128 %src) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/int-sub-12.ll b/llvm/test/CodeGen/SystemZ/int-sub-12.ll index 8f7d816d5cbd2..44d2adfb41dc7 100644 --- a/llvm/test/CodeGen/SystemZ/int-sub-12.ll +++ b/llvm/test/CodeGen/SystemZ/int-sub-12.ll @@ -2,7 +2,7 @@ ; Test 128-bit subtraction in vector registers on z13 and later ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define i128 @f1(i128 %a, i128 %b) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/llxa-01.ll b/llvm/test/CodeGen/SystemZ/llxa-01.ll index 19bc6ef31a286..2c57556dc9ee2 100644 --- a/llvm/test/CodeGen/SystemZ/llxa-01.ll +++ b/llvm/test/CodeGen/SystemZ/llxa-01.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD LOGICAL INDEXED ADDRESS byte instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; DO NOT USE: LLXAB with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/llxa-02.ll b/llvm/test/CodeGen/SystemZ/llxa-02.ll index 0ca2527dcb25e..e2cd929a0bc94 100644 --- a/llvm/test/CodeGen/SystemZ/llxa-02.ll +++ b/llvm/test/CodeGen/SystemZ/llxa-02.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD LOGICAL INDEXED ADDRESS halfword instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LLXAH with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/llxa-03.ll b/llvm/test/CodeGen/SystemZ/llxa-03.ll index b6c9406785188..b5c91b1d7e607 100644 --- a/llvm/test/CodeGen/SystemZ/llxa-03.ll +++ b/llvm/test/CodeGen/SystemZ/llxa-03.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD LOGICAL INDEXED ADDRESS word instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LLXAF with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/llxa-04.ll b/llvm/test/CodeGen/SystemZ/llxa-04.ll index 9c5cd2f54bc67..186892dd755a7 100644 --- a/llvm/test/CodeGen/SystemZ/llxa-04.ll +++ b/llvm/test/CodeGen/SystemZ/llxa-04.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD LOGICAL INDEXED ADDRESS doubleword instructions. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LLXAG with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/llxa-05.ll b/llvm/test/CodeGen/SystemZ/llxa-05.ll index eba400f6d2564..1e5880de57d58 100644 --- a/llvm/test/CodeGen/SystemZ/llxa-05.ll +++ b/llvm/test/CodeGen/SystemZ/llxa-05.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD LOGICAL INDEXED ADDRESS quadword instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LLXAQ with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/lxa-01.ll b/llvm/test/CodeGen/SystemZ/lxa-01.ll index fb3edeaaeb381..8bba6f78f503d 100644 --- a/llvm/test/CodeGen/SystemZ/lxa-01.ll +++ b/llvm/test/CodeGen/SystemZ/lxa-01.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD INDEXED ADDRESS byte instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; DO NOT USE: LXAB with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/lxa-02.ll b/llvm/test/CodeGen/SystemZ/lxa-02.ll index 64816fa24838e..c233bf7d28a5a 100644 --- a/llvm/test/CodeGen/SystemZ/lxa-02.ll +++ b/llvm/test/CodeGen/SystemZ/lxa-02.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD INDEXED ADDRESS halfword instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LXAH with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/lxa-03.ll b/llvm/test/CodeGen/SystemZ/lxa-03.ll index e73d43a48ebd8..43e9b4d14d6c6 100644 --- a/llvm/test/CodeGen/SystemZ/lxa-03.ll +++ b/llvm/test/CodeGen/SystemZ/lxa-03.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD INDEXED ADDRESS word instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LXAF with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/lxa-04.ll b/llvm/test/CodeGen/SystemZ/lxa-04.ll index 7b6764cf22faf..96af585547e34 100644 --- a/llvm/test/CodeGen/SystemZ/lxa-04.ll +++ b/llvm/test/CodeGen/SystemZ/lxa-04.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD INDEXED ADDRESS doubleword instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LXAG with base and index. 
define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/lxa-05.ll b/llvm/test/CodeGen/SystemZ/lxa-05.ll index 0a45cba0b3f83..4f0b4e838f157 100644 --- a/llvm/test/CodeGen/SystemZ/lxa-05.ll +++ b/llvm/test/CodeGen/SystemZ/lxa-05.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of LOAD INDEXED ADDRESS quadword instructions. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; LXAQ with base and index. define dso_local ptr @f0(ptr %ptr, i32 %idx) { diff --git a/llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll b/llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll index f18ee2418383c..3dbd18fb8cc60 100644 --- a/llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll +++ b/llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; ; FIXME: two consecutive immediate adds not fused in i16/i8 functions. diff --git a/llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll b/llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll index bb50e6f417c42..10d28d571bb92 100644 --- a/llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll +++ b/llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare i128 @llvm.ctlz.i128(i128, i1) diff --git a/llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll b/llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll index 2f3a72160ae27..e1237280ae23e 100644 --- a/llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll +++ b/llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare i64 @llvm.cttz.i64(i64, i1) declare i32 @llvm.cttz.i32(i32, i1) diff --git a/llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll b/llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll index f440871fd4ff0..fdfebef1a1e18 100644 --- a/llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll +++ b/llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test 128-bit arithmetic in vector registers on arch15 +; Test 128-bit arithmetic in vector registers on z17 ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare i128 @llvm.cttz.i128(i128, i1) diff --git a/llvm/test/CodeGen/SystemZ/vec-cmp-09.ll b/llvm/test/CodeGen/SystemZ/vec-cmp-09.ll index 3f6c86e685ea1..cb8850e58c589 100644 --- a/llvm/test/CodeGen/SystemZ/vec-cmp-09.ll +++ b/llvm/test/CodeGen/SystemZ/vec-cmp-09.ll @@ -1,6 +1,6 @@ -; Test usage of VBLEND on arch15. +; Test usage of VBLEND on z17. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/vec-div-03.ll b/llvm/test/CodeGen/SystemZ/vec-div-03.ll index 96b161948e39b..1c2a702baf1a3 100644 --- a/llvm/test/CodeGen/SystemZ/vec-div-03.ll +++ b/llvm/test/CodeGen/SystemZ/vec-div-03.ll @@ -1,6 +1,6 @@ -; Test vector division on arch15. +; Test vector division on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Test a v4i32 signed division. define <4 x i32> @f1(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { diff --git a/llvm/test/CodeGen/SystemZ/vec-eval.ll b/llvm/test/CodeGen/SystemZ/vec-eval.ll index 262ab0ea8bb2b..bcdedcd3a407b 100644 --- a/llvm/test/CodeGen/SystemZ/vec-eval.ll +++ b/llvm/test/CodeGen/SystemZ/vec-eval.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; Test use of VECTOR EVALUATE for combined boolean operations. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s define <16 x i8> @eval0(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval0: @@ -279,8 +279,8 @@ entry: define <16 x i8> @eval24(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval24: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 2 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 ; CHECK-NEXT: br %r14 entry: @@ -376,8 +376,8 @@ entry: define <16 x i8> @eval30(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval30: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 2 +; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 ; CHECK-NEXT: br %r14 entry: @@ -596,8 +596,8 @@ entry: define <16 x i8> @eval45(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval45: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v0, %v28, %v24 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 +; CHECK-NEXT: vo %v0, %v28, %v24 ; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 ; CHECK-NEXT: veval %v24, %v1, %v26, %v0, 47 ; CHECK-NEXT: br %r14 @@ -617,8 +617,8 @@ entry: define <16 x i8> @eval46(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval46: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 8 +; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 ; CHECK-NEXT: br %r14 entry: @@ -722,8 +722,8 @@ entry: define <16 x i8> @eval54(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval54: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v28, %v24 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 2 +; CHECK-NEXT: vn %v1, %v28, %v24 ; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47 ; CHECK-NEXT: br %r14 entry: @@ -770,8 +770,8 @@ entry: define <16 x i8> @eval57(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval57: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 +; CHECK-NEXT: vo %v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 47 ; CHECK-NEXT: veval %v24, %v1, %v24, %v0, 47 ; CHECK-NEXT: br 
%r14 @@ -1060,8 +1060,8 @@ define <16 x i8> @eval77(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval77: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vgbm %v0, 65535 -; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 +; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v0, %v28, %v0, %v1, 7 ; CHECK-NEXT: veval %v24, %v0, %v24, %v26, 47 ; CHECK-NEXT: br %r14 @@ -1540,10 +1540,10 @@ define <16 x i8> @eval109(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval109: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vgbm %v0, 65535 -; CHECK-NEXT: vn %v2, %v26, %v24 ; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 -; CHECK-NEXT: vo %v1, %v28, %v24 +; CHECK-NEXT: vn %v2, %v26, %v24 ; CHECK-NEXT: veval %v0, %v28, %v0, %v2, 7 +; CHECK-NEXT: vo %v1, %v28, %v24 ; CHECK-NEXT: veval %v0, %v0, %v24, %v26, 47 ; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47 ; CHECK-NEXT: br %r14 @@ -1621,8 +1621,8 @@ define <16 x i8> @eval113(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval113: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vgbm %v0, 65535 -; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 +; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v0, %v28, %v0, %v1, 7 ; CHECK-NEXT: veval %v24, %v0, %v26, %v24, 47 ; CHECK-NEXT: br %r14 @@ -1731,8 +1731,8 @@ define <16 x i8> @eval120(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vno %v0, %v24, %v24 ; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 2 -; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 ; CHECK-NEXT: br %r14 entry: @@ -1753,10 +1753,10 @@ define <16 x i8> @eval121(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval121: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vgbm %v0, 65535 -; CHECK-NEXT: vn %v2, %v26, %v24 ; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 -; CHECK-NEXT: vo %v1, %v28, %v26 +; CHECK-NEXT: vn %v2, %v26, %v24 ; CHECK-NEXT: veval %v0, %v28, %v0, %v2, 7 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47 ; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 ; CHECK-NEXT: br %r14 @@ -1802,8 +1802,8 @@ define <16 x i8> @eval123(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vno %v0, %v24, %v24 ; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 2 -; CHECK-NEXT: voc %v1, %v26, %v28 ; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47 +; CHECK-NEXT: voc %v1, %v26, %v28 ; CHECK-NEXT: veval %v24, %v0, %v1, %v24, 31 ; CHECK-NEXT: br %r14 entry: @@ -2084,8 +2084,8 @@ entry: define <16 x i8> @eval141(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval141: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v0, %v26, %v24 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 +; CHECK-NEXT: vo %v0, %v26, %v24 ; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 ; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143 ; CHECK-NEXT: br %r14 @@ -2105,8 +2105,8 @@ entry: define <16 x i8> @eval142(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval142: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 127 +; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174 ; CHECK-NEXT: br %r14 entry: @@ -2253,8 +2253,8 @@ entry: define <16 x i8> @eval151(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval151: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vx 
%v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2 +; CHECK-NEXT: vx %v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 31 ; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143 ; CHECK-NEXT: br %r14 @@ -2289,8 +2289,8 @@ entry: define <16 x i8> @eval153(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval153: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v2, %v26, %v24, %v28, 1 ; CHECK-NEXT: veval %v24, %v2, %v0, %v1, 239 ; CHECK-NEXT: br %r14 @@ -2309,8 +2309,8 @@ entry: define <16 x i8> @eval154(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval154: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v2, %v24, %v26, %v28, 2 ; CHECK-NEXT: veval %v24, %v2, %v0, %v1, 239 ; CHECK-NEXT: br %r14 @@ -2330,9 +2330,9 @@ entry: define <16 x i8> @eval155(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval155: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111 ; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: vn %v2, %v26, %v24 -; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111 ; CHECK-NEXT: veval %v24, %v2, %v0, %v1, 239 ; CHECK-NEXT: br %r14 entry: @@ -2365,8 +2365,8 @@ entry: define <16 x i8> @eval157(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval157: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vx %v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 +; CHECK-NEXT: vx %v0, %v28, %v26 ; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 ; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143 ; CHECK-NEXT: br %r14 @@ -2386,8 +2386,8 @@ entry: define <16 x i8> @eval158(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval158: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111 +; CHECK-NEXT: vn %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174 ; CHECK-NEXT: br %r14 entry: @@ -2685,8 +2685,8 @@ entry: define <16 x i8> @eval178(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval178: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 138 +; CHECK-NEXT: vn %v1, %v26, %v24 ; CHECK-NEXT: veval %v24, %v0, %v1, %v28, 47 ; CHECK-NEXT: br %r14 entry: @@ -2778,8 +2778,8 @@ entry: define <16 x i8> @eval183(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval183: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: voc %v0, %v26, %v28 ; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2 +; CHECK-NEXT: voc %v0, %v26, %v28 ; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 31 ; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47 ; CHECK-NEXT: br %r14 @@ -2884,8 +2884,8 @@ entry: define <16 x i8> @eval189(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval189: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: voc %v0, %v26, %v28 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 +; CHECK-NEXT: voc %v0, %v26, %v28 ; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 ; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47 ; CHECK-NEXT: br %r14 @@ -3480,8 +3480,8 @@ define <16 x i8> @eval228(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval228: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vno %v0, %v26, %v26 -; CHECK-NEXT: vo %v1, %v28, %v24 ; CHECK-NEXT: veval %v2, %v24, %v28, %v26, 2 +; CHECK-NEXT: vo %v1, %v28, %v24 ; CHECK-NEXT: veval %v0, 
%v2, %v0, %v24, 47 ; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47 ; CHECK-NEXT: br %r14 @@ -3564,8 +3564,8 @@ entry: define <16 x i8> @eval232(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval232: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 31 +; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174 ; CHECK-NEXT: br %r14 entry: @@ -3582,8 +3582,8 @@ entry: define <16 x i8> @eval233(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval233: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vx %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 31 +; CHECK-NEXT: vx %v1, %v28, %v26 ; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174 ; CHECK-NEXT: br %r14 entry: diff --git a/llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll b/llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll index e750f1e3e7b47..5bbabdd2d56fc 100644 --- a/llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; Test vector intrinsics added with arch15. +; Test vector intrinsics added with z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s declare <16 x i8> @llvm.s390.vgemb(<8 x i16>) declare <8 x i16> @llvm.s390.vgemh(<16 x i8>) diff --git a/llvm/test/CodeGen/SystemZ/vec-mul-06.ll b/llvm/test/CodeGen/SystemZ/vec-mul-06.ll index 22b1b5de62c57..3850a8f60eb16 100644 --- a/llvm/test/CodeGen/SystemZ/vec-mul-06.ll +++ b/llvm/test/CodeGen/SystemZ/vec-mul-06.ll @@ -1,6 +1,6 @@ -; Test vector multiplication on arch15. +; Test vector multiplication on z17. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s ; Test a v2i64 multiplication. define <2 x i64> @f1(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { diff --git a/llvm/test/MC/Disassembler/SystemZ/insns-arch15.txt b/llvm/test/MC/Disassembler/SystemZ/insns-z17.txt similarity index 99% rename from llvm/test/MC/Disassembler/SystemZ/insns-arch15.txt rename to llvm/test/MC/Disassembler/SystemZ/insns-z17.txt index 93274e6659801..c5a30b072d991 100644 --- a/llvm/test/MC/Disassembler/SystemZ/insns-arch15.txt +++ b/llvm/test/MC/Disassembler/SystemZ/insns-z17.txt @@ -1,5 +1,5 @@ -# Test arch15 instructions that don't have PC-relative operands. -# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=arch15 \ +# Test z17 instructions that don't have PC-relative operands. +# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=z17 \ # RUN: | FileCheck %s # CHECK: bdepg %r0, %r0, %r0 diff --git a/llvm/test/MC/SystemZ/insn-bad-arch15.s b/llvm/test/MC/SystemZ/insn-bad-z17.s similarity index 98% rename from llvm/test/MC/SystemZ/insn-bad-arch15.s rename to llvm/test/MC/SystemZ/insn-bad-z17.s index 915efbc942306..02e26220490f4 100644 --- a/llvm/test/MC/SystemZ/insn-bad-arch15.s +++ b/llvm/test/MC/SystemZ/insn-bad-z17.s @@ -1,5 +1,5 @@ -# For arch15 only. -# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=arch15 < %s 2> %t +# For z17 only. 
+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=z17 < %s 2> %t # RUN: FileCheck < %t %s #CHECK: error: invalid use of indexed addressing diff --git a/llvm/test/MC/SystemZ/insn-good-arch15.s b/llvm/test/MC/SystemZ/insn-good-z17.s similarity index 99% rename from llvm/test/MC/SystemZ/insn-good-arch15.s rename to llvm/test/MC/SystemZ/insn-good-z17.s index 46ff13db0b549..96f27137e4821 100644 --- a/llvm/test/MC/SystemZ/insn-good-arch15.s +++ b/llvm/test/MC/SystemZ/insn-good-z17.s @@ -1,5 +1,5 @@ -# For arch15 and above. -# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=arch15 -show-encoding %s \ +# For z17 and above. +# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=z17 -show-encoding %s \ # RUN: | FileCheck %s #CHECK: bdepg %r0, %r0, %r0 # encoding: [0xb9,0x6d,0x00,0x00] diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp index c5b96e1df904e..2a3958151a604 100644 --- a/llvm/unittests/TargetParser/Host.cpp +++ b/llvm/unittests/TargetParser/Host.cpp @@ -340,7 +340,7 @@ TEST(getLinuxHostCPUName, s390x) { // Model Id: 9175 ExpectedCPUs.push_back("zEC12"); - ExpectedCPUs.push_back("arch15"); + ExpectedCPUs.push_back("z17"); // Model Id: 3931 ExpectedCPUs.push_back("zEC12"); From 4370072022e5265d51b64182608e133277a24ac0 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Tue, 29 Apr 2025 14:54:30 +0200 Subject: [PATCH 18/83] [clang] Forward TPL of NestedNameSpecifier This avoids type suffixes for integer constants when the type can be inferred from the template parameter, such as the unsigned parameter of A<1> and A<2> in the added test. --- clang/lib/AST/NestedNameSpecifier.cpp | 17 ++-- clang/unittests/Tooling/QualTypeNamesTest.cpp | 96 +++++++++++++++++++ 2 files changed, 106 insertions(+), 7 deletions(-) diff --git a/clang/lib/AST/NestedNameSpecifier.cpp b/clang/lib/AST/NestedNameSpecifier.cpp index 76c77569da9fd..c043996f1ada3 100644 --- a/clang/lib/AST/NestedNameSpecifier.cpp +++ b/clang/lib/AST/NestedNameSpecifier.cpp @@ -283,13 +283,16 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, case TypeSpec: { const auto *Record = dyn_cast_or_null(getAsRecordDecl()); - if (ResolveTemplateArguments && Record) { + const TemplateParameterList *TPL = nullptr; + if (Record) { + TPL = Record->getSpecializedTemplate()->getTemplateParameters(); + if (ResolveTemplateArguments) { // Print the type trait with resolved template parameters. Record->printName(OS, Policy); - printTemplateArgumentList( - OS, Record->getTemplateArgs().asArray(), Policy, - Record->getSpecializedTemplate()->getTemplateParameters()); + printTemplateArgumentList(OS, Record->getTemplateArgs().asArray(), + Policy, TPL); break; + } } const Type *T = getAsType(); @@ -313,8 +316,8 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, TemplateName::Qualified::None); // Print the template argument list. - printTemplateArgumentList(OS, SpecType->template_arguments(), - InnerPolicy); + printTemplateArgumentList(OS, SpecType->template_arguments(), InnerPolicy, + TPL); } else if (const auto *DepSpecType = dyn_cast(T)) { // Print the template name without its corresponding @@ -322,7 +325,7 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, OS << DepSpecType->getIdentifier()->getName(); // Print the template argument list. 
printTemplateArgumentList(OS, DepSpecType->template_arguments(), - InnerPolicy); + InnerPolicy, TPL); } else { // Print the type normally QualType(T, 0).print(OS, InnerPolicy); diff --git a/clang/unittests/Tooling/QualTypeNamesTest.cpp b/clang/unittests/Tooling/QualTypeNamesTest.cpp index 5ded64d4fcc8c..49c40d633ad4b 100644 --- a/clang/unittests/Tooling/QualTypeNamesTest.cpp +++ b/clang/unittests/Tooling/QualTypeNamesTest.cpp @@ -265,6 +265,102 @@ TEST(QualTypeNameTest, InlineNamespace) { TypeNameVisitor::Lang_CXX11); } +TEST(QualTypeNameTest, TemplatedClass) { + std::unique_ptr AST = + tooling::buildASTFromCode("template struct A {\n" + " template struct B {};\n" + "};\n" + "template struct A<1>;\n" + "template struct A<2u>;\n" + "template struct A<1>::B<3>;\n" + "template struct A<2u>::B<4u>;\n"); + + auto &Context = AST->getASTContext(); + auto &Policy = Context.getPrintingPolicy(); + auto getFullyQualifiedName = [&](QualType QT) { + return TypeName::getFullyQualifiedName(QT, Context, Policy); + }; + + auto *A = Context.getTranslationUnitDecl() + ->lookup(&Context.Idents.get("A")) + .find_first(); + ASSERT_NE(A, nullptr); + + // A has two explicit instantiations: A<1> and A<2u> + auto ASpec = A->spec_begin(); + ASSERT_NE(ASpec, A->spec_end()); + auto *A1 = *ASpec; + ASpec++; + ASSERT_NE(ASpec, A->spec_end()); + auto *A2 = *ASpec; + + // Their type names follow the records. + QualType A1RecordTy = Context.getRecordType(A1); + EXPECT_EQ(getFullyQualifiedName(A1RecordTy), "A<1>"); + QualType A2RecordTy = Context.getRecordType(A2); + EXPECT_EQ(getFullyQualifiedName(A2RecordTy), "A<2U>"); + + // getTemplateSpecializationType() gives types that print the integral + // argument directly. + TemplateArgument Args1[] = { + {Context, llvm::APSInt::getUnsigned(1u), Context.UnsignedIntTy}}; + QualType A1TemplateSpecTy = + Context.getTemplateSpecializationType(TemplateName(A), Args1, A1RecordTy); + EXPECT_EQ(A1TemplateSpecTy.getAsString(), "A<1>"); + + TemplateArgument Args2[] = { + {Context, llvm::APSInt::getUnsigned(2u), Context.UnsignedIntTy}}; + QualType A2TemplateSpecTy = + Context.getTemplateSpecializationType(TemplateName(A), Args2, A2RecordTy); + EXPECT_EQ(A2TemplateSpecTy.getAsString(), "A<2>"); + + // Find A<1>::B and its specialization B<3>. + auto *A1B = + A1->lookup(&Context.Idents.get("B")).find_first(); + ASSERT_NE(A1B, nullptr); + auto A1BSpec = A1B->spec_begin(); + ASSERT_NE(A1BSpec, A1B->spec_end()); + auto *A1B3 = *A1BSpec; + QualType A1B3RecordTy = Context.getRecordType(A1B3); + EXPECT_EQ(getFullyQualifiedName(A1B3RecordTy), "A<1>::B<3>"); + + // Construct A<1>::B<3> and check name. + TemplateArgument Args3[] = { + {Context, llvm::APSInt::getUnsigned(3u), Context.UnsignedIntTy}}; + QualType A1B3TemplateSpecTy = Context.getTemplateSpecializationType( + TemplateName(A1B), Args3, A1B3RecordTy); + EXPECT_EQ(A1B3TemplateSpecTy.getAsString(), "B<3>"); + + NestedNameSpecifier *A1Nested = NestedNameSpecifier::Create( + Context, nullptr, false, A1TemplateSpecTy.getTypePtr()); + QualType A1B3ElaboratedTy = Context.getElaboratedType( + ElaboratedTypeKeyword::None, A1Nested, A1B3TemplateSpecTy); + EXPECT_EQ(A1B3ElaboratedTy.getAsString(), "A<1>::B<3>"); + + // Find A<2u>::B and its specialization B<4u>. 
+ auto *A2B = + A2->lookup(&Context.Idents.get("B")).find_first(); + ASSERT_NE(A2B, nullptr); + auto A2BSpec = A2B->spec_begin(); + ASSERT_NE(A2BSpec, A2B->spec_end()); + auto *A2B4 = *A2BSpec; + QualType A2B4RecordTy = Context.getRecordType(A2B4); + EXPECT_EQ(getFullyQualifiedName(A2B4RecordTy), "A<2U>::B<4U>"); + + // Construct A<2>::B<4> and check name. + TemplateArgument Args4[] = { + {Context, llvm::APSInt::getUnsigned(4u), Context.UnsignedIntTy}}; + QualType A2B4TemplateSpecTy = Context.getTemplateSpecializationType( + TemplateName(A2B), Args4, A2B4RecordTy); + EXPECT_EQ(A2B4TemplateSpecTy.getAsString(), "B<4>"); + + NestedNameSpecifier *A2Nested = NestedNameSpecifier::Create( + Context, nullptr, false, A2TemplateSpecTy.getTypePtr()); + QualType A2B4ElaboratedTy = Context.getElaboratedType( + ElaboratedTypeKeyword::None, A2Nested, A2B4TemplateSpecTy); + EXPECT_EQ(A2B4ElaboratedTy.getAsString(), "A<2>::B<4>"); +} + TEST(QualTypeNameTest, AnonStrucs) { TypeNameVisitor AnonStrucs; AnonStrucs.ExpectedQualTypeNames["a"] = "short"; From f811c7df0a105549aeae2aa42ca31f6d55e652f2 Mon Sep 17 00:00:00 2001 From: davidtrevelyan Date: Thu, 13 Mar 2025 10:18:25 +0000 Subject: [PATCH 19/83] [rtsan][Apple] Add interceptor for _os_nospin_lock_lock (#131034) Follows the discussion here: https://github.com/llvm/llvm-project/pull/129309 Recently, the test `TestRtsan.AccessingALargeAtomicVariableDiesWhenRealtime` has been failing on newer MacOS versions, because the internal locking mechanism in `std::atomic::load` (for types `T` that are larger than the hardware lock-free limit), has changed to a function that wasn't being intercepted by rtsan. This PR introduces an interceptor for `_os_nospin_lock_lock`, which is the new internal locking mechanism. _Note: we'd probably do well to introduce interceptors for `_os_nospin_lock_unlock` (and `os_unfair_lock_unlock`) too, which also appear to have blocking implementations. This can follow in a separate PR._ (cherry picked from commit 481a55a3d9645a6bc1540d326319b78ad8ed8db1) --- .../lib/rtsan/rtsan_interceptors_posix.cpp | 11 +++++++++++ .../tests/rtsan_test_interceptors_posix.cpp | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index 6816119065263..4d602a88ba9ae 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp @@ -30,6 +30,12 @@ extern "C" { typedef int32_t OSSpinLock; void OSSpinLockLock(volatile OSSpinLock *__lock); +// A pointer to this type is in the interface for `_os_nospin_lock_lock`, but +// it's an internal implementation detail of `os/lock.c` on Darwin, and +// therefore not available in any headers. As a workaround, we forward declare +// it here, which is enough to facilitate interception of _os_nospin_lock_lock. 
+struct _os_nospin_lock_s; +using _os_nospin_lock_t = _os_nospin_lock_s *; } #endif // TARGET_OS_MAC @@ -642,6 +648,11 @@ INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) { __rtsan_notify_intercepted_call("os_unfair_lock_lock"); return REAL(os_unfair_lock_lock)(lock); } + +INTERCEPTOR(void, _os_nospin_lock_lock, _os_nospin_lock_t lock) { + __rtsan_notify_intercepted_call("_os_nospin_lock_lock"); + return REAL(_os_nospin_lock_lock)(lock); +} #define RTSAN_MAYBE_INTERCEPT_OS_UNFAIR_LOCK_LOCK \ INTERCEPT_FUNCTION(os_unfair_lock_lock) #else diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index 59663776366bb..75f723081c4b6 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -1058,6 +1058,25 @@ TEST(TestRtsanInterceptors, OsUnfairLockLockDiesWhenRealtime) { ExpectRealtimeDeath(Func, "os_unfair_lock_lock"); ExpectNonRealtimeSurvival(Func); } + +// We intercept _os_nospin_lock_lock because it's the internal +// locking mechanism for MacOS's atomic implementation for data +// types that are larger than the hardware's maximum lock-free size. +// However, it's a private implementation detail and not visible in any headers, +// so we must duplicate the required type definitions to forward declaration +// what we need here. +extern "C" { +struct _os_nospin_lock_s { + unsigned int oul_value; +}; +void _os_nospin_lock_lock(_os_nospin_lock_s *); +} +TEST(TestRtsanInterceptors, OsNoSpinLockLockDiesWhenRealtime) { + _os_nospin_lock_s lock{}; + auto Func = [&]() { _os_nospin_lock_lock(&lock); }; + ExpectRealtimeDeath(Func, "_os_nospin_lock_lock"); + ExpectNonRealtimeSurvival(Func); +} #endif #if SANITIZER_LINUX From b7b834e2a20ed295eaab596dd348db5463b951d8 Mon Sep 17 00:00:00 2001 From: thetruestblue Date: Fri, 18 Apr 2025 11:25:31 -0700 Subject: [PATCH 20/83] [RTSan][Darwin] Adjust OSSpinLock/_os_nospin_lock interceptor and tests (#132867) These changes align with these lock types and allows builds and tests to pass with various SDKS. rdar://147067322 (cherry picked from commit 7cc4472037b43971bd3ee373fe75b5043f5abca9) --- .../lib/rtsan/rtsan_interceptors_posix.cpp | 37 +++++++---------- .../tests/rtsan_test_interceptors_posix.cpp | 40 +++++++++---------- 2 files changed, 32 insertions(+), 45 deletions(-) diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index 4d602a88ba9ae..040f501ee52e9 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp @@ -21,24 +21,6 @@ #include "rtsan/rtsan.h" #if SANITIZER_APPLE - -#if TARGET_OS_MAC -// On MacOS OSSpinLockLock is deprecated and no longer present in the headers, -// but the symbol still exists on the system. Forward declare here so we -// don't get compilation errors. -#include -extern "C" { -typedef int32_t OSSpinLock; -void OSSpinLockLock(volatile OSSpinLock *__lock); -// A pointer to this type is in the interface for `_os_nospin_lock_lock`, but -// it's an internal implementation detail of `os/lock.c` on Darwin, and -// therefore not available in any headers. As a workaround, we forward declare -// it here, which is enough to facilitate interception of _os_nospin_lock_lock. 
-struct _os_nospin_lock_s; -using _os_nospin_lock_t = _os_nospin_lock_s *; -} -#endif // TARGET_OS_MAC - #include #include #endif // SANITIZER_APPLE @@ -633,26 +615,35 @@ INTERCEPTOR(mode_t, umask, mode_t cmask) { #pragma clang diagnostic push // OSSpinLockLock is deprecated, but still in use in libc++ #pragma clang diagnostic ignored "-Wdeprecated-declarations" +#undef OSSpinLockLock + INTERCEPTOR(void, OSSpinLockLock, volatile OSSpinLock *lock) { __rtsan_notify_intercepted_call("OSSpinLockLock"); return REAL(OSSpinLockLock)(lock); } -#pragma clang diagnostic pop + #define RTSAN_MAYBE_INTERCEPT_OSSPINLOCKLOCK INTERCEPT_FUNCTION(OSSpinLockLock) #else #define RTSAN_MAYBE_INTERCEPT_OSSPINLOCKLOCK #endif // SANITIZER_APPLE #if SANITIZER_APPLE -INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) { - __rtsan_notify_intercepted_call("os_unfair_lock_lock"); - return REAL(os_unfair_lock_lock)(lock); -} +// _os_nospin_lock_lock may replace OSSpinLockLock due to deprecation macro. +typedef volatile OSSpinLock *_os_nospin_lock_t; INTERCEPTOR(void, _os_nospin_lock_lock, _os_nospin_lock_t lock) { __rtsan_notify_intercepted_call("_os_nospin_lock_lock"); return REAL(_os_nospin_lock_lock)(lock); } +#pragma clang diagnostic pop // "-Wdeprecated-declarations" +#endif // SANITIZER_APPLE + +#if SANITIZER_APPLE +INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) { + __rtsan_notify_intercepted_call("os_unfair_lock_lock"); + return REAL(os_unfair_lock_lock)(lock); +} + #define RTSAN_MAYBE_INTERCEPT_OS_UNFAIR_LOCK_LOCK \ INTERCEPT_FUNCTION(os_unfair_lock_lock) #else diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index 75f723081c4b6..7eda884951c83 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -1036,10 +1036,18 @@ TEST(TestRtsanInterceptors, PthreadJoinDiesWhenRealtime) { } #if SANITIZER_APPLE - #pragma clang diagnostic push // OSSpinLockLock is deprecated, but still in use in libc++ #pragma clang diagnostic ignored "-Wdeprecated-declarations" +#undef OSSpinLockLock +extern "C" { +typedef int32_t OSSpinLock; +void OSSpinLockLock(volatile OSSpinLock *__lock); +// _os_nospin_lock_lock may replace OSSpinLockLock due to deprecation macro. +typedef volatile OSSpinLock *_os_nospin_lock_t; +void _os_nospin_lock_lock(_os_nospin_lock_t lock); +} + TEST(TestRtsanInterceptors, OsSpinLockLockDiesWhenRealtime) { auto Func = []() { OSSpinLock spin_lock{}; @@ -1048,7 +1056,14 @@ TEST(TestRtsanInterceptors, OsSpinLockLockDiesWhenRealtime) { ExpectRealtimeDeath(Func, "OSSpinLockLock"); ExpectNonRealtimeSurvival(Func); } -#pragma clang diagnostic pop + +TEST(TestRtsanInterceptors, OsNoSpinLockLockDiesWhenRealtime) { + OSSpinLock lock{}; + auto Func = [&]() { _os_nospin_lock_lock(&lock); }; + ExpectRealtimeDeath(Func, "_os_nospin_lock_lock"); + ExpectNonRealtimeSurvival(Func); +} +#pragma clang diagnostic pop //"-Wdeprecated-declarations" TEST(TestRtsanInterceptors, OsUnfairLockLockDiesWhenRealtime) { auto Func = []() { @@ -1058,26 +1073,7 @@ TEST(TestRtsanInterceptors, OsUnfairLockLockDiesWhenRealtime) { ExpectRealtimeDeath(Func, "os_unfair_lock_lock"); ExpectNonRealtimeSurvival(Func); } - -// We intercept _os_nospin_lock_lock because it's the internal -// locking mechanism for MacOS's atomic implementation for data -// types that are larger than the hardware's maximum lock-free size. 
-// However, it's a private implementation detail and not visible in any headers, -// so we must duplicate the required type definitions to forward declaration -// what we need here. -extern "C" { -struct _os_nospin_lock_s { - unsigned int oul_value; -}; -void _os_nospin_lock_lock(_os_nospin_lock_s *); -} -TEST(TestRtsanInterceptors, OsNoSpinLockLockDiesWhenRealtime) { - _os_nospin_lock_s lock{}; - auto Func = [&]() { _os_nospin_lock_lock(&lock); }; - ExpectRealtimeDeath(Func, "_os_nospin_lock_lock"); - ExpectNonRealtimeSurvival(Func); -} -#endif +#endif // SANITIZER_APPLE #if SANITIZER_LINUX TEST(TestRtsanInterceptors, SpinLockLockDiesWhenRealtime) { From 0019b7d0ae0bcc65af065542fcfb48ea0eb55d38 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Fri, 25 Apr 2025 20:05:00 +0530 Subject: [PATCH 21/83] [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Towards This change moves WasmSym from a static global struct to an instance owned by Ctx, allowing it to be reset cleanly between linker runs. This enables safe support for multiple invocations of wasm-ld within the same process Changes done - Converted WasmSym from a static struct to a regular struct with instance members. - Added a std::unique_ptr wasmSym field inside Ctx. - Reset wasmSym in Ctx::reset() to clear state between links. - Replaced all WasmSym:: references with ctx.wasmSym->. - Removed global symbol definitions from Symbols.cpp that are no longer needed. Clearing wasmSym in ctx.reset() ensures a clean slate for each link invocation, preventing symbol leakage across runs—critical when using wasm-ld/lld as a reentrant library where global state can cause subtle, hard-to-debug errors. --------- Co-authored-by: Vassil Vassilev (cherry picked from commit 9cbbb74d370c09e13b8412f21dccb7d2c4afc6a4) --- lld/wasm/Config.h | 106 +++++++++++++++++++ lld/wasm/Driver.cpp | 60 +++++------ lld/wasm/InputChunks.cpp | 10 +- lld/wasm/MarkLive.cpp | 6 +- lld/wasm/OutputSections.cpp | 4 +- lld/wasm/Symbols.cpp | 25 ----- lld/wasm/Symbols.h | 99 ------------------ lld/wasm/SyntheticSections.cpp | 32 +++--- lld/wasm/Writer.cpp | 185 +++++++++++++++++---------------- 9 files changed, 253 insertions(+), 274 deletions(-) diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 1fa6c42d9cd86..527edc11c48e3 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -32,6 +32,11 @@ class InputTable; class InputGlobal; class InputFunction; class Symbol; +class DefinedData; +class GlobalSymbol; +class DefinedFunction; +class UndefinedGlobal; +class TableSymbol; // For --unresolved-symbols. enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic }; @@ -139,6 +144,107 @@ struct Ctx { llvm::SmallVector syntheticGlobals; llvm::SmallVector syntheticTables; + // linker-generated symbols + struct WasmSym { + // __global_base + // Symbol marking the start of the global section. + DefinedData *globalBase; + + // __stack_pointer/__stack_low/__stack_high + // Global that holds current value of stack pointer and data symbols marking + // the start and end of the stack region. stackPointer is initialized to + // stackHigh and grows downwards towards stackLow + GlobalSymbol *stackPointer; + DefinedData *stackLow; + DefinedData *stackHigh; + + // __tls_base + // Global that holds the address of the base of the current thread's + // TLS block. + GlobalSymbol *tlsBase; + + // __tls_size + // Symbol whose value is the size of the TLS block. 
+ GlobalSymbol *tlsSize; + + // __tls_size + // Symbol whose value is the alignment of the TLS block. + GlobalSymbol *tlsAlign; + + // __data_end + // Symbol marking the end of the data and bss. + DefinedData *dataEnd; + + // __heap_base/__heap_end + // Symbols marking the beginning and end of the "heap". It starts at the end + // of the data, bss and explicit stack, and extends to the end of the linear + // memory allocated by wasm-ld. This region of memory is not used by the + // linked code, so it may be used as a backing store for `sbrk` or `malloc` + // implementations. + DefinedData *heapBase; + DefinedData *heapEnd; + + // __wasm_init_memory_flag + // Symbol whose contents are nonzero iff memory has already been + // initialized. + DefinedData *initMemoryFlag; + + // __wasm_init_memory + // Function that initializes passive data segments during instantiation. + DefinedFunction *initMemory; + + // __wasm_call_ctors + // Function that directly calls all ctors in priority order. + DefinedFunction *callCtors; + + // __wasm_call_dtors + // Function that calls the libc/etc. cleanup function. + DefinedFunction *callDtors; + + // __wasm_apply_global_relocs + // Function that applies relocations to wasm globals post-instantiation. + // Unlike __wasm_apply_data_relocs this needs to run on every thread. + DefinedFunction *applyGlobalRelocs; + + // __wasm_apply_tls_relocs + // Like __wasm_apply_data_relocs but for TLS section. These must be + // delayed until __wasm_init_tls. + DefinedFunction *applyTLSRelocs; + + // __wasm_apply_global_tls_relocs + // Like applyGlobalRelocs but for globals that hold TLS addresses. These + // must be delayed until __wasm_init_tls. + DefinedFunction *applyGlobalTLSRelocs; + + // __wasm_init_tls + // Function that allocates thread-local storage and initializes it. + DefinedFunction *initTLS; + + // Pointer to the function that is to be used in the start section. + // (normally an alias of initMemory, or applyGlobalRelocs). + DefinedFunction *startFunction; + + // __dso_handle + // Symbol used in calls to __cxa_atexit to determine current DLL + DefinedData *dsoHandle; + + // __table_base + // Used in PIC code for offset of indirect function table + UndefinedGlobal *tableBase; + DefinedData *definedTableBase; + + // __memory_base + // Used in PIC code for offset of global data + UndefinedGlobal *memoryBase; + DefinedData *definedMemoryBase; + + // __indirect_function_table + // Used as an address space for function pointers, with each function that + // is used as a function pointer being allocated a slot. + TableSymbol *indirectFunctionTable; + }; + WasmSym sym; + // True if we are creating position-independent code. bool isPic = false; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index c3a74dde6480e..467c49e9981bc 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -70,6 +70,7 @@ void Ctx::reset() { isPic = false; legacyFunctionTable = false; emitBssSegments = false; + sym = WasmSym{}; } namespace { @@ -941,14 +942,14 @@ static void createSyntheticSymbols() { true}; static llvm::wasm::WasmGlobalType mutableGlobalTypeI64 = {WASM_TYPE_I64, true}; - WasmSym::callCtors = symtab->addSyntheticFunction( + ctx.sym.callCtors = symtab->addSyntheticFunction( "__wasm_call_ctors", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_call_ctors")); bool is64 = ctx.arg.is64.value_or(false); if (ctx.isPic) { - WasmSym::stackPointer = + ctx.sym.stackPointer = createUndefinedGlobal("__stack_pointer", ctx.arg.is64.value_or(false) ? 
&mutableGlobalTypeI64 : &mutableGlobalTypeI32); @@ -958,25 +959,24 @@ static void createSyntheticSymbols() { // See: // https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md auto *globalType = is64 ? &globalTypeI64 : &globalTypeI32; - WasmSym::memoryBase = createUndefinedGlobal("__memory_base", globalType); - WasmSym::tableBase = createUndefinedGlobal("__table_base", globalType); - WasmSym::memoryBase->markLive(); - WasmSym::tableBase->markLive(); + ctx.sym.memoryBase = createUndefinedGlobal("__memory_base", globalType); + ctx.sym.tableBase = createUndefinedGlobal("__table_base", globalType); + ctx.sym.memoryBase->markLive(); + ctx.sym.tableBase->markLive(); } else { // For non-PIC code - WasmSym::stackPointer = createGlobalVariable("__stack_pointer", true); - WasmSym::stackPointer->markLive(); + ctx.sym.stackPointer = createGlobalVariable("__stack_pointer", true); + ctx.sym.stackPointer->markLive(); } if (ctx.arg.sharedMemory) { - WasmSym::tlsBase = createGlobalVariable("__tls_base", true); - WasmSym::tlsSize = createGlobalVariable("__tls_size", false); - WasmSym::tlsAlign = createGlobalVariable("__tls_align", false); - WasmSym::initTLS = symtab->addSyntheticFunction( + ctx.sym.tlsBase = createGlobalVariable("__tls_base", true); + ctx.sym.tlsSize = createGlobalVariable("__tls_size", false); + ctx.sym.tlsAlign = createGlobalVariable("__tls_align", false); + ctx.sym.initTLS = symtab->addSyntheticFunction( "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN, - make( - is64 ? i64ArgSignature : i32ArgSignature, - "__wasm_init_tls")); + make(is64 ? i64ArgSignature : i32ArgSignature, + "__wasm_init_tls")); } } @@ -984,19 +984,19 @@ static void createOptionalSymbols() { if (ctx.arg.relocatable) return; - WasmSym::dsoHandle = symtab->addOptionalDataSymbol("__dso_handle"); + ctx.sym.dsoHandle = symtab->addOptionalDataSymbol("__dso_handle"); if (!ctx.arg.shared) - WasmSym::dataEnd = symtab->addOptionalDataSymbol("__data_end"); + ctx.sym.dataEnd = symtab->addOptionalDataSymbol("__data_end"); if (!ctx.isPic) { - WasmSym::stackLow = symtab->addOptionalDataSymbol("__stack_low"); - WasmSym::stackHigh = symtab->addOptionalDataSymbol("__stack_high"); - WasmSym::globalBase = symtab->addOptionalDataSymbol("__global_base"); - WasmSym::heapBase = symtab->addOptionalDataSymbol("__heap_base"); - WasmSym::heapEnd = symtab->addOptionalDataSymbol("__heap_end"); - WasmSym::definedMemoryBase = symtab->addOptionalDataSymbol("__memory_base"); - WasmSym::definedTableBase = symtab->addOptionalDataSymbol("__table_base"); + ctx.sym.stackLow = symtab->addOptionalDataSymbol("__stack_low"); + ctx.sym.stackHigh = symtab->addOptionalDataSymbol("__stack_high"); + ctx.sym.globalBase = symtab->addOptionalDataSymbol("__global_base"); + ctx.sym.heapBase = symtab->addOptionalDataSymbol("__heap_base"); + ctx.sym.heapEnd = symtab->addOptionalDataSymbol("__heap_end"); + ctx.sym.definedMemoryBase = symtab->addOptionalDataSymbol("__memory_base"); + ctx.sym.definedTableBase = symtab->addOptionalDataSymbol("__table_base"); } // For non-shared memory programs we still need to define __tls_base since we @@ -1009,7 +1009,7 @@ static void createOptionalSymbols() { // __tls_size and __tls_align are not needed in this case since they are only // needed for __wasm_init_tls (which we do not create in this case). 
if (!ctx.arg.sharedMemory) - WasmSym::tlsBase = createOptionalGlobal("__tls_base", false); + ctx.sym.tlsBase = createOptionalGlobal("__tls_base", false); } static void processStubLibrariesPreLTO() { @@ -1384,9 +1384,9 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // by libc/etc., because destructors are registered dynamically with // `__cxa_atexit` and friends. if (!ctx.arg.relocatable && !ctx.arg.shared && - !WasmSym::callCtors->isUsedInRegularObj && - WasmSym::callCtors->getName() != ctx.arg.entry && - !ctx.arg.exportedSymbols.count(WasmSym::callCtors->getName())) { + !ctx.sym.callCtors->isUsedInRegularObj && + ctx.sym.callCtors->getName() != ctx.arg.entry && + !ctx.arg.exportedSymbols.count(ctx.sym.callCtors->getName())) { if (Symbol *callDtors = handleUndefined("__wasm_call_dtors", "")) { if (auto *callDtorsFunc = dyn_cast(callDtors)) { @@ -1395,7 +1395,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { !callDtorsFunc->signature->Returns.empty())) { error("__wasm_call_dtors must have no argument or return values"); } - WasmSym::callDtors = callDtorsFunc; + ctx.sym.callDtors = callDtorsFunc; } else { error("__wasm_call_dtors must be a function"); } @@ -1488,7 +1488,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { markLive(); // Provide the indirect function table if needed. - WasmSym::indirectFunctionTable = + ctx.sym.indirectFunctionTable = symtab->resolveIndirectFunctionTable(/*required =*/false); if (errorCount()) diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp index ccdc92f5c8d71..0e6c4e691be10 100644 --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -397,9 +397,9 @@ bool InputChunk::generateRelocationCode(raw_ostream &os) const { if (ctx.isPic) { writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); if (isTLS()) - writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "tls_base"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "tls_base"); else - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base"); + writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "memory_base"); writeU8(os, opcode_ptr_add, "ADD"); } @@ -422,12 +422,12 @@ bool InputChunk::generateRelocationCode(raw_ostream &os) const { } } else { assert(ctx.isPic); - const GlobalSymbol* baseSymbol = WasmSym::memoryBase; + const GlobalSymbol *baseSymbol = ctx.sym.memoryBase; if (rel.Type == R_WASM_TABLE_INDEX_I32 || rel.Type == R_WASM_TABLE_INDEX_I64) - baseSymbol = WasmSym::tableBase; + baseSymbol = ctx.sym.tableBase; else if (sym->isTLS()) - baseSymbol = WasmSym::tlsBase; + baseSymbol = ctx.sym.tlsBase; writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); writeUleb128(os, baseSymbol->getGlobalIndex(), "base"); writeU8(os, opcode_reloc_const, "CONST"); diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp index 13c7a3d894fe3..2b2cf19f14b30 100644 --- a/lld/wasm/MarkLive.cpp +++ b/lld/wasm/MarkLive.cpp @@ -114,8 +114,8 @@ void MarkLive::run() { if (sym->isNoStrip() || sym->isExported()) enqueue(sym); - if (WasmSym::callDtors) - enqueue(WasmSym::callDtors); + if (ctx.sym.callDtors) + enqueue(ctx.sym.callDtors); for (const ObjFile *obj : ctx.objectFiles) if (obj->isLive()) { @@ -131,7 +131,7 @@ void MarkLive::run() { // If we have any non-discarded init functions, mark `__wasm_call_ctors` as // live so that we assign it an index and call it. 
if (isCallCtorsLive()) - WasmSym::callCtors->markLive(); + ctx.sym.callCtors->markLive(); } void MarkLive::mark() { diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp index 95f7ecc29de6b..4142a913c8cbf 100644 --- a/lld/wasm/OutputSections.cpp +++ b/lld/wasm/OutputSections.cpp @@ -123,7 +123,7 @@ void DataSection::finalizeContents() { if ((segment->initFlags & WASM_DATA_SEGMENT_IS_PASSIVE) == 0) { if (ctx.isPic && ctx.arg.extendedConst) { writeU8(os, WASM_OPCODE_GLOBAL_GET, "global get"); - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), + writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "literal (global index)"); if (segment->startVA) { writePtrConst(os, segment->startVA, is64, "offset"); @@ -136,7 +136,7 @@ void DataSection::finalizeContents() { if (ctx.isPic) { assert(segment->startVA == 0); initExpr.Inst.Opcode = WASM_OPCODE_GLOBAL_GET; - initExpr.Inst.Value.Global = WasmSym::memoryBase->getGlobalIndex(); + initExpr.Inst.Value.Global = ctx.sym.memoryBase->getGlobalIndex(); } else { initExpr = intConst(segment->startVA, is64); } diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index a687fd6d6c4ef..92a933ecbb024 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -77,31 +77,6 @@ std::string toString(wasm::Symbol::Kind kind) { } namespace wasm { -DefinedFunction *WasmSym::callCtors; -DefinedFunction *WasmSym::callDtors; -DefinedFunction *WasmSym::initMemory; -DefinedFunction *WasmSym::applyGlobalRelocs; -DefinedFunction *WasmSym::applyTLSRelocs; -DefinedFunction *WasmSym::applyGlobalTLSRelocs; -DefinedFunction *WasmSym::initTLS; -DefinedFunction *WasmSym::startFunction; -DefinedData *WasmSym::dsoHandle; -DefinedData *WasmSym::dataEnd; -DefinedData *WasmSym::globalBase; -DefinedData *WasmSym::heapBase; -DefinedData *WasmSym::heapEnd; -DefinedData *WasmSym::initMemoryFlag; -GlobalSymbol *WasmSym::stackPointer; -DefinedData *WasmSym::stackLow; -DefinedData *WasmSym::stackHigh; -GlobalSymbol *WasmSym::tlsBase; -GlobalSymbol *WasmSym::tlsSize; -GlobalSymbol *WasmSym::tlsAlign; -UndefinedGlobal *WasmSym::tableBase; -DefinedData *WasmSym::definedTableBase; -UndefinedGlobal *WasmSym::memoryBase; -DefinedData *WasmSym::definedMemoryBase; -TableSymbol *WasmSym::indirectFunctionTable; WasmSymbolType Symbol::getWasmType() const { if (isa(this)) diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h index b409fffc50a6c..55ee21939ce07 100644 --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -537,105 +537,6 @@ class LazySymbol : public Symbol { const WasmSignature *signature = nullptr; }; -// linker-generated symbols -struct WasmSym { - // __global_base - // Symbol marking the start of the global section. - static DefinedData *globalBase; - - // __stack_pointer/__stack_low/__stack_high - // Global that holds current value of stack pointer and data symbols marking - // the start and end of the stack region. stackPointer is initialized to - // stackHigh and grows downwards towards stackLow - static GlobalSymbol *stackPointer; - static DefinedData *stackLow; - static DefinedData *stackHigh; - - // __tls_base - // Global that holds the address of the base of the current thread's - // TLS block. - static GlobalSymbol *tlsBase; - - // __tls_size - // Symbol whose value is the size of the TLS block. - static GlobalSymbol *tlsSize; - - // __tls_size - // Symbol whose value is the alignment of the TLS block. - static GlobalSymbol *tlsAlign; - - // __data_end - // Symbol marking the end of the data and bss. 
- static DefinedData *dataEnd; - - // __heap_base/__heap_end - // Symbols marking the beginning and end of the "heap". It starts at the end - // of the data, bss and explicit stack, and extends to the end of the linear - // memory allocated by wasm-ld. This region of memory is not used by the - // linked code, so it may be used as a backing store for `sbrk` or `malloc` - // implementations. - static DefinedData *heapBase; - static DefinedData *heapEnd; - - // __wasm_init_memory_flag - // Symbol whose contents are nonzero iff memory has already been initialized. - static DefinedData *initMemoryFlag; - - // __wasm_init_memory - // Function that initializes passive data segments during instantiation. - static DefinedFunction *initMemory; - - // __wasm_call_ctors - // Function that directly calls all ctors in priority order. - static DefinedFunction *callCtors; - - // __wasm_call_dtors - // Function that calls the libc/etc. cleanup function. - static DefinedFunction *callDtors; - - // __wasm_apply_global_relocs - // Function that applies relocations to wasm globals post-instantiation. - // Unlike __wasm_apply_data_relocs this needs to run on every thread. - static DefinedFunction *applyGlobalRelocs; - - // __wasm_apply_tls_relocs - // Like __wasm_apply_data_relocs but for TLS section. These must be - // delayed until __wasm_init_tls. - static DefinedFunction *applyTLSRelocs; - - // __wasm_apply_global_tls_relocs - // Like applyGlobalRelocs but for globals that hold TLS addresses. These - // must be delayed until __wasm_init_tls. - static DefinedFunction *applyGlobalTLSRelocs; - - // __wasm_init_tls - // Function that allocates thread-local storage and initializes it. - static DefinedFunction *initTLS; - - // Pointer to the function that is to be used in the start section. - // (normally an alias of initMemory, or applyGlobalRelocs). - static DefinedFunction *startFunction; - - // __dso_handle - // Symbol used in calls to __cxa_atexit to determine current DLL - static DefinedData *dsoHandle; - - // __table_base - // Used in PIC code for offset of indirect function table - static UndefinedGlobal *tableBase; - static DefinedData *definedTableBase; - - // __memory_base - // Used in PIC code for offset of global data - static UndefinedGlobal *memoryBase; - static DefinedData *definedMemoryBase; - - // __indirect_function_table - // Used as an address space for function pointers, with each function that is - // used as a function pointer being allocated a slot. - static TableSymbol *indirectFunctionTable; -}; - // A buffer class that is large enough to hold any Symbol-derived // object. We allocate memory using this class and instantiate a symbol // using the placement new. diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index 7fb44b9f0c009..0e2aa57e9048e 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -319,8 +319,8 @@ void TableSection::addTable(InputTable *table) { // Some inputs require that the indirect function table be assigned to table // number 0. if (ctx.legacyFunctionTable && - isa(WasmSym::indirectFunctionTable) && - cast(WasmSym::indirectFunctionTable)->table == table) { + isa(ctx.sym.indirectFunctionTable) && + cast(ctx.sym.indirectFunctionTable)->table == table) { if (out.importSec->getNumImportedTables()) { // Alack! Some other input imported a table, meaning that we are unable // to assign table number 0 to the indirect function table. 
@@ -395,8 +395,8 @@ void GlobalSection::assignIndexes() { } static void ensureIndirectFunctionTable() { - if (!WasmSym::indirectFunctionTable) - WasmSym::indirectFunctionTable = + if (!ctx.sym.indirectFunctionTable) + ctx.sym.indirectFunctionTable = symtab->resolveIndirectFunctionTable(/*required =*/true); } @@ -430,10 +430,9 @@ void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const { // Get __memory_base writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); if (sym->isTLS()) - writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "__tls_base"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); else - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), - "__memory_base"); + writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "__memory_base"); // Add the virtual address of the data symbol writeU8(os, opcode_ptr_const, "CONST"); @@ -443,7 +442,7 @@ void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const { continue; // Get __table_base writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); - writeUleb128(os, WasmSym::tableBase->getGlobalIndex(), "__table_base"); + writeUleb128(os, ctx.sym.tableBase->getGlobalIndex(), "__table_base"); // Add the table index to __table_base writeU8(os, opcode_ptr_const, "CONST"); @@ -490,13 +489,13 @@ void GlobalSection::writeBody() { if (ctx.arg.extendedConst && ctx.isPic) { if (auto *d = dyn_cast(sym)) { if (!sym->isTLS()) { - globalIdx = WasmSym::memoryBase->getGlobalIndex(); + globalIdx = ctx.sym.memoryBase->getGlobalIndex(); offset = d->getVA(); useExtendedConst = true; } } else if (auto *f = dyn_cast(sym)) { if (!sym->isStub) { - globalIdx = WasmSym::tableBase->getGlobalIndex(); + globalIdx = ctx.sym.tableBase->getGlobalIndex(); offset = f->getTableIndex(); useExtendedConst = true; } @@ -550,14 +549,11 @@ void ExportSection::writeBody() { writeExport(os, export_); } -bool StartSection::isNeeded() const { - return WasmSym::startFunction != nullptr; -} +bool StartSection::isNeeded() const { return ctx.sym.startFunction != nullptr; } void StartSection::writeBody() { raw_ostream &os = bodyOutputStream; - writeUleb128(os, WasmSym::startFunction->getFunctionIndex(), - "function index"); + writeUleb128(os, ctx.sym.startFunction->getFunctionIndex(), "function index"); } void ElemSection::addEntry(FunctionSymbol *sym) { @@ -573,9 +569,9 @@ void ElemSection::addEntry(FunctionSymbol *sym) { void ElemSection::writeBody() { raw_ostream &os = bodyOutputStream; - assert(WasmSym::indirectFunctionTable); + assert(ctx.sym.indirectFunctionTable); writeUleb128(os, 1, "segment count"); - uint32_t tableNumber = WasmSym::indirectFunctionTable->getTableNumber(); + uint32_t tableNumber = ctx.sym.indirectFunctionTable->getTableNumber(); uint32_t flags = 0; if (tableNumber) flags |= WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER; @@ -587,7 +583,7 @@ void ElemSection::writeBody() { initExpr.Extended = false; if (ctx.isPic) { initExpr.Inst.Opcode = WASM_OPCODE_GLOBAL_GET; - initExpr.Inst.Value.Global = WasmSym::tableBase->getGlobalIndex(); + initExpr.Inst.Value.Global = ctx.sym.tableBase->getGlobalIndex(); } else { bool is64 = ctx.arg.is64.value_or(false); initExpr = intConst(ctx.arg.tableBase, is64); diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 76e38f548157c..2bf4b370a7dbd 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -340,16 +340,16 @@ void Writer::layoutMemory() { if (ctx.arg.relocatable || ctx.isPic) return; memoryPtr = alignTo(memoryPtr, stackAlignment); - if (WasmSym::stackLow) - 
WasmSym::stackLow->setVA(memoryPtr); + if (ctx.sym.stackLow) + ctx.sym.stackLow->setVA(memoryPtr); if (ctx.arg.zStackSize != alignTo(ctx.arg.zStackSize, stackAlignment)) error("stack size must be " + Twine(stackAlignment) + "-byte aligned"); log("mem: stack size = " + Twine(ctx.arg.zStackSize)); log("mem: stack base = " + Twine(memoryPtr)); memoryPtr += ctx.arg.zStackSize; - setGlobalPtr(cast(WasmSym::stackPointer), memoryPtr); - if (WasmSym::stackHigh) - WasmSym::stackHigh->setVA(memoryPtr); + setGlobalPtr(cast(ctx.sym.stackPointer), memoryPtr); + if (ctx.sym.stackHigh) + ctx.sym.stackHigh->setVA(memoryPtr); log("mem: stack top = " + Twine(memoryPtr)); }; @@ -367,15 +367,15 @@ void Writer::layoutMemory() { } log("mem: global base = " + Twine(memoryPtr)); - if (WasmSym::globalBase) - WasmSym::globalBase->setVA(memoryPtr); + if (ctx.sym.globalBase) + ctx.sym.globalBase->setVA(memoryPtr); uint64_t dataStart = memoryPtr; // Arbitrarily set __dso_handle handle to point to the start of the data // segments. - if (WasmSym::dsoHandle) - WasmSym::dsoHandle->setVA(dataStart); + if (ctx.sym.dsoHandle) + ctx.sym.dsoHandle->setVA(dataStart); out.dylinkSec->memAlign = 0; for (OutputSegment *seg : segments) { @@ -386,16 +386,16 @@ void Writer::layoutMemory() { memoryPtr, seg->size, seg->alignment)); if (!ctx.arg.relocatable && seg->isTLS()) { - if (WasmSym::tlsSize) { - auto *tlsSize = cast(WasmSym::tlsSize); + if (ctx.sym.tlsSize) { + auto *tlsSize = cast(ctx.sym.tlsSize); setGlobalPtr(tlsSize, seg->size); } - if (WasmSym::tlsAlign) { - auto *tlsAlign = cast(WasmSym::tlsAlign); + if (ctx.sym.tlsAlign) { + auto *tlsAlign = cast(ctx.sym.tlsAlign); setGlobalPtr(tlsAlign, int64_t{1} << seg->alignment); } - if (!ctx.arg.sharedMemory && WasmSym::tlsBase) { - auto *tlsBase = cast(WasmSym::tlsBase); + if (!ctx.arg.sharedMemory && ctx.sym.tlsBase) { + auto *tlsBase = cast(ctx.sym.tlsBase); setGlobalPtr(tlsBase, memoryPtr); } } @@ -406,17 +406,17 @@ void Writer::layoutMemory() { // Make space for the memory initialization flag if (ctx.arg.sharedMemory && hasPassiveInitializedSegments()) { memoryPtr = alignTo(memoryPtr, 4); - WasmSym::initMemoryFlag = symtab->addSyntheticDataSymbol( + ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol( "__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN); - WasmSym::initMemoryFlag->markLive(); - WasmSym::initMemoryFlag->setVA(memoryPtr); + ctx.sym.initMemoryFlag->markLive(); + ctx.sym.initMemoryFlag->setVA(memoryPtr); log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}", "__wasm_init_memory_flag", memoryPtr, 4, 4)); memoryPtr += 4; } - if (WasmSym::dataEnd) - WasmSym::dataEnd->setVA(memoryPtr); + if (ctx.sym.dataEnd) + ctx.sym.dataEnd->setVA(memoryPtr); uint64_t staticDataSize = memoryPtr - dataStart; log("mem: static data = " + Twine(staticDataSize)); @@ -426,7 +426,7 @@ void Writer::layoutMemory() { if (!ctx.arg.stackFirst) placeStack(); - if (WasmSym::heapBase) { + if (ctx.sym.heapBase) { // Set `__heap_base` to follow the end of the stack or global data. The // fact that this comes last means that a malloc/brk implementation can // grow the heap at runtime. @@ -434,7 +434,7 @@ void Writer::layoutMemory() { // __heap_base to be aligned already. 
memoryPtr = alignTo(memoryPtr, heapAlignment); log("mem: heap base = " + Twine(memoryPtr)); - WasmSym::heapBase->setVA(memoryPtr); + ctx.sym.heapBase->setVA(memoryPtr); } uint64_t maxMemorySetting = 1ULL << 32; @@ -470,12 +470,12 @@ void Writer::layoutMemory() { out.memorySec->numMemoryPages = memoryPtr / WasmPageSize; log("mem: total pages = " + Twine(out.memorySec->numMemoryPages)); - if (WasmSym::heapEnd) { + if (ctx.sym.heapEnd) { // Set `__heap_end` to follow the end of the statically allocated linear // memory. The fact that this comes last means that a malloc/brk // implementation can grow the heap at runtime. log("mem: heap end = " + Twine(memoryPtr)); - WasmSym::heapEnd->setVA(memoryPtr); + ctx.sym.heapEnd->setVA(memoryPtr); } uint64_t maxMemory = 0; @@ -758,14 +758,14 @@ void Writer::calculateImports() { // Some inputs require that the indirect function table be assigned to table // number 0, so if it is present and is an import, allocate it before any // other tables. - if (WasmSym::indirectFunctionTable && - shouldImport(WasmSym::indirectFunctionTable)) - out.importSec->addImport(WasmSym::indirectFunctionTable); + if (ctx.sym.indirectFunctionTable && + shouldImport(ctx.sym.indirectFunctionTable)) + out.importSec->addImport(ctx.sym.indirectFunctionTable); for (Symbol *sym : symtab->symbols()) { if (!shouldImport(sym)) continue; - if (sym == WasmSym::indirectFunctionTable) + if (sym == ctx.sym.indirectFunctionTable) continue; LLVM_DEBUG(dbgs() << "import: " << sym->getName() << "\n"); out.importSec->addImport(sym); @@ -879,7 +879,7 @@ void Writer::createCommandExportWrappers() { // If there are no ctors and there's no libc `__wasm_call_dtors` to // call, don't wrap the exports. - if (initFunctions.empty() && WasmSym::callDtors == nullptr) + if (initFunctions.empty() && ctx.sym.callDtors == nullptr) return; std::vector toWrap; @@ -919,27 +919,27 @@ void Writer::createCommandExportWrappers() { } static void finalizeIndirectFunctionTable() { - if (!WasmSym::indirectFunctionTable) + if (!ctx.sym.indirectFunctionTable) return; - if (shouldImport(WasmSym::indirectFunctionTable) && - !WasmSym::indirectFunctionTable->hasTableNumber()) { + if (shouldImport(ctx.sym.indirectFunctionTable) && + !ctx.sym.indirectFunctionTable->hasTableNumber()) { // Processing -Bsymbolic relocations resulted in a late requirement that the // indirect function table be present, and we are running in --import-table // mode. Add the table now to the imports section. Otherwise it will be // added to the tables section later in assignIndexes. - out.importSec->addImport(WasmSym::indirectFunctionTable); + out.importSec->addImport(ctx.sym.indirectFunctionTable); } uint32_t tableSize = ctx.arg.tableBase + out.elemSec->numEntries(); WasmLimits limits = {0, tableSize, 0}; - if (WasmSym::indirectFunctionTable->isDefined() && !ctx.arg.growableTable) { + if (ctx.sym.indirectFunctionTable->isDefined() && !ctx.arg.growableTable) { limits.Flags |= WASM_LIMITS_FLAG_HAS_MAX; limits.Maximum = limits.Minimum; } if (ctx.arg.is64.value_or(false)) limits.Flags |= WASM_LIMITS_FLAG_IS_64; - WasmSym::indirectFunctionTable->setLimits(limits); + ctx.sym.indirectFunctionTable->setLimits(limits); } static void scanRelocations() { @@ -1142,26 +1142,26 @@ void Writer::createSyntheticInitFunctions() { // We also initialize bss segments (using memory.fill) as part of this // function. 
if (hasPassiveInitializedSegments()) { - WasmSym::initMemory = symtab->addSyntheticFunction( + ctx.sym.initMemory = symtab->addSyntheticFunction( "__wasm_init_memory", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_init_memory")); - WasmSym::initMemory->markLive(); + ctx.sym.initMemory->markLive(); if (ctx.arg.sharedMemory) { // This global is assigned during __wasm_init_memory in the shared memory // case. - WasmSym::tlsBase->markLive(); + ctx.sym.tlsBase->markLive(); } } if (ctx.arg.sharedMemory) { if (out.globalSec->needsTLSRelocations()) { - WasmSym::applyGlobalTLSRelocs = symtab->addSyntheticFunction( + ctx.sym.applyGlobalTLSRelocs = symtab->addSyntheticFunction( "__wasm_apply_global_tls_relocs", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_apply_global_tls_relocs")); - WasmSym::applyGlobalTLSRelocs->markLive(); + ctx.sym.applyGlobalTLSRelocs->markLive(); // TLS relocations depend on the __tls_base symbols - WasmSym::tlsBase->markLive(); + ctx.sym.tlsBase->markLive(); } auto hasTLSRelocs = [](const OutputSegment *segment) { @@ -1172,40 +1172,39 @@ void Writer::createSyntheticInitFunctions() { return false; }; if (llvm::any_of(segments, hasTLSRelocs)) { - WasmSym::applyTLSRelocs = symtab->addSyntheticFunction( + ctx.sym.applyTLSRelocs = symtab->addSyntheticFunction( "__wasm_apply_tls_relocs", WASM_SYMBOL_VISIBILITY_HIDDEN, - make(nullSignature, - "__wasm_apply_tls_relocs")); - WasmSym::applyTLSRelocs->markLive(); + make(nullSignature, "__wasm_apply_tls_relocs")); + ctx.sym.applyTLSRelocs->markLive(); } } if (ctx.isPic && out.globalSec->needsRelocations()) { - WasmSym::applyGlobalRelocs = symtab->addSyntheticFunction( + ctx.sym.applyGlobalRelocs = symtab->addSyntheticFunction( "__wasm_apply_global_relocs", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_apply_global_relocs")); - WasmSym::applyGlobalRelocs->markLive(); + ctx.sym.applyGlobalRelocs->markLive(); } // If there is only one start function we can just use that function // itself as the Wasm start function, otherwise we need to synthesize // a new function to call them in sequence. - if (WasmSym::applyGlobalRelocs && WasmSym::initMemory) { - WasmSym::startFunction = symtab->addSyntheticFunction( + if (ctx.sym.applyGlobalRelocs && ctx.sym.initMemory) { + ctx.sym.startFunction = symtab->addSyntheticFunction( "__wasm_start", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_start")); - WasmSym::startFunction->markLive(); + ctx.sym.startFunction->markLive(); } } void Writer::createInitMemoryFunction() { LLVM_DEBUG(dbgs() << "createInitMemoryFunction\n"); - assert(WasmSym::initMemory); + assert(ctx.sym.initMemory); assert(hasPassiveInitializedSegments()); uint64_t flagAddress; if (ctx.arg.sharedMemory) { - assert(WasmSym::initMemoryFlag); - flagAddress = WasmSym::initMemoryFlag->getVA(); + assert(ctx.sym.initMemoryFlag); + flagAddress = ctx.sym.initMemoryFlag->getVA(); } bool is64 = ctx.arg.is64.value_or(false); std::string bodyContent; @@ -1278,7 +1277,7 @@ void Writer::createInitMemoryFunction() { writeUleb128(os, 2, "local count"); writeU8(os, is64 ? WASM_TYPE_I64 : WASM_TYPE_I32, "address type"); writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base"); + writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "memory_base"); writePtrConst(os, flagAddress, is64, "flag address"); writeU8(os, is64 ? 
WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add"); writeU8(os, WASM_OPCODE_LOCAL_SET, "local.set"); @@ -1325,7 +1324,7 @@ void Writer::createInitMemoryFunction() { writePtrConst(os, s->startVA, is64, "destination address"); if (ctx.isPic) { writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), + writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "__memory_base"); writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "i32.add"); @@ -1343,8 +1342,7 @@ void Writer::createInitMemoryFunction() { writePtrConst(os, s->startVA, is64, "destination address"); } writeU8(os, WASM_OPCODE_GLOBAL_SET, "GLOBAL_SET"); - writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), - "__tls_base"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); if (ctx.isPic) { writeU8(os, WASM_OPCODE_LOCAL_GET, "local.tee"); writeUleb128(os, 1, "local 1"); @@ -1420,30 +1418,30 @@ void Writer::createInitMemoryFunction() { writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::initMemory, bodyContent); + createFunction(ctx.sym.initMemory, bodyContent); } void Writer::createStartFunction() { // If the start function exists when we have more than one function to call. - if (WasmSym::initMemory && WasmSym::applyGlobalRelocs) { - assert(WasmSym::startFunction); + if (ctx.sym.initMemory && ctx.sym.applyGlobalRelocs) { + assert(ctx.sym.startFunction); std::string bodyContent; { raw_string_ostream os(bodyContent); writeUleb128(os, 0, "num locals"); writeU8(os, WASM_OPCODE_CALL, "CALL"); - writeUleb128(os, WasmSym::applyGlobalRelocs->getFunctionIndex(), + writeUleb128(os, ctx.sym.applyGlobalRelocs->getFunctionIndex(), "function index"); writeU8(os, WASM_OPCODE_CALL, "CALL"); - writeUleb128(os, WasmSym::initMemory->getFunctionIndex(), + writeUleb128(os, ctx.sym.initMemory->getFunctionIndex(), "function index"); writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::startFunction, bodyContent); - } else if (WasmSym::initMemory) { - WasmSym::startFunction = WasmSym::initMemory; - } else if (WasmSym::applyGlobalRelocs) { - WasmSym::startFunction = WasmSym::applyGlobalRelocs; + createFunction(ctx.sym.startFunction, bodyContent); + } else if (ctx.sym.initMemory) { + ctx.sym.startFunction = ctx.sym.initMemory; + } else if (ctx.sym.applyGlobalRelocs) { + ctx.sym.startFunction = ctx.sym.applyGlobalRelocs; } } @@ -1497,7 +1495,7 @@ void Writer::createApplyTLSRelocationsFunction() { writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::applyTLSRelocs, bodyContent); + createFunction(ctx.sym.applyTLSRelocs, bodyContent); } // Similar to createApplyDataRelocationsFunction but generates relocation code @@ -1513,7 +1511,7 @@ void Writer::createApplyGlobalRelocationsFunction() { writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::applyGlobalRelocs, bodyContent); + createFunction(ctx.sym.applyGlobalRelocs, bodyContent); } // Similar to createApplyGlobalRelocationsFunction but for @@ -1529,7 +1527,7 @@ void Writer::createApplyGlobalTLSRelocationsFunction() { writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::applyGlobalTLSRelocs, bodyContent); + createFunction(ctx.sym.applyGlobalTLSRelocs, bodyContent); } // Create synthetic "__wasm_call_ctors" function based on ctor functions @@ -1537,7 +1535,7 @@ void Writer::createApplyGlobalTLSRelocationsFunction() { void Writer::createCallCtorsFunction() { // If __wasm_call_ctors isn't referenced, there aren't any ctors, don't // define the `__wasm_call_ctors` function. 
- if (!WasmSym::callCtors->isLive() && initFunctions.empty()) + if (!ctx.sym.callCtors->isLive() && initFunctions.empty()) return; // First write the body's contents to a string. @@ -1558,7 +1556,7 @@ void Writer::createCallCtorsFunction() { writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::callCtors, bodyContent); + createFunction(ctx.sym.callCtors, bodyContent); } // Create a wrapper around a function export which calls the @@ -1573,10 +1571,9 @@ void Writer::createCommandExportWrapper(uint32_t functionIndex, // Call `__wasm_call_ctors` which call static constructors (and // applies any runtime relocations in Emscripten-style PIC mode) - if (WasmSym::callCtors->isLive()) { + if (ctx.sym.callCtors->isLive()) { writeU8(os, WASM_OPCODE_CALL, "CALL"); - writeUleb128(os, WasmSym::callCtors->getFunctionIndex(), - "function index"); + writeUleb128(os, ctx.sym.callCtors->getFunctionIndex(), "function index"); } // Call the user's code, leaving any return values on the operand stack. @@ -1588,7 +1585,7 @@ void Writer::createCommandExportWrapper(uint32_t functionIndex, writeUleb128(os, functionIndex, "function index"); // Call the function that calls the destructors. - if (DefinedFunction *callDtors = WasmSym::callDtors) { + if (DefinedFunction *callDtors = ctx.sym.callDtors) { writeU8(os, WASM_OPCODE_CALL, "CALL"); writeUleb128(os, callDtors->getFunctionIndex(), "function index"); } @@ -1619,7 +1616,7 @@ void Writer::createInitTLSFunction() { writeUleb128(os, 0, "local index"); writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set"); - writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "global index"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "global index"); // FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend op. writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); @@ -1635,28 +1632,28 @@ void Writer::createInitTLSFunction() { writeU8(os, 0, "memory index immediate"); } - if (WasmSym::applyTLSRelocs) { + if (ctx.sym.applyTLSRelocs) { writeU8(os, WASM_OPCODE_CALL, "CALL"); - writeUleb128(os, WasmSym::applyTLSRelocs->getFunctionIndex(), + writeUleb128(os, ctx.sym.applyTLSRelocs->getFunctionIndex(), "function index"); } - if (WasmSym::applyGlobalTLSRelocs) { + if (ctx.sym.applyGlobalTLSRelocs) { writeU8(os, WASM_OPCODE_CALL, "CALL"); - writeUleb128(os, WasmSym::applyGlobalTLSRelocs->getFunctionIndex(), + writeUleb128(os, ctx.sym.applyGlobalTLSRelocs->getFunctionIndex(), "function index"); } writeU8(os, WASM_OPCODE_END, "end function"); } - createFunction(WasmSym::initTLS, bodyContent); + createFunction(ctx.sym.initTLS, bodyContent); } // Populate InitFunctions vector with init functions from all input objects. // This is then used either when creating the output linking section or to // synthesize the "__wasm_call_ctors" function. void Writer::calculateInitFunctions() { - if (!ctx.arg.relocatable && !WasmSym::callCtors->isLive()) + if (!ctx.arg.relocatable && !ctx.sym.callCtors->isLive()) return; for (ObjFile *file : ctx.objectFiles) { @@ -1707,8 +1704,8 @@ void Writer::createSyntheticSectionsPostLayout() { void Writer::run() { // For PIC code the table base is assigned dynamically by the loader. // For non-PIC, we start at 1 so that accessing table index 0 always traps. 
- if (!ctx.isPic && WasmSym::definedTableBase) - WasmSym::definedTableBase->setVA(ctx.arg.tableBase); + if (!ctx.isPic && ctx.sym.definedTableBase) + ctx.sym.definedTableBase->setVA(ctx.arg.tableBase); log("-- createOutputSegments"); createOutputSegments(); @@ -1776,14 +1773,18 @@ void Writer::run() { if (!ctx.arg.relocatable) { // Create linker synthesized functions - if (WasmSym::applyGlobalRelocs) + if (ctx.sym.applyGlobalRelocs) { createApplyGlobalRelocationsFunction(); - if (WasmSym::applyTLSRelocs) + } + if (ctx.sym.applyTLSRelocs) { createApplyTLSRelocationsFunction(); - if (WasmSym::applyGlobalTLSRelocs) + } + if (ctx.sym.applyGlobalTLSRelocs) { createApplyGlobalTLSRelocationsFunction(); - if (WasmSym::initMemory) + } + if (ctx.sym.initMemory) { createInitMemoryFunction(); + } createStartFunction(); createCallCtorsFunction(); @@ -1794,14 +1795,14 @@ void Writer::run() { // the input objects or an explicit export from the command-line, we // assume ctors and dtors are taken care of already. if (!ctx.arg.relocatable && !ctx.isPic && - !WasmSym::callCtors->isUsedInRegularObj && - !WasmSym::callCtors->isExported()) { + !ctx.sym.callCtors->isUsedInRegularObj && + !ctx.sym.callCtors->isExported()) { log("-- createCommandExportWrappers"); createCommandExportWrappers(); } } - if (WasmSym::initTLS && WasmSym::initTLS->isLive()) { + if (ctx.sym.initTLS && ctx.sym.initTLS->isLive()) { log("-- createInitTLSFunction"); createInitTLSFunction(); } From 72ad9be1e337f487c9db4dd634005d09f7bf2790 Mon Sep 17 00:00:00 2001 From: Losy001 <64610343+Losy001@users.noreply.github.com> Date: Sat, 26 Apr 2025 18:04:12 +0200 Subject: [PATCH 22/83] [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType (#134930) This pull request implements mangling for ConstantMatrixType, allowing matrices to be used on Windows. 
Related issues: #53158, #127127 This example code: ```cpp #include #include typedef float Matrix4 __attribute__((matrix_type(4, 4))); int main() { printf("%s\n", typeid(Matrix4).name()); } ``` Outputs this: ``` struct __clang::__matrix ``` (cherry picked from commit f5a30f111dc4ad6422863722eb708059a68a9d5c) --- clang/lib/AST/MicrosoftMangle.cpp | 17 ++++++- clang/test/CodeGenCXX/mangle-ms-matrix.cpp | 57 ++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenCXX/mangle-ms-matrix.cpp diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 42b735ccf4a2c..74c995f2f97f0 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -3552,7 +3552,22 @@ void MicrosoftCXXNameMangler::mangleType(const DependentSizedExtVectorType *T, void MicrosoftCXXNameMangler::mangleType(const ConstantMatrixType *T, Qualifiers quals, SourceRange Range) { - Error(Range.getBegin(), "matrix type") << Range; + QualType EltTy = T->getElementType(); + const BuiltinType *ET = EltTy->getAs(); + + llvm::SmallString<64> TemplateMangling; + llvm::raw_svector_ostream Stream(TemplateMangling); + MicrosoftCXXNameMangler Extra(Context, Stream); + + Stream << "?$"; + + Extra.mangleSourceName("__matrix"); + Extra.mangleType(EltTy, Range, QMM_Escape); + + Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumRows())); + Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumColumns())); + + mangleArtificialTagType(TagTypeKind::Struct, TemplateMangling, {"__clang"}); } void MicrosoftCXXNameMangler::mangleType(const DependentSizedMatrixType *T, diff --git a/clang/test/CodeGenCXX/mangle-ms-matrix.cpp b/clang/test/CodeGenCXX/mangle-ms-matrix.cpp new file mode 100644 index 0000000000000..b244aa6e33cfa --- /dev/null +++ b/clang/test/CodeGenCXX/mangle-ms-matrix.cpp @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -fenable-matrix -fms-extensions -fcxx-exceptions -ffreestanding -target-feature +avx -emit-llvm %s -o - -triple=i686-pc-win32 | FileCheck %s +// RUN: %clang_cc1 -fenable-matrix -fms-extensions -fcxx-exceptions -ffreestanding -target-feature +avx -emit-llvm %s -o - -triple=i686-pc-win32 -fexperimental-new-constant-interpreter | FileCheck %s + +typedef float __attribute__((matrix_type(4, 4))) m4x4f; +typedef float __attribute__((matrix_type(2, 2))) m2x2f; + +typedef int __attribute__((matrix_type(4, 4))) m4x4i; +typedef int __attribute__((matrix_type(2, 2))) m2x2i; + +void thow(int i) { + switch (i) { + case 0: throw m4x4f(); + // CHECK: ??_R0U?$__matrix@M$03$03@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@M$03$03@__clang@@@864 + // CHECK: _CTA1U?$__matrix@M$03$03@__clang@@ + // CHECK: _TI1U?$__matrix@M$03$03@__clang@@ + case 1: throw m2x2f(); + // CHECK: ??_R0U?$__matrix@M$01$01@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@M$01$01@__clang@@@816 + // CHECK: _CTA1U?$__matrix@M$01$01@__clang@@ + // CHECK: _TI1U?$__matrix@M$01$01@__clang@@ + case 2: throw m4x4i(); + // CHECK: ??_R0U?$__matrix@H$03$03@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@H$03$03@__clang@@@864 + // CHECK: _CTA1U?$__matrix@H$03$03@__clang@@ + // CHECK: _TI1U?$__matrix@H$03$03@__clang@@ + case 3: throw m2x2i(); + // CHECK: ??_R0U?$__matrix@H$01$01@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@H$01$01@__clang@@@816 + // CHECK: _CTA1U?$__matrix@H$01$01@__clang@@ + // CHECK: _TI1U?$__matrix@H$01$01@__clang@@ + } +} + +void foo44f(m4x4f) {} +// CHECK: define dso_local void @"?foo44f@@YAXU?$__matrix@M$03$03@__clang@@@Z" + +m4x4f rfoo44f() { return m4x4f(); 
} +// CHECK: define dso_local noundef <16 x float> @"?rfoo44f@@YAU?$__matrix@M$03$03@__clang@@XZ" + +void foo22f(m2x2f) {} +// CHECK: define dso_local void @"?foo22f@@YAXU?$__matrix@M$01$01@__clang@@@Z" + +m2x2f rfoo22f() { return m2x2f(); } +// CHECK: define dso_local noundef <4 x float> @"?rfoo22f@@YAU?$__matrix@M$01$01@__clang@@XZ" + +void foo44i(m4x4i) {} +// CHECK: define dso_local void @"?foo44i@@YAXU?$__matrix@H$03$03@__clang@@@Z" + +m4x4i rfoo44i() { return m4x4i(); } +// CHECK: define dso_local noundef <16 x i32> @"?rfoo44i@@YAU?$__matrix@H$03$03@__clang@@XZ" + +void foo22i(m2x2i) {} +// CHECK: define dso_local void @"?foo22i@@YAXU?$__matrix@H$01$01@__clang@@@Z" + +m2x2i rfoo22i() { return m2x2i(); } +// CHECK: define dso_local noundef <4 x i32> @"?rfoo22i@@YAU?$__matrix@H$01$01@__clang@@XZ" \ No newline at end of file From 41c36d94080488cc938b1c1697c7e8353405cd75 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Sat, 26 Apr 2025 09:17:56 -0700 Subject: [PATCH 23/83] [clang] Fix unused variable warning in MS mangler from constant matrix patch (cherry picked from commit ccdd55c518277d749eff878ffcb5ca3de55c2a60) --- clang/lib/AST/MicrosoftMangle.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 74c995f2f97f0..cb35dbd611204 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -3553,7 +3553,6 @@ void MicrosoftCXXNameMangler::mangleType(const DependentSizedExtVectorType *T, void MicrosoftCXXNameMangler::mangleType(const ConstantMatrixType *T, Qualifiers quals, SourceRange Range) { QualType EltTy = T->getElementType(); - const BuiltinType *ET = EltTy->getAs(); llvm::SmallString<64> TemplateMangling; llvm::raw_svector_ostream Stream(TemplateMangling); From f233430d977b67cc6aba9362b7d9f1a82d43d6d1 Mon Sep 17 00:00:00 2001 From: Yuta Mukai Date: Fri, 9 May 2025 17:07:19 +0900 Subject: [PATCH 24/83] [AArch64] Fix feature list for FUJITSU-MONAKA processor (#139212) FEAT_FP8DOT4 and FEAT_FP8FMA are supported by FUJITSU-MONAKA. These were previously enabled due to dependencies, but now require explicit activation due to modifications in the dependencies. 
(cherry picked from commit 9d5a5424f0356bd6ee01c751dd6957299783b41b) --- .../Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c | 2 ++ llvm/lib/Target/AArch64/AArch64Processors.td | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c index a80d0f5c79ec1..29e9682d58700 100644 --- a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c +++ b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c @@ -28,6 +28,8 @@ // CHECK-NEXT: FEAT_FP16 Enable half-precision floating-point data processing // CHECK-NEXT: FEAT_FP8 Enable FP8 instructions // CHECK-NEXT: FEAT_FP8DOT2 Enable FP8 2-way dot instructions +// CHECK-NEXT: FEAT_FP8DOT4 Enable FP8 4-way dot instructions +// CHECK-NEXT: FEAT_FP8FMA Enable Armv9.5-A FP8 multiply-add instructions // CHECK-NEXT: FEAT_FPAC Enable Armv8.3-A Pointer Authentication Faulting enhancement // CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int // CHECK-NEXT: FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index d1d4986d12550..80454b4f72d05 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -868,7 +868,8 @@ def ProcessorFeatures { FeatureSSBS, FeatureLS64, FeatureCLRBHB, FeatureSPECRES2, FeatureSVEAES, FeatureSVE2SM4, FeatureSVE2SHA3, FeatureSVE2, FeatureSVEBitPerm, FeatureETE, - FeatureMEC, FeatureFAMINMAX, FeatureFP8DOT2, FeatureLUT]; + FeatureMEC, FeatureFAMINMAX, FeatureFP8DOT2, FeatureFP8DOT4, + FeatureFP8FMA, FeatureLUT]; list Carmel = [HasV8_2aOps, FeatureNEON, FeatureSHA2, FeatureAES, FeatureFullFP16, FeatureCRC, FeatureLSE, FeatureRAS, FeatureRDM, FeatureFPARMv8]; From 2cacf46f35c8fa174a30a4b628a4b19e391a4798 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sat, 10 May 2025 13:14:01 +0800 Subject: [PATCH 25/83] [X86][TargetLowering] Avoid deleting temporary nodes in `getNegatedExpression` (#139029) In the original case, the third call to `getCheaperNegatedExpression` deletes the SDNode returned by the first call. Similar to 74e6030bcbcc8e628f9a99a424342a0c656456f9, this patch uses `HandleSDNodes` to prevent nodes from being deleted by subsequent calls. Closes https://github.com/llvm/llvm-project/issues/138944. (cherry picked from commit 143cce72b1f50bc37363315793b80ae92d2b0ae3) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 ++++++++- llvm/test/CodeGen/X86/pr138982.ll | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr138982.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 627cef9ead7ff..4413fbb77f415 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -54147,12 +54147,19 @@ SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (!Flags.hasNoSignedZeros()) break; + // Because getCheaperNegatedExpression can delete nodes we need a handle to + // keep temporary nodes alive. + std::list Handles; + // This is always negatible for free but we might be able to remove some // extra operand negations as well. 
SmallVector NewOps(Op.getNumOperands(), SDValue()); - for (int i = 0; i != 3; ++i) + for (int i = 0; i != 3; ++i) { NewOps[i] = getCheaperNegatedExpression( Op.getOperand(i), DAG, LegalOperations, ForCodeSize, Depth + 1); + if (!!NewOps[i]) + Handles.emplace_back(NewOps[i]); + } bool NegA = !!NewOps[0]; bool NegB = !!NewOps[1]; diff --git a/llvm/test/CodeGen/X86/pr138982.ll b/llvm/test/CodeGen/X86/pr138982.ll new file mode 100644 index 0000000000000..32346d823a9fe --- /dev/null +++ b/llvm/test/CodeGen/X86/pr138982.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64 -mattr=+fma | FileCheck %s + +define <4 x float> @pr138982(<4 x float> %in_vec) { +; CHECK-LABEL: pr138982: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; CHECK-NEXT: vrcpps %xmm0, %xmm2 +; CHECK-NEXT: vrcpps %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpneqps %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vbroadcastss {{.*#+}} xmm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm4, %xmm0 +; CHECK-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm3 * xmm2) + xmm0 +; CHECK-NEXT: retq +entry: + %fneg = fneg <4 x float> %in_vec + %rcp = tail call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %fneg) + %cmp = fcmp une <4 x float> zeroinitializer, %in_vec + %sel = select <4 x i1> %cmp, <4 x float> %rcp, <4 x float> splat (float 1.000000e+00) + %fma = call nsz <4 x float> @llvm.fma.v4f32(<4 x float> %rcp, <4 x float> zeroinitializer, <4 x float> %sel) + ret <4 x float> %fma +} From 2d079b96a5fb2d1da62cfddbafa6632058b22e76 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Fri, 9 May 2025 17:55:48 -0700 Subject: [PATCH 26/83] release/20.x: [clang-format] Fix a crash on formatting missing r_paren/r_brace (#138230) Backport 79210feb2993ff9a79ef11f8a7016a527d4fcf22 --- clang/lib/Format/UnwrappedLineParser.cpp | 4 ++-- clang/unittests/Format/FormatTest.cpp | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index c3ffabce15ec8..673b3e6c4b8c2 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1837,8 +1837,8 @@ void UnwrappedLineParser::parseStructuralElement( nextToken(); if (FormatTok->is(tok::l_paren)) { parseParens(); - assert(FormatTok->Previous); - if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, + if (FormatTok->Previous && + FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, tok::l_paren)) { Line->SeenDecltypeAuto = true; } diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 49e1fde1d9ccf..90a79230e9f4c 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -13962,6 +13962,8 @@ TEST_F(FormatTest, IncorrectCodeUnbalancedBraces) { verifyNoCrash("struct Foo {\n" " operator foo(bar\n" "};"); + verifyNoCrash("decltype( {\n" + " {"); } TEST_F(FormatTest, IncorrectUnbalancedBracesInMacrosWithUnicode) { From 74ed1ac61104afd632b8553fe64524851ef37478 Mon Sep 17 00:00:00 2001 From: mojyack <66899529+mojyack@users.noreply.github.com> Date: Fri, 11 Apr 2025 06:23:26 +0900 Subject: [PATCH 27/83] [sanitizer_common] Fix build on ppc64+musl (#120036) In powerpc64-unknown-linux-musl, signal.h does not include asm/ptrace.h, which causes "member access into incomplete type 'struct pt_regs'" errors. 
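For illustration, a minimal sketch of the pattern involved, assuming a
powerpc64 musl target. The member access is modeled on how the sanitizers
read the program counter from a signal context; the helper function itself
is hypothetical.

```cpp
#include <signal.h>
#if defined(__linux__) && defined(__powerpc64__)
#include <asm/ptrace.h> // musl's signal.h only forward-declares struct pt_regs
#endif

// Without the explicit include above, dereferencing the regs pointer fails
// on musl with "member access into incomplete type 'struct pt_regs'".
unsigned long SketchGetPC(ucontext_t *uc) {
  return uc->uc_mcontext.regs->nip; // nip holds the program counter on powerpc
}
```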
Include the header explicitly to fix this. Also in sanitizer_linux_libcdep.cpp, there is a usage of TlsPreTcbSize which is not defined in such a platform. Guard the branch with macro. (cherry picked from commit 801b519dfd01e21da0be17aa8f8dc2ceb0eb9e77) --- .../lib/sanitizer_common/sanitizer_linux.cpp | 4 ++++ .../sanitizer_common/sanitizer_linux_libcdep.cpp | 13 +++++++------ .../sanitizer_platform_limits_posix.cpp | 2 +- .../sanitizer_stoptheworld_linux_libcdep.cpp | 3 ++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 7aa48d29d2d53..a4d526b4466c3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -86,6 +86,10 @@ # include # endif +# if SANITIZER_LINUX && defined(__powerpc64__) +# include +# endif + # if SANITIZER_FREEBSD # include # include diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index e11eff13cd326..331e1c7d8d152 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -619,21 +619,22 @@ static void GetTls(uptr *addr, uptr *size) { *addr = tp - RoundUpTo(*size, align); *size = tp - *addr + ThreadDescriptorSize(); # else - if (SANITIZER_GLIBC) - *size += 1664; - else if (SANITIZER_FREEBSD) - *size += 128; // RTLD_STATIC_TLS_EXTRA -# if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 +# if SANITIZER_GLIBC + *size += 1664; +# elif SANITIZER_FREEBSD + *size += 128; // RTLD_STATIC_TLS_EXTRA +# if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 const uptr pre_tcb_size = TlsPreTcbSize(); *addr -= pre_tcb_size; *size += pre_tcb_size; -# else +# else // arm and aarch64 reserve two words at TP, so this underestimates the range. // However, this is sufficient for the purpose of finding the pointers to // thread-specific data keys. 
const uptr tcb_size = ThreadDescriptorSize(); *addr -= tcb_size; *size += tcb_size; +# endif # endif # endif # elif SANITIZER_NETBSD diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index a5311d266b0c4..ec5f2edab6a64 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -96,7 +96,7 @@ # include # if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \ defined(__hexagon__) || defined(__loongarch__) || SANITIZER_RISCV64 || \ - defined(__sparc__) + defined(__sparc__) || defined(__powerpc64__) # include # ifdef __arm__ typedef struct user_fpregs elf_fpregset_t; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index 945da99d41f4e..58d17d90c343a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp @@ -31,7 +31,8 @@ #include // for pid_t #include // for iovec #include // for NT_PRSTATUS -#if (defined(__aarch64__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) && \ +#if (defined(__aarch64__) || defined(__powerpc64__) || \ + SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) && \ !SANITIZER_ANDROID // GLIBC 2.20+ sys/user does not include asm/ptrace.h # include From 0439d1d36312b4abe705d8048cfae64e7fedff6a Mon Sep 17 00:00:00 2001 From: cor3ntin Date: Fri, 2 May 2025 18:45:24 +0200 Subject: [PATCH 28/83] [Clang] Fix handling of reference types in tryEvaluateBuiltinObjectSize (#138247) The order of operation was slightly incorrect, as we were checking for incomplete types *before* handling reference types. 
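As a minimal illustration, in the spirit of the regression test added
below (the helper name here is made up):

    // A reference to an incomplete type used to reach the incomplete-type
    // check before the reference was stripped to its pointee. With the
    // checks reordered, the builtin simply yields the "unknown" result
    // ((size_t)-1 for types 0 and 1) instead of being mishandled.
    struct incomplete;
    void probe(incomplete &ref) {
      (void)__builtin_object_size(&ref, 1);
    }
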
Fixes #129397 --------- Co-authored-by: Erich Keane --- clang/lib/AST/ExprConstant.cpp | 8 +++++--- clang/test/SemaCXX/builtin-object-size-cxx14.cpp | 12 ++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5aae78dd2fee7..23602362eaa79 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12710,11 +12710,13 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc, bool DetermineForCompleteObject = refersToCompleteObject(LVal); auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) { - if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType()) + if (Ty.isNull()) return false; - if (Ty->isReferenceType()) - Ty = Ty.getNonReferenceType(); + Ty = Ty.getNonReferenceType(); + + if (Ty->isIncompleteType() || Ty->isFunctionType()) + return false; return HandleSizeof(Info, ExprLoc, Ty, Result); }; diff --git a/clang/test/SemaCXX/builtin-object-size-cxx14.cpp b/clang/test/SemaCXX/builtin-object-size-cxx14.cpp index b7c6f6be01f54..fdd3cb7af088f 100644 --- a/clang/test/SemaCXX/builtin-object-size-cxx14.cpp +++ b/clang/test/SemaCXX/builtin-object-size-cxx14.cpp @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx14 -std=c++14 %s // RUN: %clang_cc1 -fsyntax-only -verify -std=c++2a %s +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++2b %s + typedef __SIZE_TYPE__ size_t; @@ -119,3 +121,13 @@ constexpr int bos_new() { // cxx14-error {{constant expression}} void *p = new int; // cxx14-note {{until C++20}} return __builtin_object_size(p, 0); } + + +namespace GH129397 { + +struct incomplete; +void test(incomplete &ref) { + __builtin_object_size(&ref, 1); +} + +} From 1c0368417f55417b8b08ae5c605231be096ef4bc Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Mon, 17 Feb 2025 11:12:55 +0100 Subject: [PATCH 29/83] [clang][analysis] Fix flaky clang/test/Analysis/live-stmts.cpp test (2nd attempt) (#127406) In my previous attempt (#126913) of fixing the flaky case was on a good track when I used the begin locations as a stable ordering. However, I forgot to consider the case when the begin locations are the same among the Exprs. In an `EXPENSIVE_CHECKS` build, arrays are randomly shuffled prior to sorting them. This exposed the flaky behavior much more often basically breaking the "stability" of the vector - as it should. Because of this, I had to revert the previous fix attempt in #127034. To fix this, I use this time `Expr::getID` for a stable ID for an Expr. Hopefully fixes #126619 Hopefully fixes #126804 (cherry picked from commit f378e52ed3c6f8da4973f97f1ef043c2eb0da721) --- clang/lib/Analysis/LiveVariables.cpp | 11 +++++++++-- clang/test/Analysis/live-stmts.cpp | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/clang/lib/Analysis/LiveVariables.cpp b/clang/lib/Analysis/LiveVariables.cpp index 481932ee59c8e..5fb5ee767a683 100644 --- a/clang/lib/Analysis/LiveVariables.cpp +++ b/clang/lib/Analysis/LiveVariables.cpp @@ -662,12 +662,19 @@ void LiveVariables::dumpExprLiveness(const SourceManager &M) { } void LiveVariablesImpl::dumpExprLiveness(const SourceManager &M) { + const ASTContext &Ctx = analysisContext.getASTContext(); + auto ByIDs = [&Ctx](const Expr *L, const Expr *R) { + return L->getID(Ctx) < R->getID(Ctx); + }; + // Don't iterate over blockEndsToLiveness directly because it's not sorted. 
for (const CFGBlock *B : *analysisContext.getCFG()) { - llvm::errs() << "\n[ B" << B->getBlockID() << " (live expressions at block exit) ]\n"; - for (const Expr *E : blocksEndToLiveness[B].liveExprs) { + std::vector LiveExprs; + llvm::append_range(LiveExprs, blocksEndToLiveness[B].liveExprs); + llvm::sort(LiveExprs, ByIDs); + for (const Expr *E : LiveExprs) { llvm::errs() << "\n"; E->dump(); } diff --git a/clang/test/Analysis/live-stmts.cpp b/clang/test/Analysis/live-stmts.cpp index c60f522588e39..ca2ff6da8b133 100644 --- a/clang/test/Analysis/live-stmts.cpp +++ b/clang/test/Analysis/live-stmts.cpp @@ -44,6 +44,8 @@ int testThatDumperWorks(int x, int y, int z) { // CHECK-NEXT: ImplicitCastExpr {{.*}} // CHECK-NEXT: `-ImplicitCastExpr {{.*}} // CHECK-NEXT: `-DeclRefExpr {{.*}} 'x' 'int' +// CHECK-EMPTY: +// CHECK-EMPTY: // CHECK: [ B4 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-NEXT: DeclRefExpr {{.*}} 'y' 'int' From a708fb737a78ff0b3d13d3d8e21f354542947e07 Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Sun, 27 Apr 2025 11:12:47 +0800 Subject: [PATCH 30/83] [RISCV] Allow `Zicsr`/`Zifencei` to duplicate with `g` (#136842) This matches GCC and we supported it in LLVM 17/18. Fixes #136803 (cherry picked from commit 6c3373534305a2ce23dd939344dd0a387a09fe88) --- clang/docs/ReleaseNotes.rst | 2 ++ llvm/lib/TargetParser/RISCVISAInfo.cpp | 18 +++++++++++++++--- .../TargetParser/RISCVISAInfoTest.cpp | 8 ++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b8f26ec9a5447..47ef2f80ac3f2 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1267,6 +1267,8 @@ RISC-V Support - The option ``-mcmodel=large`` for the large code model is supported. - Bump RVV intrinsic to version 1.0, the spec: https://github.com/riscv-non-isa/rvv-intrinsic-doc/releases/tag/v1.0.0-rc4 +- `Zicsr` / `Zifencei` are allowed to be duplicated in the presence of `g` in `-march`. + CUDA/HIP Language Changes ^^^^^^^^^^^^^^^^^^^^^^^^^ - Fixed a bug about overriding a constexpr pure-virtual member function with a non-constexpr virtual member function which causes compilation failure when including standard C++ header `format`. diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index c78d60fd86b3f..64ec411cb06e1 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -45,9 +45,8 @@ struct RISCVProfile { } // end anonymous namespace -static const char *RISCVGImplications[] = { - "i", "m", "a", "f", "d", "zicsr", "zifencei" -}; +static const char *RISCVGImplications[] = {"i", "m", "a", "f", "d"}; +static const char *RISCVGImplicationsZi[] = {"zicsr", "zifencei"}; #define GET_SUPPORTED_EXTENSIONS #include "llvm/TargetParser/RISCVTargetParserDef.inc" @@ -718,6 +717,19 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, } while (!Ext.empty()); } + // We add Zicsr/Zifenci as final to allow duplicated "zicsr"/"zifencei" like + // "rv64g_zicsr_zifencei". 
+ if (Baseline == 'g') { + for (const char *Ext : RISCVGImplicationsZi) { + if (ISAInfo->Exts.count(Ext)) + continue; + + auto Version = findDefaultVersion(Ext); + assert(Version && "Default extension version not found?"); + ISAInfo->Exts[std::string(Ext)] = {Version->Major, Version->Minor}; + } + } + return RISCVISAInfo::postProcessAndChecking(std::move(ISAInfo)); } diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 7ebfcf915a7c5..5089bc0fd479a 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -507,6 +507,14 @@ TEST(ParseArchString, RejectsDoubleOrTrailingUnderscore) { } TEST(ParseArchString, RejectsDuplicateExtensionNames) { + // Zicsr/Zifencei are allowed to duplicate with "g". + ASSERT_THAT_EXPECTED(RISCVISAInfo::parseArchString("rv64g_zicsr", true), + Succeeded()); + ASSERT_THAT_EXPECTED(RISCVISAInfo::parseArchString("rv64g_zifencei", true), + Succeeded()); + ASSERT_THAT_EXPECTED( + RISCVISAInfo::parseArchString("rv64g_zicsr_zifencei", true), Succeeded()); + EXPECT_EQ(toString(RISCVISAInfo::parseArchString("rv64ii", true).takeError()), "invalid standard user-level extension 'i'"); EXPECT_EQ(toString(RISCVISAInfo::parseArchString("rv32ee", true).takeError()), From 7b09d7b446383b71b63d429b21ee45ba389c5134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Mon, 12 May 2025 10:56:29 +0200 Subject: [PATCH 31/83] [analyzer] Workaround for slowdown spikes (unintended scope increase) (#136720) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently some users reported that they observed large increases of runtime (up to +600% on some translation units) when they upgraded to a more recent (slightly patched, internal) clang version. Bisection revealed that the bulk of this increase was probably caused by my earlier commit bb27d5e5c6b194a1440b8ac4e5ace68d0ee2a849 ("Don't assume third iteration in loops"). As I evaluated that earlier commit on several open source project, it turns out that on average it's runtime-neutral (or slightly helpful: it reduced the total analysis time by 1.5%) but it can cause runtime spikes on some code: in particular it more than doubled the time to analyze `tmux` (one of the smaller test projects). Further profiling and investigation proved that these spikes were caused by an _increase of analysis scope_ because there was an heuristic that placed functions on a "don't inline this" blacklist if they reached the `-analyzer-max-loop` limit (anywhere, on any one execution path) -- which became significantly rarer when my commit ensured the analyzer no longer "just assumes" four iterations. (With more inlining significantly more entry points use up their allocated budgets, which leads to the increased runtime.) I feel that this heuristic for the "don't inline" blacklist is unjustified and arbitrary, because reaching the "retry without inlining" limit on one path does not imply that inlining the function won't be valuable on other paths -- so I hope that we can eventually replace it with more "natural" limits of the analysis scope. 
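Skipping ahead a little: the workaround below is exposed as an
analyzer-config option, so users who prefer the old, broader inlining
behavior can opt back into it through the usual config plumbing,
roughly (with the default driver spelling):

    clang --analyze -Xclang -analyzer-config \
      -Xclang inline-functions-with-ambiguous-loops=true file.c
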
However, the runtime increases are annoying for the users whose project is affected, so I created this quick workaround commit that approximates the "don't inline" blacklist effects of ambiguous loops (where the analyzer doesn't understand the loop condition) without fully reverting the "Don't assume third iteration" commit (to avoid reintroducing the false positives that were eliminated by it). Investigating this issue was a team effort: I'm grateful to Endre Fülöp (gamesh411) who did the bisection and shared his time measurement setup, and Gábor Tóthvári (tigbr) who helped me in profiling. (cherry picked from commit 9600a12f0de233324b559f60997b9c2db153fede) --- .../StaticAnalyzer/Core/AnalyzerOptions.def | 13 ++ .../Core/PathSensitive/FunctionSummary.h | 4 - clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 60 +++++- clang/test/Analysis/analyzer-config.c | 1 + .../Analysis/loop-based-inlining-prevention.c | 200 ++++++++++++++++++ clang/test/Analysis/loop-unrolling.cpp | 30 ++- 6 files changed, 286 insertions(+), 22 deletions(-) create mode 100644 clang/test/Analysis/loop-based-inlining-prevention.c diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index 34bb7a809162b..dbb8e832db5ff 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -385,6 +385,19 @@ ANALYZER_OPTION( "flex\" won't be analyzed.", true) +ANALYZER_OPTION( + bool, InlineFunctionsWithAmbiguousLoops, "inline-functions-with-ambiguous-loops", + "If disabled (the default), the analyzer puts functions on a \"do not " + "inline this\" list if it finds an execution path within that function " + "that may potentially perform 'analyzer-max-loop' (= 4 by default) " + "iterations in a loop. (Note that functions that _definitely_ reach the " + "loop limit on some execution path are currently marked as \"do not " + "inline\" even if this option is enabled.) Enabling this option " + "eliminates this (somewhat arbitrary) restriction from the analysis " + "scope, which increases the analysis runtime (on average by ~10%, but " + "a few translation units may see much larger slowdowns).", + false) + //===----------------------------------------------------------------------===// // Unsigned analyzer options. 
//===----------------------------------------------------------------------===// diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h index 3ee0d229cfc29..761395260a0cf 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h @@ -81,10 +81,6 @@ class FunctionSummariesTy { I->second.MayInline = 0; } - void markReachedMaxBlockCount(const Decl *D) { - markShouldNotInline(D); - } - std::optional mayInline(const Decl *D) { MapTy::const_iterator I = Map.find(D); if (I != Map.end() && I->second.InlineChecked) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 140c77790496d..cfb8be2e7f0f8 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -2510,6 +2510,20 @@ bool ExprEngine::replayWithoutInlining(ExplodedNode *N, return true; } +/// Return the innermost location context which is inlined at `Node`, unless +/// it's the top-level (entry point) location context. +static const LocationContext *getInlinedLocationContext(ExplodedNode *Node, + ExplodedGraph &G) { + const LocationContext *CalleeLC = Node->getLocation().getLocationContext(); + const LocationContext *RootLC = + (*G.roots_begin())->getLocation().getLocationContext(); + + if (CalleeLC->getStackFrame() == RootLC->getStackFrame()) + return nullptr; + + return CalleeLC; +} + /// Block entrance. (Update counters). void ExprEngine::processCFGBlockEntrance(const BlockEdge &L, NodeBuilderWithSinks &nodeBuilder, @@ -2557,21 +2571,24 @@ void ExprEngine::processCFGBlockEntrance(const BlockEdge &L, const ExplodedNode *Sink = nodeBuilder.generateSink(Pred->getState(), Pred, &tag); - // Check if we stopped at the top level function or not. - // Root node should have the location context of the top most function. - const LocationContext *CalleeLC = Pred->getLocation().getLocationContext(); - const LocationContext *CalleeSF = CalleeLC->getStackFrame(); - const LocationContext *RootLC = - (*G.roots_begin())->getLocation().getLocationContext(); - if (RootLC->getStackFrame() != CalleeSF) { - Engine.FunctionSummaries->markReachedMaxBlockCount(CalleeSF->getDecl()); + if (const LocationContext *LC = getInlinedLocationContext(Pred, G)) { + // FIXME: This will unconditionally prevent inlining this function (even + // from other entry points), which is not a reasonable heuristic: even if + // we reached max block count on this particular execution path, there + // may be other execution paths (especially with other parametrizations) + // where the analyzer can reach the end of the function (so there is no + // natural reason to avoid inlining it). However, disabling this would + // significantly increase the analysis time (because more entry points + // would exhaust their allocated budget), so it must be compensated by a + // different (more reasonable) reduction of analysis scope. + Engine.FunctionSummaries->markShouldNotInline( + LC->getStackFrame()->getDecl()); // Re-run the call evaluation without inlining it, by storing the // no-inlining policy in the state and enqueuing the new work item on // the list. Replay should almost never fail. Use the stats to catch it // if it does. 
- if ((!AMgr.options.NoRetryExhausted && - replayWithoutInlining(Pred, CalleeLC))) + if ((!AMgr.options.NoRetryExhausted && replayWithoutInlining(Pred, LC))) return; NumMaxBlockCountReachedInInlined++; } else @@ -2835,8 +2852,29 @@ void ExprEngine::processBranch( // conflicts with the widen-loop analysis option (which is off by // default). If we intend to support and stabilize the loop widening, // we must ensure that it 'plays nicely' with this logic. - if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops) + if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops) { Builder.generateNode(StTrue, true, PredN); + } else if (!AMgr.options.InlineFunctionsWithAmbiguousLoops) { + // FIXME: There is an ancient and arbitrary heuristic in + // `ExprEngine::processCFGBlockEntrance` which prevents all further + // inlining of a function if it finds an execution path within that + // function which reaches the `MaxBlockVisitOnPath` limit (a/k/a + // `analyzer-max-loop`, by default four iterations in a loop). Adding + // this "don't assume third iteration" logic significantly increased + // the analysis runtime on some inputs because less functions were + // arbitrarily excluded from being inlined, so more entry points used + // up their full allocated budget. As a hacky compensation for this, + // here we apply the "should not inline" mark in cases when the loop + // could potentially reach the `MaxBlockVisitOnPath` limit without the + // "don't assume third iteration" logic. This slightly overcompensates + // (activates if the third iteration can be entered, and will not + // recognize cases where the fourth iteration would't be completed), but + // should be good enough for practical purposes. + if (const LocationContext *LC = getInlinedLocationContext(Pred, G)) { + Engine.FunctionSummaries->markShouldNotInline( + LC->getStackFrame()->getDecl()); + } + } } if (StFalse) diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index d5eb790b82f23..b47ca59e79827 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -88,6 +88,7 @@ // CHECK-NEXT: graph-trim-interval = 1000 // CHECK-NEXT: ignore-bison-generated-files = true // CHECK-NEXT: ignore-flex-generated-files = true +// CHECK-NEXT: inline-functions-with-ambiguous-loops = false // CHECK-NEXT: inline-lambdas = true // CHECK-NEXT: ipa = dynamic-bifurcate // CHECK-NEXT: ipa-always-inline-size = 3 diff --git a/clang/test/Analysis/loop-based-inlining-prevention.c b/clang/test/Analysis/loop-based-inlining-prevention.c new file mode 100644 index 0000000000000..73627112e2d32 --- /dev/null +++ b/clang/test/Analysis/loop-based-inlining-prevention.c @@ -0,0 +1,200 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify=expected,default %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config inline-functions-with-ambiguous-loops=true -verify=expected,enabled %s + +// This file tests some heuristics in the engine that put functions on a +// "do not inline" list if their analyisis reaches the `analyzer-max-loop` +// limit (by default 4 iterations) in a loop. 
This was almost surely intended +// as memoization optimization for the "retry without inlining" fallback (if we +// had to retry once, next time don't even try inlining), but aggressively +// oversteps the "natural" scope: reaching 4 iterations on _one particular_ +// execution path does not imply that each path would need "retry without +// inlining" especially if a different call receives different arguments. +// +// This heuristic significantly affects the scope/depth of the analysis (and +// therefore the execution time) because without this limitation on the +// inlining significantly more entry points would be able to exhaust their +// `max-nodes` quota. (Trivial thin wrappers around big complex functions are +// common in many projects.) +// +// Unfortunately, this arbitrary heuristic strongly relies on the current loop +// handling model and its many limitations, so improvements in loop handling +// can cause surprising slowdowns by reducing the "do not inline" blacklist. +// In the tests "FIXME-BUT-NEEDED" comments mark "problematic" (aka buggy) +// analyzer behavior which cannot be fixed without also improving the +// heuristics for (not) inlining large functions. + + int getNum(void); // Get an unknown symbolic number. + +void clang_analyzer_dump(int arg); + +//----------------------------------------------------------------------------- +// Simple case: inlined function never reaches `analyzer-max-loop`, so it is +// always inlined. + +int inner_simple(int callIdx) { + clang_analyzer_dump(callIdx); // expected-warning {{1 S32}} + // expected-warning@-1 {{2 S32}} + return 42; +} + +int outer_simple(void) { + int x = inner_simple(1); + int y = inner_simple(2); + return 53 / (x - y); // expected-warning {{Division by zero}} +} + +//----------------------------------------------------------------------------- +// Inlined function always reaches `analyzer-max-loop`, which stops the +// analysis on that path and puts the function on the "do not inline" list. + +int inner_fixed_loop_1(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // expected-warning {{1 S32}} + for (i = 0; i < 10; i++); // FIXME-BUT-NEEDED: This stops the analysis. + clang_analyzer_dump(callIdx); // no-warning + return 42; +} + +int outer_fixed_loop_1(void) { + int x = inner_fixed_loop_1(1); + int y = inner_fixed_loop_1(2); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function always reaches `analyzer-max-loop`; inlining is prevented +// even for different entry points. +// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_fixed_loop_2` is analyzed first and it will be the one which is +// able to inline the inner function. + +int inner_fixed_loop_2(int callIdx) { + // Identical copy of inner_fixed_loop_1. + int i; + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + for (i = 0; i < 10; i++); // FIXME-BUT-NEEDED: This stops the analysis. + clang_analyzer_dump(callIdx); // no-warning + return 42; +} + +int outer_1_fixed_loop_2(void) { + return inner_fixed_loop_2(1); +} + +int outer_2_fixed_loop_2(void) { + return inner_fixed_loop_2(2); +} + +//----------------------------------------------------------------------------- +// Inlined function reaches `analyzer-max-loop` only in its second call. 
The +// function is inlined twice but the second call doesn't finish and ends up +// being conservatively evaluated. + +int inner_parametrized_loop_1(int count) { + int i; + clang_analyzer_dump(count); // expected-warning {{2 S32}} + // expected-warning@-1 {{10 S32}} + for (i = 0; i < count; i++); + // FIXME-BUT-NEEDED: This loop stops the analysis when count >=4. + clang_analyzer_dump(count); // expected-warning {{2 S32}} + return 42; +} + +int outer_parametrized_loop_1(void) { + int x = inner_parametrized_loop_1(2); + int y = inner_parametrized_loop_1(10); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function reaches `analyzer-max-loop` on its first call, so the +// second call isn't inlined (although it could be fully evaluated). + +int inner_parametrized_loop_2(int count) { + // Identical copy of inner_parametrized_loop_1. + int i; + clang_analyzer_dump(count); // expected-warning {{10 S32}} + for (i = 0; i < count; i++); + // FIXME-BUT-NEEDED: This loop stops the analysis when count >=4. + clang_analyzer_dump(count); // no-warning + return 42; +} + +int outer_parametrized_loop_2(void) { + int y = inner_parametrized_loop_2(10); + int x = inner_parametrized_loop_2(2); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function may or may not reach `analyzer-max-loop` depending on an +// ambiguous check before the loop. This is very similar to the "fixed loop" +// cases: the function is placed on the "don't inline" list when any execution +// path reaches `analyzer-max-loop` (even if other execution paths reach the +// end of the function). +// NOTE: This is tested with two separate entry points to ensure that one +// inlined call is fully evaluated before we try to inline the other call. +// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_conditional_loop` is analyzed first and it will be the one which +// is able to inline the inner function. + +int inner_conditional_loop(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + if (getNum() == 777) { + for (i = 0; i < 10; i++); + } + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + return 42; +} + +int outer_1_conditional_loop(void) { + return inner_conditional_loop(1); +} + +int outer_2_conditional_loop(void) { + return inner_conditional_loop(2); +} + +//----------------------------------------------------------------------------- +// Inlined function executes an ambiguous loop that may or may not reach +// `analyzer-max-loop`. Historically, before the "don't assume third iteration" +// commit (bb27d5e5c6b194a1440b8ac4e5ace68d0ee2a849) this worked like the +// `conditional_loop` cases: the analyzer was able to find a path reaching +// `analyzer-max-loop` so inlining was disabled. After that commit the analyzer +// does not _assume_ a third (or later) iteration (i.e. does not enter those +// iterations if the loop condition is an unknown value), so e.g. this test +// function does not reach `analyzer-max-loop` iterations and the inlining is +// not disabled. 
+// Unfortunately this change significantly increased the workload and +// runtime of the analyzer (more entry points used up their budget), so the +// option `inline-functions-with-ambiguous-loops` was introduced and disabled +// by default to suppress the inlining in situations where the "don't assume +// third iteration" logic activates. +// NOTE: This is tested with two separate entry points to ensure that one +// inlined call is fully evaluated before we try to inline the other call. +// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_ambiguous_loop` is analyzed first and it will be the one which +// is able to inline the inner function. + +int inner_ambiguous_loop(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // default-warning {{2 S32}} + // enabled-warning@-1 {{1 S32}} + // enabled-warning@-2 {{2 S32}} + for (i = 0; i < getNum(); i++); + return i; +} + +int outer_1_ambiguous_loop(void) { + return inner_ambiguous_loop(1); +} +int outer_2_ambiguous_loop(void) { + return inner_ambiguous_loop(2); +} diff --git a/clang/test/Analysis/loop-unrolling.cpp b/clang/test/Analysis/loop-unrolling.cpp index bf05a7739ce48..ebae81e000c7a 100644 --- a/clang/test/Analysis/loop-unrolling.cpp +++ b/clang/test/Analysis/loop-unrolling.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true -verify -std=c++14 -analyzer-config exploration_strategy=unexplored_first_queue %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true,exploration_strategy=dfs -verify -std=c++14 -DDFS=1 %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true -verify=expected,default -std=c++14 -analyzer-config exploration_strategy=unexplored_first_queue %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true,exploration_strategy=dfs -verify=expected,dfs -std=c++14 %s void clang_analyzer_numTimesReached(); void clang_analyzer_warnIfReached(); @@ -337,6 +337,7 @@ int nested_both_unrolled() { } int simple_known_bound_loop() { + // Iteration count visible: can be unrolled and fully executed. for (int i = 2; i < 12; i++) { // This function is inlined in nested_inlined_unroll1() clang_analyzer_numTimesReached(); // expected-warning {{90}} @@ -345,27 +346,42 @@ int simple_known_bound_loop() { } int simple_unknown_bound_loop() { + // Iteration count unknown: unrolling won't happen and the execution will be + // split two times: + // (1) split between skipped loop (immediate exit) and entering the loop + // (2) split between exit after 1 iteration and entering the second iteration + // After these there is no third state split because the "don't assume third + // iteration" logic in `ExprEngine::processBranch` prevents it; but the + // `legacy-inlining-prevention` logic will put this function onto the list of + // functions that may not be inlined in the future. + // The exploration strategy apparently influences the number of times this + // function can be inlined before it's placed on the "don't inline" list. 
for (int i = 2; i < getNum(); i++) { - clang_analyzer_numTimesReached(); // expected-warning {{8}} + clang_analyzer_numTimesReached(); // default-warning {{4}} dfs-warning {{8}} } return 0; } int nested_inlined_unroll1() { + // Here the analyzer can unroll and fully execute both the outer loop and the + // inner loop within simple_known_bound_loop(). int k; for (int i = 0; i < 9; i++) { clang_analyzer_numTimesReached(); // expected-warning {{9}} - k = simple_known_bound_loop(); // no reevaluation without inlining + k = simple_known_bound_loop(); } int a = 22 / k; // expected-warning {{Division by zero}} return 0; } int nested_inlined_no_unroll1() { + // Here no unrolling happens and we only run `analyzer-max-loop` (= 4) + // iterations of the loop within this function, but some state splits happen + // in `simple_unknown_bound_loop()` calls. int k; - for (int i = 0; i < 9; i++) { - clang_analyzer_numTimesReached(); // expected-warning {{10}} - k = simple_unknown_bound_loop(); // reevaluation without inlining, splits the state as well + for (int i = 0; i < 40; i++) { + clang_analyzer_numTimesReached(); // default-warning {{9}} dfs-warning {{12}} + k = simple_unknown_bound_loop(); } int a = 22 / k; // no-warning return 0; From 1e4d39e07757d10ff7b880565fca2ae12695622a Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 14 May 2025 11:14:42 -0700 Subject: [PATCH 32/83] Bump version to 20.1.6 --- cmake/Modules/LLVMVersion.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index c8cc0b8968b05..40ba164763d60 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 5) + set(LLVM_VERSION_PATCH 6) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) From ff2e8f93f6090965e82d799af43f6dfef52baa66 Mon Sep 17 00:00:00 2001 From: Douglas Yung Date: Wed, 7 May 2025 06:13:07 +0000 Subject: [PATCH 33/83] Fix test pfalse-v4i1.ll added in #138712 to require asserts. Should fix build bot failure: https://lab.llvm.org/buildbot/#/builders/202/builds/1102 (cherry picked from commit 194a4a333a95f9e001d2c8abe82c3d4cf8894acf) --- llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll b/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll index c0904b8b4fdd6..2c26bb1e310ea 100644 --- a/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll +++ b/llvm/test/CodeGen/Hexagon/isel/pfalse-v4i1.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=hexagon -debug-only=isel 2>&1 < %s - | FileCheck %s +; REQUIRES: asserts ; CHECK: [[R0:%[0-9]+]]:intregs = A2_tfrsi 0 ; CHECK-NEXT: predregs = C2_tfrrp killed [[R0]]:intregs From 85e06a7614831577a632905c7e3a4f6501fcabd3 Mon Sep 17 00:00:00 2001 From: tangaac Date: Wed, 7 May 2025 09:29:35 +0800 Subject: [PATCH 34/83] [LoongArch] Fix fp_to_uint/fp_to_sint conversion errors for lasx (#137129) Prvious `fp_to_uint/fp_to_sint` patterns for `v4f64 -> v4i32` are wrong. Conversion error was triggered after pr https://github.com/llvm/llvm-project/pull/126456. 
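For illustration only, a hedged sketch using Clang's vector extensions
(not the original reproducer):

    // Lane-wise f64 -> i32 truncation. The old LASX pattern lowered this
    // through an intermediate f32 convert, which can round values such as
    // 16777217.0 (2^24 + 1) and produce a wrong integer; the direct
    // double-to-word truncation emitted now avoids that rounding.
    typedef double v4f64 __attribute__((vector_size(32)));
    typedef int v4i32 __attribute__((vector_size(16)));

    v4i32 trunc_v4f64(v4f64 v) {
      return __builtin_convertvector(v, v4i32);
    }
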
(cherry picked from commit b5c7724f82b6afe98761d0a1c5b6ee7cd2330ada) --- .../LoongArch/LoongArchLASXInstrInfo.td | 28 +++++++++---------- .../LoongArch/lasx/ir-instruction/fptosi.ll | 4 +-- .../LoongArch/lasx/ir-instruction/fptoui.ll | 4 +-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 7022fddf34100..9b515a2721d7f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1792,24 +1792,24 @@ def : Pat<(v4f32 (uint_to_fp v4i64:$vj)), // XVFTINTRZ_{W_S/L_D} def : Pat<(v8i32 (fp_to_sint v8f32:$vj)), (XVFTINTRZ_W_S v8f32:$vj)>; def : Pat<(v4i64 (fp_to_sint v4f64:$vj)), (XVFTINTRZ_L_D v4f64:$vj)>; -def : Pat<(v4i64 (fp_to_sint v4f32:$vj)), - (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), (VFTINTRZ_W_S v4f32:$vj), - sub_128))>; -def : Pat<(v4i32 (fp_to_sint (v4f64 LASX256:$vj))), - (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), - v4f64:$vj)), - sub_128)>; +def : Pat<(v4i64(fp_to_sint v4f32:$vj)), (VEXT2XV_D_W(SUBREG_TO_REG(i64 0), + (VFTINTRZ_W_S v4f32:$vj), + sub_128))>; +def : Pat<(v4i32(fp_to_sint v4f64:$vj)), + (EXTRACT_SUBREG(XVPICKEV_W(XVPERMI_D(XVFTINTRZ_L_D v4f64:$vj), 238), + (XVFTINTRZ_L_D v4f64:$vj)), + sub_128)>; // XVFTINTRZ_{W_SU/L_DU} def : Pat<(v8i32 (fp_to_uint v8f32:$vj)), (XVFTINTRZ_WU_S v8f32:$vj)>; def : Pat<(v4i64 (fp_to_uint v4f64:$vj)), (XVFTINTRZ_LU_D v4f64:$vj)>; -def : Pat<(v4i64 (fp_to_uint v4f32:$vj)), - (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), (VFTINTRZ_WU_S v4f32:$vj), - sub_128))>; -def : Pat<(v4i32 (fp_to_uint (v4f64 LASX256:$vj))), - (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238), - v4f64:$vj)), - sub_128)>; +def : Pat<(v4i64(fp_to_uint v4f32:$vj)), (VEXT2XV_DU_WU(SUBREG_TO_REG(i64 0), + (VFTINTRZ_WU_S v4f32:$vj), + sub_128))>; +def : Pat<(v4i32(fp_to_uint v4f64:$vj)), + (EXTRACT_SUBREG(XVPICKEV_W(XVPERMI_D(XVFTINTRZ_LU_D v4f64:$vj), 238), + (XVFTINTRZ_LU_D v4f64:$vj)), + sub_128)>; // XVPERMI_Q foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll index 0d9f57b57ffae..ed333c303879c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll @@ -31,9 +31,9 @@ define void @fptosi_v4f64_v4i32(ptr %res, ptr %in){ ; CHECK-LABEL: fptosi_v4f64_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 +; CHECK-NEXT: xvpickev.w $xr0, $xr1, $xr0 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret %v0 = load <4 x double>, ptr %in diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll index 27d70f33cd34e..9c499ba71d646 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll @@ -31,9 +31,9 @@ define void @fptoui_v4f64_v4i32(ptr %res, ptr %in){ ; CHECK-LABEL: fptoui_v4f64_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 -; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 -; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 +; CHECK-NEXT: xvpickev.w $xr0, $xr1, 
$xr0 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret %v0 = load <4 x double>, ptr %in From e3d2c00ccee45e882233fce230c42b23423a8ef7 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Thu, 15 May 2025 03:28:18 -0700 Subject: [PATCH 35/83] [LLD][COFF] Allow -arm64xsameaddress in ARM64EC directives (#139631) Make it a no-op for now, which is sufficient for non-hybrid images. Fixes #131712. (cherry picked from commit d5da557782dd47395fb41e03d7663df6319d7ea6) --- lld/COFF/Driver.cpp | 6 +++ lld/COFF/Options.td | 1 + lld/test/COFF/arm64x-sameaddress.test | 56 +++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 lld/test/COFF/arm64x-sameaddress.test diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index f50ca529df4d7..a669b7e9296f6 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -492,6 +492,12 @@ void LinkerDriver::parseDirectives(InputFile *file) { case OPT_alternatename: parseAlternateName(arg->getValue()); break; + case OPT_arm64xsameaddress: + if (!file->symtab.isEC()) + Warn(ctx) << arg->getSpelling() + << " is not allowed in non-ARM64EC files (" << toString(file) + << ")"; + break; case OPT_defaultlib: if (std::optional path = findLibIfNew(arg->getValue())) enqueuePath(*path, false, false); diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index b6fd3d0daaef9..ea2e7ded38043 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -355,3 +355,4 @@ def tlbid : P_priv<"tlbid">; def tlbout : P_priv<"tlbout">; def verbose_all : P_priv<"verbose">; def guardsym : P_priv<"guardsym">; +def arm64xsameaddress : P_priv<"arm64xsameaddress">; diff --git a/lld/test/COFF/arm64x-sameaddress.test b/lld/test/COFF/arm64x-sameaddress.test new file mode 100644 index 0000000000000..c69be9d268c3b --- /dev/null +++ b/lld/test/COFF/arm64x-sameaddress.test @@ -0,0 +1,56 @@ +REQUIRES: aarch64 +RUN: split-file %s %t.dir && cd %t.dir + +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows func-arm64ec.s -o func-arm64ec.obj +RUN: llvm-mc -filetype=obj -triple=aarch64-windows func-arm64.s -o func-arm64.obj +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows drectve.s -o drectve.obj +RUN: llvm-mc -filetype=obj -triple=aarch64-windows drectve.s -o drectve-arm64.obj +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj +RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64.s -o loadconfig-arm64.obj + +RUN: lld-link -machine:arm64x -dll -noentry -out:out.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ +RUN: func-arm64.obj func-arm64ec.obj drectve.obj + +RUN: lld-link -machine:arm64x -dll -noentry -out:out-cmd.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ +RUN: func-arm64.obj func-arm64ec.obj -arm64xsameaddress:func + +RUN: lld-link -machine:arm64ec -dll -noentry -out:out-ec.dll loadconfig-arm64ec.obj func-arm64ec.obj drectve.obj + +RUN: lld-link -machine:arm64x -dll -noentry -out:out-warn.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ +RUN: func-arm64.obj func-arm64ec.obj drectve-arm64.obj 2>&1 | FileCheck --check-prefix=WARN %s +WARN: lld-link: warning: -arm64xsameaddress: is not allowed in non-ARM64EC files (drectve-arm64.obj) + +#--- func-arm64.s + .section .text,"xr",discard,func + .globl func +func: + mov x0, #1 + ret + +#--- func-arm64ec.s + .section .text,"xr",discard,"#func" + .globl "#func" +"#func": + mov x0, #2 + ret + + .weak_anti_dep func + .set func,"#func" + + .section .wowthk,"xr",discard,entry_thunk + .globl entry_thunk +entry_thunk: + mov x0, #3 + ret + + .section 
.test,"dr" + .rva func + + .section .hybmp$x,"yi" + .symidx "#func" + .symidx entry_thunk + .word 1 + +#--- drectve.s + .section .drectve, "yn" + .ascii " -arm64xsameaddress:func" From 5befd1fb3c97a5c880da4c1e3ae4c8cf7b614425 Mon Sep 17 00:00:00 2001 From: Shafik Yaghmour Date: Thu, 15 May 2025 16:04:37 -0700 Subject: [PATCH 36/83] [Clang][AST] Fix HandleLValueBase to deal with references (#140105) Since P2280R4 Unknown references and pointers was implemented, HandleLValueBase now has to deal with referneces: D.MostDerivedType->getAsCXXRecordDecl() will return a nullptr if D.MostDerivedType is a ReferenceType. The fix is to use getNonReferenceType() to obtain the Pointee Type if we have a reference. Fixes: https://github.com/llvm/llvm-project/issues/139452 (cherry picked from commit 136f2ba2a7bca015ef831c91fb0db5e5e31b7632) # Conflicts: # clang/docs/ReleaseNotes.rst --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/AST/ExprConstant.cpp | 6 +++++- .../SemaCXX/constant-expression-p2280r4.cpp | 21 +++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 47ef2f80ac3f2..2f43dc4021fd8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -909,6 +909,8 @@ Bug Fixes in This Version being deleted has a potentially throwing destructor (#GH118660). - Clang now outputs correct values when #embed data contains bytes with negative signed char values (#GH102798). +- Fix crash due to unknown references and pointer implementation and handling of + base classes. (GH139452) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 23602362eaa79..e0746f4532245 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3311,7 +3311,11 @@ static bool HandleLValueBase(EvalInfo &Info, const Expr *E, LValue &Obj, return false; // Extract most-derived object and corresponding type. - DerivedDecl = D.MostDerivedType->getAsCXXRecordDecl(); + // FIXME: After implementing P2280R4 it became possible to get references + // here. We do MostDerivedType->getAsCXXRecordDecl() in several other + // locations and if we see crashes in those locations in the future + // it may make more sense to move this fix into Lvalue::set. 
+ DerivedDecl = D.MostDerivedType.getNonReferenceType()->getAsCXXRecordDecl(); if (!CastToDerivedClass(Info, E, Obj, DerivedDecl, D.MostDerivedPathLength)) return false; diff --git a/clang/test/SemaCXX/constant-expression-p2280r4.cpp b/clang/test/SemaCXX/constant-expression-p2280r4.cpp index 6c9a87267109c..87beeb4d3dc84 100644 --- a/clang/test/SemaCXX/constant-expression-p2280r4.cpp +++ b/clang/test/SemaCXX/constant-expression-p2280r4.cpp @@ -179,3 +179,24 @@ namespace extern_reference_used_as_unknown { int y; constinit int& g = (x,y); // expected-warning {{left operand of comma operator has no effect}} } + +namespace GH139452 { +struct Dummy { + explicit operator bool() const noexcept { return true; } +}; + +struct Base { int error; }; +struct Derived : virtual Base { }; + +template +constexpr R get_value() { + const auto& derived_val = Derived{}; + if (derived_val.error != 0) + /* nothing */; + return R{}; +} + +int f() { + return !get_value(); // contextually convert the function call result to bool +} +} From a169f5ca4e4fdd3e55baef9e69a3fa92e79d7b4a Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Tue, 29 Apr 2025 14:35:15 +0200 Subject: [PATCH 37/83] Correct position of CFI Instruction for Pointer Authentication" This a partial of reverts commit 0b73b5af60f2c544892b9dd68b4fa43eeff52fc1. Fixes #137802 --- .../lib/Target/AArch64/AArch64PointerAuth.cpp | 13 ++++-- .../machine-outliner-retaddr-sign-cfi.ll | 2 +- ...tliner-retaddr-sign-diff-scope-same-key.ll | 12 ++--- .../machine-outliner-retaddr-sign-non-leaf.ll | 12 ++--- .../machine-outliner-retaddr-sign-regsave.mir | 2 +- ...tliner-retaddr-sign-same-scope-diff-key.ll | 12 ++--- .../machine-outliner-retaddr-sign-sp-mod.mir | 34 +++++++------- ...machine-outliner-retaddr-sign-subtarget.ll | 6 +-- .../machine-outliner-retaddr-sign-thunk.ll | 18 ++++---- .../AArch64/pacbti-llvm-generated-funcs-2.ll | 4 +- ...sign-return-address-cfi-negate-ra-state.ll | 24 +++++----- .../CodeGen/AArch64/sign-return-address.ll | 44 +++++++++---------- .../MIR/AArch64/return-address-signing.mir | 4 +- 13 files changed, 96 insertions(+), 91 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp index c3bc70ad6f427..aba84574c7526 100644 --- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp +++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp @@ -152,12 +152,15 @@ void AArch64PointerAuth::signLR(MachineFunction &MF, ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel()); } else { BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup); - emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI); + if (MFnI.branchProtectionPAuthLR()) + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI); BuildMI(MBB, MBBI, DL, TII->get(MFnI.shouldSignWithBKey() ? AArch64::PACIBSP : AArch64::PACIASP)) .setMIFlag(MachineInstr::FrameSetup) ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel()); + if (!MFnI.branchProtectionPAuthLR()) + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI); } if (!EmitCFI && NeedsWinCFI) { @@ -220,11 +223,15 @@ void AArch64PointerAuth::authenticateLR( .setMIFlag(MachineInstr::FrameDestroy); } else { BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, PACSym); - emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, - EmitAsyncCFI); + if (MFnI->branchProtectionPAuthLR()) + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, + EmitAsyncCFI); BuildMI(MBB, MBBI, DL, TII->get(UseBKey ? 
AArch64::AUTIBSP : AArch64::AUTIASP)) .setMIFlag(MachineInstr::FrameDestroy); + if (!MFnI->branchProtectionPAuthLR()) + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, + EmitAsyncCFI); } if (NeedsWinCFI) { diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll index e7de54036245a..4bbbe40176313 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll @@ -9,9 +9,9 @@ define void @a() "sign-return-address"="all" "sign-return-address-key"="b_key" { ; CHECK-LABEL: a: // @a ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #27 ; V83A-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll index a26dda1d5c1f1..6a11bef08c740 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll @@ -5,9 +5,9 @@ define void @a() "sign-return-address"="all" { ; CHECK-LABEL: a: // @a -; CHECK: .cfi_negate_ra_state -; V8A-NEXT: hint #25 -; V83A-NEXT: paciasp +; V8A: hint #25 +; V83A: paciasp +; CHECK-NEXT: .cfi_negate_ra_state %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -52,9 +52,9 @@ define void @b() "sign-return-address"="non-leaf" { define void @c() "sign-return-address"="all" { ; CHECK-LABEL: c: // @c -; CHECK: .cfi_negate_ra_state -; V8A-NEXT: hint #25 -; V83A-NEXT: paciasp +; V8A: hint #25 +; V83A: paciasp +; CHECK-NEXT .cfi_negate_ra_state %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll index 064b2b78c7bc7..1e7224683c6c8 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll @@ -8,8 +8,8 @@ define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V8A-LABEL: a: ; V8A: // %bb.0: ; V8A-NEXT: .cfi_b_key_frame -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 32 @@ -26,8 +26,8 @@ define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V83A-LABEL: a: ; V83A: // %bb.0: ; V83A-NEXT: .cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 32 @@ -59,8 +59,8 @@ define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V8A-LABEL: b: ; V8A: // %bb.0: ; V8A-NEXT: .cfi_b_key_frame -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 32 @@ -77,8 +77,8 @@ define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V83A-LABEL: b: ; V83A: // %bb.0: ; V83A-NEXT: 
.cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 32 @@ -110,8 +110,8 @@ define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V8A-LABEL: c: ; V8A: // %bb.0: ; V8A-NEXT: .cfi_b_key_frame -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 32 @@ -128,8 +128,8 @@ define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V83A-LABEL: c: ; V83A: // %bb.0: ; V83A-NEXT: .cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 32 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir index 218ee6609c803..9a983cbd6714e 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir @@ -81,8 +81,8 @@ body: | # CHECK: name: bar # CHECK: bb.0: # CHECK: frame-setup EMITBKEY -# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: frame-setup PACIBSP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NOT: OUTLINED_FUNCTION_ # CHECK: bb.1: # CHECK-NOT: OUTLINED_FUNCTION_ diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll index 5c45373d8c1d6..87771f5de4f69 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll @@ -7,8 +7,8 @@ define void @a() "sign-return-address"="all" { ; V8A-LABEL: a: ; V8A: // %bb.0: -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: .cfi_def_cfa_offset 32 ; V8A-NEXT: mov w8, #1 // =0x1 @@ -26,8 +26,8 @@ define void @a() "sign-return-address"="all" { ; ; V83A-LABEL: a: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: .cfi_def_cfa_offset 32 ; V83A-NEXT: mov w8, #1 // =0x1 @@ -60,8 +60,8 @@ define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" { ; V8A-LABEL: b: ; V8A: // %bb.0: ; V8A-NEXT: .cfi_b_key_frame -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: .cfi_def_cfa_offset 32 ; V8A-NEXT: mov w8, #1 // =0x1 @@ -80,8 +80,8 @@ define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" { ; V83A-LABEL: b: ; V83A: // %bb.0: ; V83A-NEXT: .cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: .cfi_def_cfa_offset 32 ; V83A-NEXT: mov w8, #1 // =0x1 @@ -113,8 +113,8 @@ define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" { define void @c() "sign-return-address"="all" { ; V8A-LABEL: c: ; V8A: // %bb.0: -; V8A-NEXT: 
.cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 ; V8A-NEXT: .cfi_def_cfa_offset 32 ; V8A-NEXT: mov w8, #1 // =0x1 @@ -132,8 +132,8 @@ define void @c() "sign-return-address"="all" { ; ; V83A-LABEL: c: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 ; V83A-NEXT: .cfi_def_cfa_offset 32 ; V83A-NEXT: mov w8, #1 // =0x1 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir index d4a4b886ec0e3..22e5edef2a939 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir @@ -86,11 +86,11 @@ body: | # CHECK: body: | # CHECK-NEXT: bb.0 (%ir-block.0): # CHECK-NEXT: liveins: $lr -# CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK: BL @[[OUTLINED_FUNC:OUTLINED_FUNCTION_[0-9]+]] -# CHECK: frame-destroy CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: RET undef $lr ... @@ -119,11 +119,11 @@ body: | # CHECK: body: | # CHECK-NEXT: bb.0 (%ir-block.0): # CHECK-NEXT: liveins: $lr -# CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK: BL @[[OUTLINED_FUNC]] -# CHECK: frame-destroy CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: RET undef $lr ... 
@@ -174,22 +174,22 @@ body: | # CHECK: body: | # CHECK-NEXT: bb.0 (%ir-block.0): # CHECK-NEXT: liveins: $lr -# CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NOT: BL @OUTLINED_FUNCTION_{{.*}} -# CHECK: frame-destroy CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: RET undef $lr # CHECK-LABEL: name: illegal1 # CHECK: body: | # CHECK-NEXT: bb.0 (%ir-block.0): # CHECK-NEXT: liveins: $lr -# CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NOT: BL @OUTLINED_FUNCTION_{{.*}} -# CHECK: frame-destroy CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: RET undef $lr # Outlined function that contains only legal sp modifications @@ -198,8 +198,8 @@ body: | # CHECK-NEXT: bb.0: # CHECK-NEXT: liveins: $lr # CHECK-NEXT: {{^ $}} -# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK: $sp = frame-destroy ADDXri $sp, 16, 0 # CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll index cb43b3ba3e47e..a7ea32952f3b7 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll @@ -9,8 +9,8 @@ define void @a() #0 { ; CHECK-LABEL: a: // @a ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NOT: OUTLINED_FUNCTION_ %1 = alloca i32, align 4 %2 = alloca i32, align 4 @@ -33,8 +33,8 @@ define void @b() #0 { ; CHECK-LABEL: b: // @b ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NOT: OUTLINED_FUNCTION_ %1 = alloca i32, align 4 %2 = alloca i32, align 4 @@ -57,8 +57,8 @@ define void @c() #1 { ; CHECK-LABEL: c: // @c ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: hint #27 +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NOT: OUTLINED_FUNCTION_ %1 = alloca i32, align 4 %2 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll index 0ba4455532925..da68ea5bf0dbc 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll +++ 
b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll @@ -10,8 +10,8 @@ declare i32 @thunk_called_fn(i32, i32, i32, i32) define i32 @a() #0 { ; V8A-LABEL: a: ; V8A: // %bb.0: // %entry -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 16 ; V8A-NEXT: .cfi_offset w30, -16 @@ -27,8 +27,8 @@ define i32 @a() #0 { ; ; V83A-LABEL: a: ; V83A: // %bb.0: // %entry -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -49,8 +49,8 @@ entry: define i32 @b() #0 { ; V8A-LABEL: b: ; V8A: // %bb.0: // %entry -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 16 ; V8A-NEXT: .cfi_offset w30, -16 @@ -66,8 +66,8 @@ define i32 @b() #0 { ; ; V83A-LABEL: b: ; V83A: // %bb.0: // %entry -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -88,8 +88,8 @@ entry: define hidden i32 @c(ptr %fptr) #0 { ; V8A-LABEL: c: ; V8A: // %bb.0: // %entry -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 16 ; V8A-NEXT: .cfi_offset w30, -16 @@ -106,8 +106,8 @@ define hidden i32 @c(ptr %fptr) #0 { ; ; V83A-LABEL: c: ; V83A: // %bb.0: // %entry -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -129,8 +129,8 @@ entry: define hidden i32 @d(ptr %fptr) #0 { ; V8A-LABEL: d: ; V8A: // %bb.0: // %entry -; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V8A-NEXT: .cfi_def_cfa_offset 16 ; V8A-NEXT: .cfi_offset w30, -16 @@ -147,8 +147,8 @@ define hidden i32 @d(ptr %fptr) #0 { ; ; V83A-LABEL: d: ; V83A: // %bb.0: // %entry -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -176,5 +176,3 @@ attributes #0 = { "sign-return-address"="non-leaf" minsize } ; CHECK-NOT: .cfi_negate_ra_state ; CHECK-NOT: auti{{[a,b]}}sp ; CHECK-NOT: hint #{{[29,31]}} -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll b/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll index f823d2aa82ac0..373c4969a9405 100644 --- a/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll +++ b/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll @@ -34,8 +34,8 @@ entry: } ;; CHECK-LABEL: __llvm_gcov_writeout: ;; CHECK: .cfi_b_key_frame -;; CHECK-NEXT: .cfi_negate_ra_state ;; CHECK-NEXT: pacibsp +;; CHECK-NEXT: .cfi_negate_ra_state define internal void @__llvm_gcov_reset() unnamed_addr #2 { entry: @@ -54,9 +54,9 @@ entry: } ;; CHECK-LABEL: __llvm_gcov_init: ;; CHECK: .cfi_b_key_frame +;; CHECK-NEXT: pacibsp ;; CHECK-NEXT: .cfi_negate_ra_state ;; CHECK-NOT: .cfi_ -;; CHECK-NEXT: pacibsp ;; CHECK: .cfi_endproc attributes #0 = { norecurse nounwind readnone "sign-return-address"="all" "sign-return-address-key"="b_key" } diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll index 6ea072846d47c..4d4b7c215b978 100644 --- a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll +++ b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll @@ -9,8 +9,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 { ; CHECK-V8A-LABEL: _Z3fooi: ; CHECK-V8A: // %bb.0: // %entry -; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: hint #25 +; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V8A-NEXT: .cfi_offset w30, -16 @@ -27,8 +27,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 { ; ; CHECK-V83A-LABEL: _Z3fooi: ; CHECK-V83A: // %bb.0: // %entry -; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: paciasp +; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V83A-NEXT: .cfi_offset w30, -16 @@ -62,8 +62,8 @@ return: ; No predecessors! define hidden noundef i32 @baz_async(i32 noundef %a) #0 uwtable(async) { ; CHECK-V8A-LABEL: baz_async: ; CHECK-V8A: // %bb.0: // %entry -; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: hint #25 +; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V8A-NEXT: .cfi_offset w30, -16 @@ -74,8 +74,8 @@ define hidden noundef i32 @baz_async(i32 noundef %a) #0 uwtable(async) { ; CHECK-V8A-NEXT: bl _Z3bari ; CHECK-V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 0 -; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: hint #29 +; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: .cfi_restore w30 ; CHECK-V8A-NEXT: b _Z3bari ; CHECK-V8A-NEXT: .LBB1_2: // %if.else @@ -84,15 +84,15 @@ define hidden noundef i32 @baz_async(i32 noundef %a) #0 uwtable(async) { ; CHECK-V8A-NEXT: add w0, w0, #1 ; CHECK-V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 0 -; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: hint #29 +; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: .cfi_restore w30 ; CHECK-V8A-NEXT: ret ; ; CHECK-V83A-LABEL: baz_async: ; CHECK-V83A: // %bb.0: // %entry -; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: paciasp +; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V83A-NEXT: .cfi_offset w30, -16 @@ -103,8 +103,8 @@ define hidden noundef i32 @baz_async(i32 noundef %a) #0 uwtable(async) { ; CHECK-V83A-NEXT: bl _Z3bari ; CHECK-V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 0 -; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: autiasp +; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: .cfi_restore w30 ; CHECK-V83A-NEXT: b _Z3bari ; CHECK-V83A-NEXT: .LBB1_2: // %if.else @@ -143,8 +143,8 @@ return: ; preds = %if.else, %if.then define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) { ; CHECK-V8A-LABEL: baz_sync: ; CHECK-V8A: // %bb.0: // %entry -; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: hint #25 +; CHECK-V8A-NEXT: .cfi_negate_ra_state ; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V8A-NEXT: .cfi_offset w30, -16 @@ -164,8 +164,8 @@ define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) { ; ; CHECK-V83A-LABEL: baz_sync: ; CHECK-V83A: // %bb.0: // %entry -; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: paciasp +; CHECK-V83A-NEXT: .cfi_negate_ra_state ; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V83A-NEXT: .cfi_offset w30, -16 @@ -216,7 +216,7 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP-NOT: DW_CFA_remember_state ; CHECK-DUMP-NOT: DW_CFA_restore_state -; CHECK-DUMP: CFA=WSP +; CHECK-DUMP: CFA=WSP{{$}} ; CHECK-DUMP: reg34=1 ; CHECK-DUMP-NOT: reg34=0 @@ -229,7 +229,6 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP: DW_CFA_restore_state: ; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state: -; CHECK-DUMP: CFA=WSP ;; First DW_CFA_AARCH64_negate_ra_state: ; CHECK-DUMP: reg34=1 ;; Second DW_CFA_AARCH64_negate_ra_state: @@ -238,6 +237,7 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP: reg34=1 ;; Third DW_CFA_AARCH64_negate_ra_state: ; CHECK-DUMP: reg34=0 +; CHECK-DUMP-NOT: reg34=1 ; baz_sync ; CHECK-DUMP-LABEL: FDE @@ -246,6 +246,6 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP-NOT: DW_CFA_remember_state ; CHECK-DUMP-NOT: DW_CFA_restore_state -; CHECK-DUMP: CFA=WSP +; CHECK-DUMP: CFA=WSP{{$}} ; CHECK-DUMP: reg34=1 ; CHECK-DUMP-NOT: reg34=0 diff --git a/llvm/test/CodeGen/AArch64/sign-return-address.ll b/llvm/test/CodeGen/AArch64/sign-return-address.ll index e0ee0d84ab4f1..dafe0d71ceb5f 100644 --- a/llvm/test/CodeGen/AArch64/sign-return-address.ll +++ b/llvm/test/CodeGen/AArch64/sign-return-address.ll @@ -29,15 +29,15 @@ define i32 @leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf" { define i32 @leaf_sign_all(i32 %x) "sign-return-address"="all" { ; COMPAT-LABEL: leaf_sign_all: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret ; ; V83A-LABEL: leaf_sign_all: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: retaa ret i32 %x } @@ -45,8 +45,8 @@ define i32 @leaf_sign_all(i32 %x) "sign-return-address"="all" { define i64 @leaf_clobbers_lr(i64 %x) "sign-return-address"="non-leaf" { ; COMPAT-LABEL: leaf_clobbers_lr: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 @@ -59,8 +59,8 @@ define i64 @leaf_clobbers_lr(i64 %x) "sign-return-address"="non-leaf" { ; ; V83A-LABEL: leaf_clobbers_lr: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -78,8 +78,8 @@ declare i32 @foo(i32) define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" { ; COMPAT-LABEL: non_leaf_sign_all: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 @@ -90,8 +90,8 @@ define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" { ; ; V83A-LABEL: non_leaf_sign_all: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -105,8 +105,8 @@ define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" { define i32 @non_leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf" { ; COMPAT-LABEL: non_leaf_sign_non_leaf: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 @@ -117,8 +117,8 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf" { ; ; V83A-LABEL: non_leaf_sign_non_leaf: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -135,8 +135,8 @@ define i32 @non_leaf_scs(i32 %x) "sign-return-address"="non-leaf" shadowcallstac ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [x18], #8 ; CHECK-NEXT: .cfi_escape 0x16, 0x12, 0x02, 0x82, 0x78 // -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: paciasp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 @@ -152,8 +152,8 @@ define i32 @non_leaf_scs(i32 %x) "sign-return-address"="non-leaf" shadowcallstac define i32 @leaf_sign_all_v83(i32 %x) "sign-return-address"="all" "target-features"="+v8.3a" { ; CHECK-LABEL: leaf_sign_all_v83: ; CHECK: // %bb.0: -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: paciasp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: retaa ret i32 %x } @@ -163,8 +163,8 @@ declare fastcc i64 @bar(i64) define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" { ; COMPAT-LABEL: spill_lr_and_tail_call: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 @@ -177,8 +177,8 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" { ; ; V83A-LABEL: spill_lr_and_tail_call: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 @@ -196,15 +196,15 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" { define i32 @leaf_sign_all_a_key(i32 %x) "sign-return-address"="all" "sign-return-address-key"="a_key" { ; COMPAT-LABEL: leaf_sign_all_a_key: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret ; ; V83A-LABEL: leaf_sign_all_a_key: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: retaa ret i32 %x } @@ -213,16 +213,16 @@ define i32 @leaf_sign_all_b_key(i32 %x) "sign-return-address"="all" "sign-return ; COMPAT-LABEL: leaf_sign_all_b_key: ; COMPAT: // %bb.0: ; COMPAT-NEXT: .cfi_b_key_frame -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #27 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #31 ; COMPAT-NEXT: ret ; ; V83A-LABEL: leaf_sign_all_b_key: ; V83A: // %bb.0: ; V83A-NEXT: .cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: retab ret i32 %x } @@ -231,8 +231,8 @@ define i32 @leaf_sign_all_v83_b_key(i32 %x) "sign-return-address"="all" "target- ; CHECK-LABEL: leaf_sign_all_v83_b_key: ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: retab ret i32 %x } @@ -241,15 +241,15 @@ define i32 @leaf_sign_all_v83_b_key(i32 %x) "sign-return-address"="all" "target- define i32 @leaf_sign_all_a_key_bti(i32 %x) "sign-return-address"="all" "sign-return-address-key"="a_key" "branch-target-enforcement"{ ; COMPAT-LABEL: leaf_sign_all_a_key_bti: ; COMPAT: // %bb.0: -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret ; ; V83A-LABEL: leaf_sign_all_a_key_bti: ; V83A: // %bb.0: -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: retaa ret i32 %x } @@ -259,16 +259,16 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "sign-return-address"="all" "sign-re ; COMPAT-LABEL: leaf_sign_all_b_key_bti: ; COMPAT: // %bb.0: ; COMPAT-NEXT: .cfi_b_key_frame -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #27 +; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: hint #31 ; COMPAT-NEXT: ret ; ; V83A-LABEL: leaf_sign_all_b_key_bti: ; V83A: // %bb.0: ; V83A-NEXT: .cfi_b_key_frame -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: retab ret i32 %x } @@ -278,8 +278,8 @@ define i32 @leaf_sign_all_v83_b_key_bti(i32 %x) "sign-return-address"="all" "tar ; CHECK-LABEL: leaf_sign_all_v83_b_key_bti: ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: pacibsp +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: retab ret i32 %x } diff --git a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir index b2abff75880c9..1030917c87419 100644 --- a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir +++ b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir @@ -25,8 +25,8 @@ alignment: 4 tracksRegLiveness: true frameInfo: maxCallFrameSize: 0 -#CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state #CHECK: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp +#CHECK: 
frame-setup CFI_INSTRUCTION negate_ra_sign_state #CHECK: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp body: | bb.0.entry: @@ -42,8 +42,8 @@ tracksRegLiveness: true frameInfo: maxCallFrameSize: 0 #CHECK: frame-setup EMITBKEY -#CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state #CHECK: frame-setup PACIBSP implicit-def $lr, implicit $lr, implicit $sp +#CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state #CHECK: frame-destroy AUTIBSP implicit-def $lr, implicit $lr, implicit $sp body: | bb.0.entry: From 070cf62530eab91d68521787e08418343b9ca28b Mon Sep 17 00:00:00 2001 From: cor3ntin Date: Tue, 18 Mar 2025 16:45:37 +0100 Subject: [PATCH 38/83] [Clang] Demote mixed enumeration arithmetic error to a warning (#131811) In C++, defaulted to an error. C++ removed these features but the removal negatively impacts users. Fixes #92340 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Basic/DiagnosticSemaKinds.td | 6 +++++- clang/lib/Sema/SemaExpr.cpp | 2 +- clang/test/SemaCXX/cxx2c-enum-compare.cpp | 5 +++-- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2f43dc4021fd8..774a00b4feef5 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -314,6 +314,9 @@ C++2c Feature Support - Implemented `P3176R1 The Oxford variadic comma `_ +- The error produced when doing arithmetic operations on enums of different types + can be disabled with ``-Wno-enum-enum-conversion``. (#GH92340) + C++23 Feature Support ^^^^^^^^^^^^^^^^^^^^^ - Removed the restriction to literal types in constexpr functions in C++23 mode. diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ec2a140e04d5b..7180447e250ce 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7567,9 +7567,13 @@ def warn_arith_conv_mixed_enum_types_cxx20 : Warning< "%sub{select_arith_conv_kind}0 " "different enumeration types%diff{ ($ and $)|}1,2 is deprecated">, InGroup; -def err_conv_mixed_enum_types_cxx26 : Error< + +def err_conv_mixed_enum_types: Error < "invalid %sub{select_arith_conv_kind}0 " "different enumeration types%diff{ ($ and $)|}1,2">; +def zzzz_warn_conv_mixed_enum_types_cxx26 : Warning < + err_conv_mixed_enum_types.Summary>, + InGroup, DefaultError; def warn_arith_conv_mixed_anon_enum_types : Warning< warn_arith_conv_mixed_enum_types.Summary>, diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index e253e3a17328f..23d0f9532d4f8 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1519,7 +1519,7 @@ static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS, // In C++ 26, usual arithmetic conversions between 2 different enum types // are ill-formed. 
if (S.getLangOpts().CPlusPlus26) - DiagID = diag::err_conv_mixed_enum_types_cxx26; + DiagID = diag::zzzz_warn_conv_mixed_enum_types_cxx26; else if (!L->castAs<EnumType>()->getDecl()->hasNameForLinkage() || !R->castAs<EnumType>()->getDecl()->hasNameForLinkage()) { // If either enumeration type is unnamed, it's less likely that the diff --git a/clang/test/SemaCXX/cxx2c-enum-compare.cpp b/clang/test/SemaCXX/cxx2c-enum-compare.cpp index f47278a60725e..96fbd368b1696 100644 --- a/clang/test/SemaCXX/cxx2c-enum-compare.cpp +++ b/clang/test/SemaCXX/cxx2c-enum-compare.cpp @@ -1,9 +1,10 @@ -// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify -triple %itanium_abi_triple +// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify=both,expected +// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify=both -Wno-enum-enum-conversion enum E1 { e }; enum E2 { f }; void test() { - int b = e <= 3.7; // expected-error {{invalid comparison of enumeration type 'E1' with floating-point type 'double'}} + int b = e <= 3.7; // both-error {{invalid comparison of enumeration type 'E1' with floating-point type 'double'}} int k = f - e; // expected-error {{invalid arithmetic between different enumeration types ('E2' and 'E1')}} int x = 1 ? e : f; // expected-error {{invalid conditional expression between different enumeration types ('E1' and 'E2')}} } From 8a36b8e3ab46d69ee1798ded75038715b2080f83 Mon Sep 17 00:00:00 2001 From: Fangyi Zhou Date: Thu, 17 Apr 2025 23:02:37 +0100 Subject: [PATCH 39/83] [clang][analyzer] Handle CXXParenInitListExpr alongside InitListExpr As reported in #135665, C++20 parenthesized initializer list expressions are not handled correctly and were causing crashes. This commit attempts to fix the issue by handling parenthesized initializer lists alongside existing initializer lists. (cherry picked from commit 5dc9d55eb04d94c01dba0364b51a509f975e542a) --- clang/docs/ReleaseNotes.rst | 3 +++ .../Checkers/DynamicTypePropagation.cpp | 6 +++--- .../lib/StaticAnalyzer/Core/ExprEngineCXX.cpp | 10 ++++++---- clang/test/Analysis/PR135665.cpp | 19 +++++++++++++++++++ 4 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 clang/test/Analysis/PR135665.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 774a00b4feef5..7b84210fddab3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1472,6 +1472,9 @@ Crash and bug fixes - The ``unix.BlockInCriticalSection`` now recognizes the ``lock()`` member function as expected, even if it's inherited from a base class. Fixes (#GH104241). +- Fixed a crash when C++20 parenthesized initializer lists are used. This issue + was causing a crash in clang-tidy. (#GH136041) + Improvements ^^^^^^^^^^^^ diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp index a0bf776b11f53..e58329817d7cd 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp @@ -379,9 +379,9 @@ void DynamicTypePropagation::checkPostCall(const CallEvent &Call, // aggregates, and in such case no top-frame constructor will be called. // Figure out if we need to do anything in this case. // FIXME: Instead of relying on the ParentMap, we should have the - // trigger-statement (InitListExpr in this case) available in this - // callback, ideally as part of CallEvent.
- if (isa_and_nonnull<InitListExpr>( + // trigger-statement (InitListExpr or CXXParenListInitExpr in this case) + // available in this callback, ideally as part of CallEvent. + if (isa_and_nonnull<InitListExpr, CXXParenListInitExpr>( LCtx->getParentMap().getParent(Ctor->getOriginExpr()))) return; diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index f7020da2e6da2..30839a40389ba 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -637,9 +637,10 @@ void ExprEngine::handleConstructor(const Expr *E, // FIXME: For now this code essentially bails out. We need to find the // correct target region and set it. // FIXME: Instead of relying on the ParentMap, we should have the - // trigger-statement (InitListExpr in this case) passed down from CFG or - // otherwise always available during construction. - if (isa_and_nonnull<InitListExpr>(LCtx->getParentMap().getParent(E))) { + // trigger-statement (InitListExpr or CXXParenListInitExpr in this case) + // passed down from CFG or otherwise always available during construction. + if (isa_and_nonnull<InitListExpr, CXXParenListInitExpr>( + LCtx->getParentMap().getParent(E))) { MemRegionManager &MRMgr = getSValBuilder().getRegionManager(); Target = loc::MemRegionVal(MRMgr.getCXXTempObjectRegion(E, LCtx)); CallOpts.IsCtorOrDtorWithImproperlyModeledTargetRegion = true; @@ -1010,7 +1011,8 @@ void ExprEngine::VisitCXXNewExpr(const CXXNewExpr *CNE, ExplodedNode *Pred, // values are properly placed inside the required region, however if an // initializer list is used, this doesn't happen automatically. auto *Init = CNE->getInitializer(); - bool isInitList = isa_and_nonnull<InitListExpr>(Init); + bool isInitList = + isa_and_nonnull<InitListExpr, CXXParenListInitExpr>(Init); QualType ObjTy = isInitList ? Init->getType() : CNE->getType()->getPointeeType(); diff --git a/clang/test/Analysis/PR135665.cpp b/clang/test/Analysis/PR135665.cpp new file mode 100644 index 0000000000000..124b8c9b97b04 --- /dev/null +++ b/clang/test/Analysis/PR135665.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_analyze_cc1 -std=c++20 -analyzer-checker=core -verify %s + +// expected-no-diagnostics + +template <typename... F> +struct overload : public F... +{ + using F::operator()...; +}; + +template <typename... F> +overload(F&&...) -> overload<F...>; + +int main() +{ + const auto l = overload([](const int* i) {}); + + return 0; +} From 9b0832508ede38397c8ebb7a348d50ce1517af4a Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Tue, 20 May 2025 10:43:50 +0100 Subject: [PATCH 40/83] [SDAG] Ensure load is included in output chain of sincos expansion (#140525) The load not being included in the chain meant that it could materialize after a `@llvm.lifetime.end` annotation on the pointer. This could result in miscompiles if the stack slot is reused for another value.
Fixes https://github.com/llvm/llvm-project/issues/140491 (cherry picked from commit c9d62491981fe720c1b3255fa2f9ddf744590c65) --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9 ++- .../CodeGen/X86/pr140491-sincos-lifetimes.ll | 70 +++++++++++++++++++ 2 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr140491-sincos-lifetimes.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b416c0efbbc4f..eecfb41c2d319 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2660,16 +2660,19 @@ bool SelectionDAG::expandMultipleResultFPLibCall( continue; } MachinePointerInfo PtrInfo; + SDValue LoadResult = + getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo); + SDValue OutChain = LoadResult.getValue(1); + if (StoreSDNode *ST = ResultStores[ResNo]) { // Replace store with the library call. - ReplaceAllUsesOfValueWith(SDValue(ST, 0), CallChain); + ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain); PtrInfo = ST->getPointerInfo(); } else { PtrInfo = MachinePointerInfo::getFixedStack( getMachineFunction(), cast(ResultPtr)->getIndex()); } - SDValue LoadResult = - getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo); + Results.push_back(LoadResult); } diff --git a/llvm/test/CodeGen/X86/pr140491-sincos-lifetimes.ll b/llvm/test/CodeGen/X86/pr140491-sincos-lifetimes.ll new file mode 100644 index 0000000000000..2ca99bdc4b316 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr140491-sincos-lifetimes.ll @@ -0,0 +1,70 @@ +; RUN: llc < %s | FileCheck %s + +; This test is reduced from https://github.com/llvm/llvm-project/issues/140491. +; It checks that when `@llvm.sincos.f32` is expanded to a call to +; `sincosf(float, float* out_sin, float* out_cos)` and the store of `%cos` to +; `%computed` is folded into the `sincosf` call. The use of `%cos`in the later +; `fneg %cos` -- which expands to a load of `%computed`, will perform the load +; before the `@llvm.lifetime.end.p0(%computed)` to ensure the correct value is +; taken for `%cos`. 
+ +target triple = "x86_64-sie-ps5" + +declare void @use_ptr(ptr readonly) + +define i32 @sincos_stack_slot_with_lifetime(float %in) { +; CHECK-LABEL: sincos_stack_slot_with_lifetime: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subq $32, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: leaq 12(%rsp), %rdi +; CHECK-NEXT: leaq 8(%rsp), %rbx +; CHECK-NEXT: movq %rbx, %rsi +; CHECK-NEXT: callq sincosf@PLT +; CHECK-NEXT: movss 8(%rsp), %xmm0 # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movaps %xmm0, 16(%rsp) # 16-byte Spill +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: callq use_ptr +; CHECK-NEXT: movss 12(%rsp), %xmm0 # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: movss %xmm0, 8(%rsp) +; CHECK-NEXT: leaq 8(%rsp), %rdi +; CHECK-NEXT: callq use_ptr +; CHECK-NEXT: movaps 16(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: movss %xmm0, 8(%rsp) +; CHECK-NEXT: leaq 8(%rsp), %rdi +; CHECK-NEXT: callq use_ptr +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addq $32, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %computed = alloca float, align 4 + %computed1 = alloca float, align 4 + %computed3 = alloca float, align 4 + %sincos = tail call { float, float } @llvm.sincos.f32(float %in) + %sin = extractvalue { float, float } %sincos, 0 + %cos = extractvalue { float, float } %sincos, 1 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %computed) + store float %cos, ptr %computed, align 4 + call void @use_ptr(ptr nonnull %computed) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %computed) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %computed1) + %fneg_sin = fneg float %sin + store float %fneg_sin, ptr %computed1, align 4 + call void @use_ptr(ptr nonnull %computed1) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %computed1) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %computed3) + %fneg_cos = fneg float %cos + store float %fneg_cos, ptr %computed3, align 4 + call void @use_ptr(ptr nonnull %computed3) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %computed3) + ret i32 0 +} + From 802f4f75372ed66c07f1230666b170c43afbb937 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Tue, 20 May 2025 19:15:57 -0700 Subject: [PATCH 41/83] [clang-format] Handle raw string literals containing JSON code (#140666) Fix #65400 (cherry picked from commit 0dfdf7efbfe347517eb4c7f544043a71af4e4a25) --- clang/lib/Format/Format.cpp | 6 +++-- clang/tools/clang-format/ClangFormat.cpp | 4 ++-- .../unittests/Format/FormatTestRawStrings.cpp | 22 +++++++++++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index b97d8928178b5..aba7db6dd50a8 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -3743,8 +3743,10 @@ reformat(const FormatStyle &Style, StringRef Code, tooling::Replacements Replaces = Formatter(*Env, Style, Status).process().first; // add a replacement to remove the "x = " from the result. - Replaces = Replaces.merge( - tooling::Replacements(tooling::Replacement(FileName, 0, 4, ""))); + if (Code.starts_with("x = ")) { + Replaces = Replaces.merge( + tooling::Replacements(tooling::Replacement(FileName, 0, 4, ""))); + } // apply the reformatting changes and the removal of "x = ". 
if (applyAllReplacements(Code, Replaces)) return {Replaces, 0}; diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index 28610052b9b74..96eec7c666a38 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -492,8 +492,8 @@ static bool format(StringRef FileName, bool ErrorOnIncompleteFormat = false) { // To format JSON insert a variable to trick the code into thinking its // JavaScript. if (IsJson && !FormatStyle->DisableFormat) { - auto Err = Replaces.add(tooling::Replacement( - tooling::Replacement(AssumedFileName, 0, 0, "x = "))); + auto Err = + Replaces.add(tooling::Replacement(AssumedFileName, 0, 0, "x = ")); if (Err) llvm::errs() << "Bad Json variable insertion\n"; } diff --git a/clang/unittests/Format/FormatTestRawStrings.cpp b/clang/unittests/Format/FormatTestRawStrings.cpp index 0615fb1fad4c5..3f09c7b6086e5 100644 --- a/clang/unittests/Format/FormatTestRawStrings.cpp +++ b/clang/unittests/Format/FormatTestRawStrings.cpp @@ -988,6 +988,28 @@ fffffffffffffffffffff("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", )pb");)test", Style)); } + +TEST_F(FormatTestRawStrings, Json) { + auto Style = getLLVMStyle(); + Style.RawStringFormats = { + { + /*Language=*/FormatStyle::LK_Json, + /*Delimiters=*/{"json"}, + /*EnclosingFunctions=*/{}, + /*CanonicalDelimiter=*/"", + /*BasedOnStyle=*/"llvm", + }, + }; + + EXPECT_EQ("json = R\"json({\n" + " \"str\": \"test\"\n" + " })json\";", + format("json = R\"json({\n" + " \"str\": \"test\"\n" + "})json\";", + Style)); +} + } // end namespace } // end namespace format } // end namespace clang From 15ec590e389b57f47d817f32d34e486c5ab37f70 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Wed, 21 May 2025 21:37:54 -0700 Subject: [PATCH 42/83] release/20.x: [clang-format] Fix the indent of StartOfName after AttributeMacro (#140361) Backport 0cac25bcf5a246eb8a1f02d5041731ae9a6f00e0 --- clang/lib/Format/ContinuationIndenter.cpp | 4 +++- clang/unittests/Format/FormatTest.cpp | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 6f7d213c0b559..d953348b0258d 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1452,7 +1452,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { (PreviousNonComment->ClosesTemplateDeclaration || PreviousNonComment->ClosesRequiresClause || (PreviousNonComment->is(TT_AttributeMacro) && - Current.isNot(tok::l_paren)) || + Current.isNot(tok::l_paren) && + !Current.endsSequence(TT_StartOfName, TT_AttributeMacro, + TT_PointerOrReference)) || PreviousNonComment->isOneOf( TT_AttributeRParen, TT_AttributeSquare, TT_FunctionAnnotationRParen, TT_JavaAnnotation, TT_LeadingJavaAnnotation))) || diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 90a79230e9f4c..1afcc75a2e19e 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -12419,6 +12419,13 @@ TEST_F(FormatTest, UnderstandsAttributes) { verifyFormat("SomeType s __unused{InitValue};", CustomAttrs); verifyFormat("SomeType *__capability s(InitValue);", CustomAttrs); verifyFormat("SomeType *__capability s{InitValue};", CustomAttrs); + + auto Style = getLLVMStyleWithColumns(60); + Style.AttributeMacros.push_back("my_fancy_attr"); + Style.PointerAlignment = FormatStyle::PAS_Left; + verifyFormat("void foo(const MyLongTypeNameeeeeeeeeeeee* 
my_fancy_attr\n" + " testttttttttt);", + Style); } TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { From 6fcb1c127b407cd6cb731bcc39efb360643b8d25 Mon Sep 17 00:00:00 2001 From: hev Date: Thu, 22 May 2025 18:50:40 +0800 Subject: [PATCH 43/83] [LoongArch] Fix assertion failure for annotate tablejump (#140907) Fix a use-after-free issue related to annotateTableJump in the LoongArch target. Previously, `LoongArchPreRAExpandPseudo::annotateTableJump()` recorded a reference to a MachineOperand representing a jump table index. However, later optimizations such as the `BranchFolder` pass may delete the instruction containing this operand, leaving a dangling reference. This led to an assertion failure in `LoongArchAsmPrinter::emitJumpTableInfo()` when trying to access a freed MachineOperand via `getIndex()`. The fix avoids holding a reference to the MachineOperand. Instead, we extract and store the jump table index at the time of annotation. During `emitJumpTableInfo()`, we verify whether the recorded index still exists in the MachineFunction's jump table. If not, we skip emission for that entry. Fixes #140904 (cherry picked from commit 4e186f20e2f2be2fbf95d9713341a0b6507e707d) --- llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp | 14 +++++++++----- .../LoongArch/LoongArchExpandPseudoInsts.cpp | 3 ++- .../LoongArch/LoongArchMachineFunctionInfo.h | 10 ++++------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp index 895a8e2646692..9a383f0a79a5c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -265,13 +265,16 @@ void LoongArchAsmPrinter::emitJumpTableInfo() { assert(TM.getTargetTriple().isOSBinFormatELF()); - unsigned Size = getDataLayout().getPointerSize(); auto *LAFI = MF->getInfo(); unsigned EntrySize = LAFI->getJumpInfoSize(); + auto JTI = MF->getJumpTableInfo(); - if (0 == EntrySize) + if (!JTI || 0 == EntrySize) return; + unsigned Size = getDataLayout().getPointerSize(); + auto JT = JTI->getJumpTables(); + // Emit an additional section to store the correlation info as pairs of // addresses, each pair contains the address of a jump instruction (jr) and // the address of the jump table. 
@@ -279,14 +282,15 @@ void LoongArchAsmPrinter::emitJumpTableInfo() { ".discard.tablejump_annotate", ELF::SHT_PROGBITS, 0)); for (unsigned Idx = 0; Idx < EntrySize; ++Idx) { + int JTIIdx = LAFI->getJumpInfoJTIIndex(Idx); + if (JT[JTIIdx].MBBs.empty()) + continue; OutStreamer->emitValue( MCSymbolRefExpr::create(LAFI->getJumpInfoJrMI(Idx)->getPreInstrSymbol(), OutContext), Size); OutStreamer->emitValue( - MCSymbolRefExpr::create( - GetJTISymbol(LAFI->getJumpInfoJTIMO(Idx)->getIndex()), OutContext), - Size); + MCSymbolRefExpr::create(GetJTISymbol(JTIIdx), OutContext), Size); } } diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp index c2d73a260b1c1..2107908be34ca 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -638,7 +638,8 @@ void LoongArchPreRAExpandPseudo::annotateTableJump( if (MO.isJTI()) { MBBI->setPreInstrSymbol( *MF, MF->getContext().createNamedTempSymbol("jrtb_")); - MF->getInfo()->setJumpInfo(&*MBBI, &MO); + MF->getInfo()->setJumpInfo( + &*MBBI, MO.getIndex()); IsFound = true; return; } diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h index daa47c4dc7e32..904985c189dba 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h @@ -41,7 +41,7 @@ class LoongArchMachineFunctionInfo : public MachineFunctionInfo { /// Pairs of `jr` instructions and corresponding JTI operands, used for the /// `annotate-tablejump` option. - SmallVector, 4> JumpInfos; + SmallVector, 4> JumpInfos; public: LoongArchMachineFunctionInfo(const Function &F, @@ -76,14 +76,12 @@ class LoongArchMachineFunctionInfo : public MachineFunctionInfo { return is_contained(SExt32Registers, Reg); } - void setJumpInfo(MachineInstr *JrMI, MachineOperand *JTIMO) { - JumpInfos.push_back(std::make_pair(JrMI, JTIMO)); + void setJumpInfo(MachineInstr *JrMI, int JTIIdx) { + JumpInfos.push_back(std::make_pair(JrMI, JTIIdx)); } unsigned getJumpInfoSize() { return JumpInfos.size(); } MachineInstr *getJumpInfoJrMI(unsigned Idx) { return JumpInfos[Idx].first; } - MachineOperand *getJumpInfoJTIMO(unsigned Idx) { - return JumpInfos[Idx].second; - } + int getJumpInfoJTIIndex(unsigned Idx) { return JumpInfos[Idx].second; } }; } // end namespace llvm From 53393e26d5f4e93c88c4c1fb737b30658de49992 Mon Sep 17 00:00:00 2001 From: hev Date: Thu, 22 May 2025 18:49:27 +0800 Subject: [PATCH 44/83] [LoongArch] Prevent R0/R1 allocation for rj operand of [G]CSRXCHG (#140862) The `[G]CSRXCHG` instruction must not use R0 or R1 as the `rj` operand, as encoding `rj` as 0 or 1 will be interpreted as `[G]CSRRD` OR `[G]CSRWR`, respectively, rather than `[G]CSRXCHG`. This patch introduces a new register class `GPRNoR0R1` and updates the `[G]CSRXCHG` instruction definition to use it for the `rj` operand, ensuring the register allocator avoids assigning R0 or R1. 
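To make the aliasing concrete, here is a rough encoding sketch (an editorial illustration, not code from this patch; the field layout is assumed from the LoongArch CSR instruction format and the `CSRWR` opcode `0x04000020` visible above, i.e. csr_num in bits [23:10], rj in [9:5], rd in [4:0]):

```c++
#include <cstdint>

// Assumed layout: csrxchg rd, rj, csr_num  =>  0x04000000 | csr_num<<10 | rj<<5 | rd
uint32_t encodeCsrxchg(uint32_t rd, uint32_t rj, uint32_t csr_num) {
  return 0x04000000u | (csr_num << 10) | (rj << 5) | rd;
}
// With rj == 0 the resulting bits are identical to csrrd rd, csr_num, and with
// rj == 1 they are identical to csrwr rd, csr_num (0x04000020), which is why
// the register allocator must never pick $r0 or $r1 for rj.
```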
Fixes #140842 (cherry picked from commit bd8578c3574d77bc1231f047bced4a0053a1b000) --- .../AsmParser/LoongArchAsmParser.cpp | 3 +++ .../Disassembler/LoongArchDisassembler.cpp | 8 +++++++ .../Target/LoongArch/LoongArchInstrInfo.td | 6 ++--- .../Target/LoongArch/LoongArchLVZInstrInfo.td | 2 +- .../Target/LoongArch/LoongArchRegisterInfo.td | 5 ++++ .../CodeGen/LoongArch/csrxchg-intrinsic.ll | 24 +++++++++++++++++++ 6 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/csrxchg-intrinsic.ll diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index 420b98b8a9c1f..f31d85305bbbe 100644 --- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -1663,6 +1663,9 @@ LoongArchAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, return Match_Success; } + if (Kind == MCK_GPRNoR0R1 && (Reg == LoongArch::R0 || Reg == LoongArch::R1)) + return Match_RequiresOpnd2NotR0R1; + return Match_InvalidOperand; } diff --git a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp index 5963208691f72..761682423fffe 100644 --- a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +++ b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp @@ -62,6 +62,14 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, return MCDisassembler::Success; } +static DecodeStatus +DecodeGPRNoR0R1RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo <= 1) + return MCDisassembler::Fail; + return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); +} + static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index 9b93a9f824726..00e8548071182 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -2351,7 +2351,7 @@ let hasSideEffects = 1, Constraints = "$rd = $dst" in { def CSRWR : FmtCSR<0x04000020, (outs GPR:$dst), (ins GPR:$rd, uimm14:$csr_num), "$rd, $csr_num">; def CSRXCHG : FmtCSRXCHG<0x04000000, (outs GPR:$dst), - (ins GPR:$rd, GPR:$rj, uimm14:$csr_num), + (ins GPR:$rd, GPRNoR0R1:$rj, uimm14:$csr_num), "$rd, $rj, $csr_num">; } // hasSideEffects = 1, Constraints = "$rd = $dst" @@ -2398,8 +2398,8 @@ def IDLE : MISC_I15<0x06488000>; def : Pat<(loongarch_csrrd uimm14:$imm14), (CSRRD uimm14:$imm14)>; def : Pat<(loongarch_csrwr GPR:$rd, uimm14:$imm14), (CSRWR GPR:$rd, uimm14:$imm14)>; -def : Pat<(loongarch_csrxchg GPR:$rd, GPR:$rj, uimm14:$imm14), - (CSRXCHG GPR:$rd, GPR:$rj, uimm14:$imm14)>; +def : Pat<(loongarch_csrxchg GPR:$rd, GPRNoR0R1:$rj, uimm14:$imm14), + (CSRXCHG GPR:$rd, GPRNoR0R1:$rj, uimm14:$imm14)>; def : Pat<(loongarch_iocsrrd_b GPR:$rj), (IOCSRRD_B GPR:$rj)>; def : Pat<(loongarch_iocsrrd_h GPR:$rj), (IOCSRRD_H GPR:$rj)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td index 50a16e2dd56b9..07b77ee971f27 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td @@ -23,7 +23,7 @@ let Constraints = "$rd = $dst" in { def GCSRWR : FmtCSR<0x05000020, (outs GPR:$dst), (ins GPR:$rd, 
uimm14:$csr_num), "$rd, $csr_num">; def GCSRXCHG : FmtCSRXCHG<0x05000000, (outs GPR:$dst), - (ins GPR:$rd, GPR:$rj, uimm14:$csr_num), + (ins GPR:$rd, GPRNoR0R1:$rj, uimm14:$csr_num), "$rd, $rj, $csr_num">; } // Constraints = "$rd = $dst" diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td index a8419980868ee..2a8cdf953e00f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td @@ -127,6 +127,11 @@ def GPRT : GPRRegisterClass<(add // a0...a7, t0...t8 // prediction. def GPRJR : GPRRegisterClass<(sub GPR, R1)>; +// Don't use R0 or R1 for the rj operand of [G]CSRXCHG, because when rj is +// encoded as 0 or 1, the instruction is interpreted as [G]CSRRD or [G]CSRWR, +// respectively, rather than [G]CSRXCHG. +def GPRNoR0R1 : GPRRegisterClass<(sub GPR, R0, R1)>; + // Floating point registers let RegAltNameIndices = [RegAliasName] in { diff --git a/llvm/test/CodeGen/LoongArch/csrxchg-intrinsic.ll b/llvm/test/CodeGen/LoongArch/csrxchg-intrinsic.ll new file mode 100644 index 0000000000000..2f38b3a8c7ad1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/csrxchg-intrinsic.ll @@ -0,0 +1,24 @@ +; RUN: llc --mtriple=loongarch32 --mattr=+f --verify-machineinstrs < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+f --verify-machineinstrs < %s | FileCheck %s + +declare i32 @llvm.loongarch.csrxchg.w(i32, i32, i32 immarg) + +;; Check that the rj operand of csrxchg is not R0. +define void @csrxchg_w_rj_not_r0(i32 signext %a) { +; CHECK-NOT: csrxchg ${{[a-z]*}}, $r0, 0 +; CHECK-NOT: csrxchg ${{[a-z]*}}, $zero, 0 +entry: + %0 = tail call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 0, i32 0) + ret void +} + +;; Check that the rj operand of csrxchg is not R1. +define i32 @csrxchg_w_rj_not_r1() { +; CHECK-NOT: csrxchg ${{[a-z]*}}, $r1, 0 +; CHECK-NOT: csrxchg ${{[a-z]*}}, $ra, 0 +entry: + %0 = tail call i32 asm "", "=r,r,i,{r4},{r5},{r6},{r7},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},{r16},{r17},{r18},{r19},{r20},{r23},{r24},{r25},{r26},{r27},{r28},{r29},{r30},{r31},0"(i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) + %1 = tail call i32 @llvm.loongarch.csrxchg.w(i32 %0, i32 4, i32 0) + %2 = tail call i32 asm "", "=r,r,i,{r4},{r5},{r6},{r7},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},{r16},{r17},{r18},{r19},{r20},{r23},{r24},{r25},{r26},{r27},{r28},{r29},{r30},{r31},0"(i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %1) + ret i32 %2 +} From aa804fd3e624cb92c6e7665182504c6049387f35 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 28 Apr 2025 13:45:11 -0700 Subject: [PATCH 45/83] [sanitizer_common] Remove interceptors for deprecated struct termio (#137403) This struct will be removed from glibc-2.42 and has been deprecated for a very long time. 
Fixes #137321 (cherry picked from commit 59978b21ad9c65276ee8e14f26759691b8a65763) --- .../sanitizer_common_interceptors_ioctl.inc | 8 -------- .../sanitizer_common/sanitizer_platform_limits_posix.cpp | 3 --- .../sanitizer_common/sanitizer_platform_limits_posix.h | 1 - 3 files changed, 12 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc index 49ec4097c900b..dda11daa77f49 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc @@ -338,17 +338,9 @@ static void ioctl_table_fill() { _(SOUND_PCM_WRITE_CHANNELS, WRITE, sizeof(int)); _(SOUND_PCM_WRITE_FILTER, WRITE, sizeof(int)); _(TCFLSH, NONE, 0); -#if SANITIZER_GLIBC - _(TCGETA, WRITE, struct_termio_sz); -#endif _(TCGETS, WRITE, struct_termios_sz); _(TCSBRK, NONE, 0); _(TCSBRKP, NONE, 0); -#if SANITIZER_GLIBC - _(TCSETA, READ, struct_termio_sz); - _(TCSETAF, READ, struct_termio_sz); - _(TCSETAW, READ, struct_termio_sz); -#endif _(TCSETS, READ, struct_termios_sz); _(TCSETSF, READ, struct_termios_sz); _(TCSETSW, READ, struct_termios_sz); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index ec5f2edab6a64..b3e717591d6c7 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -485,9 +485,6 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned struct_input_id_sz = sizeof(struct input_id); unsigned struct_mtpos_sz = sizeof(struct mtpos); unsigned struct_rtentry_sz = sizeof(struct rtentry); -#if SANITIZER_GLIBC || SANITIZER_ANDROID - unsigned struct_termio_sz = sizeof(struct termio); -#endif unsigned struct_vt_consize_sz = sizeof(struct vt_consize); unsigned struct_vt_sizes_sz = sizeof(struct vt_sizes); unsigned struct_vt_stat_sz = sizeof(struct vt_stat); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index 1a7d9e64048eb..005ff27624629 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -1043,7 +1043,6 @@ extern unsigned struct_hd_geometry_sz; extern unsigned struct_input_absinfo_sz; extern unsigned struct_input_id_sz; extern unsigned struct_mtpos_sz; -extern unsigned struct_termio_sz; extern unsigned struct_vt_consize_sz; extern unsigned struct_vt_sizes_sz; extern unsigned struct_vt_stat_sz; From 5d99a97583e148e2c0ad462bb35292366105e188 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 26 May 2025 09:43:00 +0200 Subject: [PATCH 46/83] [MachO] Improve bounds check (#141083) The current check may fail if the addition overflows. I've observed failures of macho-invalid.test on 32-bit due to this. Instead, compare against the remaining bytes until the end of the object. 
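A minimal sketch of the idiom (illustrative only, with made-up helper names, not the code touched by this patch): on a 32-bit host the sum `Ptr + Size` can wrap around and compare as in-bounds, so the check is phrased as a comparison against the bytes that remain.

```c++
#include <cstdint>

// Unsafe: the pointer addition can wrap on a 32-bit host (the failure mode
// described above), so an out-of-bounds size may still compare as "fits".
bool fitsUnsafe(const char *Ptr, uint32_t Size, const char *End) {
  return Ptr + Size <= End;
}

// Safe, assuming Ptr <= End: compare the size against the remaining bytes.
bool fitsSafe(const char *Ptr, uint32_t Size, const char *End) {
  return Size <= (uintptr_t)(End - Ptr);
}
```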
(cherry picked from commit 3f29acb51739a3e6bfb8cc623eb37cb734c98a63) --- llvm/lib/Object/MachOObjectFile.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index 69d36e6a77db7..5db264207ffb7 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -192,7 +192,8 @@ static Expected<MachOObjectFile::LoadCommandInfo> getLoadCommandInfo(const MachOObjectFile &Obj, const char *Ptr, uint32_t LoadCommandIndex) { if (auto CmdOrErr = getStructOrErr<MachO::load_command>(Obj, Ptr)) { - if (CmdOrErr->cmdsize + Ptr > Obj.getData().end()) + assert(Ptr <= Obj.getData().end() && "Start must be before end"); + if (CmdOrErr->cmdsize > (uintptr_t)(Obj.getData().end() - Ptr)) return malformedError("load command " + Twine(LoadCommandIndex) + " extends past end of file"); if (CmdOrErr->cmdsize < 8) From 0e1ef696f1fedd94ac43cafcd09b4ac0071c44be Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Sat, 26 Apr 2025 17:06:04 +0200 Subject: [PATCH 47/83] [Driver] Fix _XOPEN_SOURCE definition on Solaris (#137141) Since commit 613a077b05b8352a48695be295037306f5fca151, `flang` doesn't build any longer on Solaris/amd64: ``` flang/lib/Evaluate/intrinsics-library.cpp:225:26: error: address of overloaded function 'acos' does not match required type '__float128 (__float128)' 225 | FolderFactory::Create("acos"), | ^~~~~~~~~ ``` That patch led to the version of `quadmath.h` deep inside `/usr/gcc/` to be found, thus `HAS_QUADMATHLIB` is defined. However, the `struct HostRuntimeLibrary<__float128, LibraryVersion::Libm>` template is guarded by `_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600`, while `clang` only predefines `_XOPEN_SOURCE=500`. This code dates back to commit 0c1941cb055fcf008e17faa6605969673211bea3 back in 2012. Currently, this is long obsolete and `gcc` predefines `_XOPEN_SOURCE=600` instead since GCC 4.6 back in 2011. This patch follows that. Tested on `amd64-pc-solaris2.11` and `sparcv9-sun-solaris2.11`. (cherry picked from commit e71c8ea3cc73c8f7b0382468f355a254166d3a72) --- clang/lib/Basic/Targets/OSTargets.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 991efd2bde01f..4cf4230273d38 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -618,14 +618,7 @@ class LLVM_LIBRARY_VISIBILITY SolarisTargetInfo : public OSTargetInfo { DefineStd(Builder, "unix", Opts); Builder.defineMacro("__svr4__"); Builder.defineMacro("__SVR4"); - // Solaris headers require _XOPEN_SOURCE to be set to 600 for C99 and - // newer, but to 500 for everything else. feature_test.h has a check to - // ensure that you are not using C99 with an old version of X/Open or C89 - // with a new version.
- if (Opts.C99) - Builder.defineMacro("_XOPEN_SOURCE", "600"); - else - Builder.defineMacro("_XOPEN_SOURCE", "500"); + Builder.defineMacro("_XOPEN_SOURCE", "600"); if (Opts.CPlusPlus) { Builder.defineMacro("__C99FEATURES__"); Builder.defineMacro("_FILE_OFFSET_BITS", "64"); From 4b6e5a28665348256609945fda55f4bde847433c Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Sun, 25 May 2025 15:40:45 -0700 Subject: [PATCH 48/83] [clang-format] Handle Java text blocks (#141334) Fix #61954 (cherry picked from commit b7f5950bb3b97eac979925a3bbf015530c26962e) --- clang/lib/Format/FormatTokenLexer.cpp | 33 +++++++++++++ clang/lib/Format/FormatTokenLexer.h | 2 + clang/unittests/Format/FormatTestJava.cpp | 57 +++++++++++++++++++++++ 3 files changed, 92 insertions(+) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 16f0a76f3a954..0755a5d355394 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -636,6 +636,36 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) { return true; } +void FormatTokenLexer::tryParseJavaTextBlock() { + if (FormatTok->TokenText != "\"\"") + return; + + const auto *S = Lex->getBufferLocation(); + const auto *End = Lex->getBuffer().end(); + + if (S == End || *S != '\"') + return; + + ++S; // Skip the `"""` that begins a text block. + + // Find the `"""` that ends the text block. + for (int Count = 0; Count < 3 && S < End; ++S) { + switch (*S) { + case '\\': + Count = -1; + break; + case '\"': + ++Count; + break; + default: + Count = 0; + } + } + + // Ignore the possibly invalid text block. + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S))); +} + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. Changes the current token to a regex literal and updates @@ -1326,6 +1356,9 @@ FormatToken *FormatTokenLexer::getNextToken() { FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); ++Column; StateStack.push(LexerState::TOKEN_STASHED); + } else if (Style.Language == FormatStyle::LK_Java && + FormatTok->is(tok::string_literal)) { + tryParseJavaTextBlock(); } if (Style.isVerilog() && Tokens.size() > 0 && diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index 61474a3f9ada8..d9a25c8ef3538 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -71,6 +71,8 @@ class FormatTokenLexer { bool canPrecedeRegexLiteral(FormatToken *Prev); + void tryParseJavaTextBlock(); + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. Changes the current token to a regex literal and updates diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp index 33998bc7ff858..d0a3b4eb96d69 100644 --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -789,6 +789,63 @@ TEST_F(FormatTestJava, AlignCaseArrows) { Style); } +TEST_F(FormatTestJava, TextBlock) { + verifyNoChange("String myStr = \"\"\"\n" + "hello\n" + "there\n" + "\"\"\";"); + + verifyNoChange("String tb = \"\"\"\n" + " the new\"\"\";"); + + verifyNoChange("System.out.println(\"\"\"\n" + " This is the first line\n" + " This is the second line\n" + " \"\"\");"); + + verifyNoChange("void writeHTML() {\n" + " String html = \"\"\" \n" + " \n" + "

Hello World.

\n" + " \n" + "\"\"\";\n" + " writeOutput(html);\n" + "}"); + + verifyNoChange("String colors = \"\"\"\t\n" + " red\n" + " green\n" + " blue\"\"\".indent(4);"); + + verifyNoChange("String code = \"\"\"\n" + " String source = \\\"\"\"\n" + " String message = \"Hello, World!\";\n" + " System.out.println(message);\n" + " \\\"\"\";\n" + " \"\"\";"); + + verifyNoChange( + "class Outer {\n" + " void printPoetry() {\n" + " String lilacs = \"\"\"\n" + "Passing the apple-tree blows of white and pink in the orchards\n" + "\"\"\";\n" + " System.out.println(lilacs);\n" + " }\n" + "}"); + + verifyNoChange("String name = \"\"\"\r\n" + " red\n" + " green\n" + " blue\\\n" + " \"\"\";"); + + verifyFormat("String name = \"\"\"Pat Q. Smith\"\"\";"); + + verifyNoChange("String name = \"\"\"\n" + " Pat Q. Smith"); +} + } // namespace } // namespace test } // namespace format From 47addd4540b4c393e478ba92bea2589e330c57fb Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 21 May 2025 15:57:38 +0200 Subject: [PATCH 49/83] [libclc] Include isnan implementation for SPIR-V targets The fma software emulation requires it. Similar to https://github.com/llvm/llvm-project/pull/124614 --- libclc/clc/lib/spirv/SOURCES | 1 + libclc/clc/lib/spirv64/SOURCES | 1 + 2 files changed, 2 insertions(+) diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES index 509236d587cd0..f97a1407f6631 100644 --- a/libclc/clc/lib/spirv/SOURCES +++ b/libclc/clc/lib/spirv/SOURCES @@ -10,5 +10,6 @@ ../generic/math/clc_nextafter.cl ../generic/math/clc_rint.cl ../generic/math/clc_trunc.cl +../generic/relational/clc_isnan.cl ../generic/relational/clc_select.cl ../generic/shared/clc_clamp.cl diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES index 509236d587cd0..f97a1407f6631 100644 --- a/libclc/clc/lib/spirv64/SOURCES +++ b/libclc/clc/lib/spirv64/SOURCES @@ -10,5 +10,6 @@ ../generic/math/clc_nextafter.cl ../generic/math/clc_rint.cl ../generic/math/clc_trunc.cl +../generic/relational/clc_isnan.cl ../generic/relational/clc_select.cl ../generic/shared/clc_clamp.cl From 7cf14539b644fd29d1c2a60f335d80d34f269f21 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 28 May 2025 11:32:12 -0700 Subject: [PATCH 50/83] Bump version to 20.1.7 --- cmake/Modules/LLVMVersion.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index 40ba164763d60..1887043b7612a 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 6) + set(LLVM_VERSION_PATCH 7) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) From 7759bb57c24390797ee34fa58a5e1234f5aa9369 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Thu, 29 May 2025 00:28:21 -0300 Subject: [PATCH 51/83] [clang] Serialization: support hashing null template arguments When performing overload resolution during code completion, clang will allow incomplete substitutions in more places than would be allowed for valid code, because for completion to work well, it needs clang to keep going so it can explore the space of possibilities. Notably, we accept instantiating declarations will null template arguments, and this works fine, except that when lazily loading serialzied templated declarations, the template argument hasher assumes null arguments can't be used. This patch makes the hasher happily accept that. 
Fixes https://github.com/llvm/llvm-project/issues/139019 --- clang/docs/ReleaseNotes.rst | 1 + .../Serialization/TemplateArgumentHasher.cpp | 4 ++- clang/test/CodeCompletion/GH139019.cpp | 26 +++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeCompletion/GH139019.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7b84210fddab3..262bf4e3d4f5b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1118,6 +1118,7 @@ Miscellaneous Clang Crashes Fixed - Fixed a crash when an unscoped enumeration declared by an opaque-enum-declaration within a class template with a dependent underlying type is subject to integral promotion. (#GH117960) +- Fix code completion crash involving PCH serialzied templates. (#GH139019) OpenACC Specific Changes ------------------------ diff --git a/clang/lib/Serialization/TemplateArgumentHasher.cpp b/clang/lib/Serialization/TemplateArgumentHasher.cpp index 598f098f526d0..5fd6941256fe2 100644 --- a/clang/lib/Serialization/TemplateArgumentHasher.cpp +++ b/clang/lib/Serialization/TemplateArgumentHasher.cpp @@ -65,7 +65,9 @@ void TemplateArgumentHasher::AddTemplateArgument(TemplateArgument TA) { switch (Kind) { case TemplateArgument::Null: - llvm_unreachable("Expected valid TemplateArgument"); + // These can occur in incomplete substitutions performed with code + // completion (see PartialOverloading). + break; case TemplateArgument::Type: AddQualType(TA.getAsType()); break; diff --git a/clang/test/CodeCompletion/GH139019.cpp b/clang/test/CodeCompletion/GH139019.cpp new file mode 100644 index 0000000000000..fed35b38362a1 --- /dev/null +++ b/clang/test/CodeCompletion/GH139019.cpp @@ -0,0 +1,26 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/test.hpp -emit-pch -o %t/1.pch +// RUN: %clang_cc1 -std=c++20 %t/test.cpp -include-pch %t/1.pch -code-completion-at=%t/test.cpp:7:17 + +//--- test.hpp +#pragma once +class provider_t +{ + public: + template + void emit(T *data) + {} +}; + +//--- test.cpp +#include "test.hpp" + +void test() +{ + provider_t *focus; + void *data; + focus->emit(&data); +} From e0586e278f96e4f104c8b8039a41c6832c35be22 Mon Sep 17 00:00:00 2001 From: dianqk Date: Sun, 1 Jun 2025 22:17:57 +0800 Subject: [PATCH 52/83] [RelLookupTableConverter] Drop unnamed_addr to avoid generating GOTPCREL relocations (#142304) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow https://github.com/llvm/llvm-project/pull/72584#issuecomment-2925759253, the patch will drop the `unnamed_addr` attribute when generating relative lookup tables. I'm not very confident about this patch, but it does resolve https://github.com/rust-lang/rust/issues/140686, https://github.com/rust-lang/rust/issues/141306 and https://github.com/rust-lang/rust/issues/141737. But I don't think this will result in worse problems. > LLVM provides that the calculation of such a constant initializer will not overflow at link time under the medium code model if x is an unnamed_addr function. However, it does not provide this guarantee for a constant initializer folded into a function body. This intrinsic can be used to avoid the possibility of overflows when loading from such a constant. ([‘llvm.load.relative’ Intrinsic](https://llvm.org/docs/LangRef.html#id2592)) This is my concern. I'm not sure how unnamed_addr provides this guarantee, and I haven't found any test cases. 
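For context, a minimal hypothetical C++ example (not taken from this
patch) of the kind of source that typically becomes a lookup table of
pointers to private unnamed_addr string constants, which
RelLookupTableConverter may then rewrite into a table of 32-bit offsets
read through llvm.load.relative:

    // Hypothetical sketch: a dense switch over string literals is the
    // classic shape that SimplifyCFG turns into a pointer lookup table.
    // On the targets special-cased by this change (AArch64, and x86
    // Darwin), the referenced constants lose unnamed_addr when the
    // relative table is built, so the pattern that produces
    // GOTPCREL-style relocations is avoided.
    const char *color_name(unsigned c) {
      switch (c) {
      case 0: return "red";
      case 1: return "green";
      case 2: return "blue";
      default: return "unknown";
      }
    }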
(cherry picked from commit aa09dbbbcfd4724bd04eea48763a1322f659637d) --- .../Utils/RelLookupTableConverter.cpp | 17 +++ .../RelLookupTableConverter/unnamed_addr.ll | 119 ++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp index 2700b4307308c..4486cba2bf6c0 100644 --- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp +++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/TargetParser/Triple.h" using namespace llvm; @@ -108,8 +109,24 @@ static GlobalVariable *createRelLookupTable(Function &Func, uint64_t Idx = 0; SmallVector RelLookupTableContents(NumElts); + Triple TT(M.getTargetTriple()); + // FIXME: This should be removed in the future. + bool ShouldDropUnnamedAddr = + // Drop unnamed_addr to avoid matching pattern in + // `handleIndirectSymViaGOTPCRel`, which generates GOTPCREL relocations + // not supported by the GNU linker and LLD versions below 18 on aarch64. + TT.isAArch64() + // Apple's ld64 (and ld-prime on Xcode 15.2) miscompile something on + // x86_64-apple-darwin. See + // https://github.com/rust-lang/rust/issues/140686 and + // https://github.com/rust-lang/rust/issues/141306. + || (TT.isX86() && TT.isOSDarwin()); + for (Use &Operand : LookupTableArr->operands()) { Constant *Element = cast(Operand); + if (ShouldDropUnnamedAddr) + if (auto *GlobalElement = dyn_cast(Element)) + GlobalElement->setUnnamedAddr(GlobalValue::UnnamedAddr::None); Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext()); Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy); Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy); diff --git a/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll b/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll new file mode 100644 index 0000000000000..78b8a4aa126c9 --- /dev/null +++ b/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; REQUIRES: x86-registered-target +; REQUIRES: aarch64-registered-target +; RUN: opt < %s -passes=rel-lookup-table-converter -relocation-model=pic -mtriple=x86_64-apple-darwin -S | FileCheck -check-prefix=x86_64-apple-darwin %s +; RUN: opt < %s -passes=rel-lookup-table-converter -relocation-model=pic -mtriple=aarch64 -S | FileCheck -check-prefix=aarch64 %s +; RUN: opt < %s -passes=rel-lookup-table-converter -relocation-model=pic -mtriple=x86_64 -S | FileCheck -check-prefix=x86_64 %s + +@a0 = private unnamed_addr constant i32 0 +@a1 = private unnamed_addr constant i32 1 +@a2 = private unnamed_addr constant i32 2 +@load_relative_1.table = private unnamed_addr constant [3 x ptr] [ptr @a0, ptr @a1, ptr @a2] + +@x0 = internal unnamed_addr constant i64 0 +@x1 = internal unnamed_addr constant i64 1 +@x2 = internal unnamed_addr constant i64 2 +@x3 = internal unnamed_addr constant i64 3 +@y0 = internal unnamed_addr constant ptr @x3 +@y1 = internal unnamed_addr constant ptr @x2 +@y2 = internal unnamed_addr constant ptr @x1 +@y3 = internal unnamed_addr constant ptr @x0 +@load_relative_2.table = private unnamed_addr constant [4 x ptr] [ptr @y3, ptr @y2, ptr @y1, ptr @y0] + +;. 
+; x86_64-apple-darwin: @a0 = private constant i32 0 +; x86_64-apple-darwin: @a1 = private constant i32 1 +; x86_64-apple-darwin: @a2 = private constant i32 2 +; x86_64-apple-darwin: @load_relative_1.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @a0 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a1 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a2 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32)], align 4 +; x86_64-apple-darwin: @x0 = internal unnamed_addr constant i64 0 +; x86_64-apple-darwin: @x1 = internal unnamed_addr constant i64 1 +; x86_64-apple-darwin: @x2 = internal unnamed_addr constant i64 2 +; x86_64-apple-darwin: @x3 = internal unnamed_addr constant i64 3 +; x86_64-apple-darwin: @y0 = internal constant ptr @x3 +; x86_64-apple-darwin: @y1 = internal constant ptr @x2 +; x86_64-apple-darwin: @y2 = internal constant ptr @x1 +; x86_64-apple-darwin: @y3 = internal constant ptr @x0 +; x86_64-apple-darwin: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +;. +; aarch64: @a0 = private constant i32 0 +; aarch64: @a1 = private constant i32 1 +; aarch64: @a2 = private constant i32 2 +; aarch64: @load_relative_1.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @a0 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a1 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a2 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32)], align 4 +; aarch64: @x0 = internal unnamed_addr constant i64 0 +; aarch64: @x1 = internal unnamed_addr constant i64 1 +; aarch64: @x2 = internal unnamed_addr constant i64 2 +; aarch64: @x3 = internal unnamed_addr constant i64 3 +; aarch64: @y0 = internal constant ptr @x3 +; aarch64: @y1 = internal constant ptr @x2 +; aarch64: @y2 = internal constant ptr @x1 +; aarch64: @y3 = internal constant ptr @x0 +; aarch64: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +;. 
+; x86_64: @a0 = private unnamed_addr constant i32 0 +; x86_64: @a1 = private unnamed_addr constant i32 1 +; x86_64: @a2 = private unnamed_addr constant i32 2 +; x86_64: @load_relative_1.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @a0 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a1 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @a2 to i64), i64 ptrtoint (ptr @load_relative_1.table.rel to i64)) to i32)], align 4 +; x86_64: @x0 = internal unnamed_addr constant i64 0 +; x86_64: @x1 = internal unnamed_addr constant i64 1 +; x86_64: @x2 = internal unnamed_addr constant i64 2 +; x86_64: @x3 = internal unnamed_addr constant i64 3 +; x86_64: @y0 = internal unnamed_addr constant ptr @x3 +; x86_64: @y1 = internal unnamed_addr constant ptr @x2 +; x86_64: @y2 = internal unnamed_addr constant ptr @x1 +; x86_64: @y3 = internal unnamed_addr constant ptr @x0 +; x86_64: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +;. +define ptr @load_relative_1(i64 %offset) { +; x86_64-apple-darwin-LABEL: define ptr @load_relative_1( +; x86_64-apple-darwin-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-apple-darwin-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-apple-darwin-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_1.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-apple-darwin-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; aarch64-LABEL: define ptr @load_relative_1( +; aarch64-SAME: i64 [[OFFSET:%.*]]) { +; aarch64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; aarch64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_1.table.rel, i64 [[RELTABLE_SHIFT]]) +; aarch64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; x86_64-LABEL: define ptr @load_relative_1( +; x86_64-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_1.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; + %gep = getelementptr inbounds [3 x ptr], ptr @load_relative_1.table, i64 0, i64 %offset + %load = load ptr, ptr %gep + ret ptr %load +} + +define ptr @load_relative_2(i64 %offset) { +; x86_64-apple-darwin-LABEL: define ptr @load_relative_2( +; x86_64-apple-darwin-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-apple-darwin-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-apple-darwin-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_2.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-apple-darwin-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; aarch64-LABEL: define ptr @load_relative_2( +; aarch64-SAME: i64 [[OFFSET:%.*]]) { +; aarch64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; aarch64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_2.table.rel, i64 [[RELTABLE_SHIFT]]) 
+; aarch64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; x86_64-LABEL: define ptr @load_relative_2( +; x86_64-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_2.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; + %gep = getelementptr inbounds [4 x ptr], ptr @load_relative_2.table, i64 0, i64 %offset + %load = load ptr, ptr %gep + ret ptr %load +} +;. +; x86_64-apple-darwin: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +;. +; aarch64: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +;. +; x86_64: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +;. From f6532710ace867e5660ea9e47ab77f2b8349896f Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Mon, 2 Jun 2025 13:35:27 -0700 Subject: [PATCH 53/83] [clang-format] Correctly annotate token-pasted function decl names (#142337) Fix #142178 (cherry picked from commit 7bf5862dbfda590282f50b14e6d7d5f990bf1900) --- clang/lib/Format/TokenAnnotator.cpp | 2 ++ clang/unittests/Format/TokenAnnotatorTest.cpp | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 11b941c5a0411..0c13356ca96de 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3839,6 +3839,8 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts, } else { if (Current.isNot(TT_StartOfName) || Current.NestingLevel != 0) return false; + while (Next && Next->startsSequence(tok::hashhash, tok::identifier)) + Next = Next->Next->Next; for (; Next; Next = Next->Next) { if (Next->is(TT_TemplateOpener) && Next->MatchingParen) { Next = Next->MatchingParen; diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 757db66c3e298..602c2d5eba29a 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2187,6 +2187,13 @@ TEST_F(TokenAnnotatorTest, UnderstandsFunctionDeclarationNames) { EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName); EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_FunctionDeclarationLParen); + Tokens = annotate("#define FUNC(foo, bar, baz) \\\n" + " auto foo##bar##baz() -> Type {}"); + ASSERT_EQ(Tokens.size(), 23u) << Tokens; + EXPECT_TOKEN(Tokens[11], tok::identifier, TT_FunctionDeclarationName); + EXPECT_TOKEN(Tokens[16], tok::l_paren, TT_FunctionDeclarationLParen); + EXPECT_TOKEN(Tokens[18], tok::arrow, TT_TrailingReturnArrow); + Tokens = annotate("int iso_time(time_t);"); ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName); From e5dd4f129454b518ab5f8d4011997746490bb3a3 Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Mon, 9 Jun 2025 00:33:20 -0400 Subject: [PATCH 54/83] [clangd] Guard against trivial FunctionProtoTypeLoc when creating inlay hints (#143087) Fixes https://github.com/llvm/llvm-project/issues/142608 (cherry picked from commit 392bd577e37d795224da6fefc4b621a3f117105e) --- clang-tools-extra/clangd/InlayHints.cpp | 7 ++++++- clang-tools-extra/clangd/unittests/InlayHintTests.cpp | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/InlayHints.cpp b/clang-tools-extra/clangd/InlayHints.cpp index 
1b1bcf78c9855..a2b856ad30519 100644 --- a/clang-tools-extra/clangd/InlayHints.cpp +++ b/clang-tools-extra/clangd/InlayHints.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/identity.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" @@ -368,7 +369,11 @@ static FunctionProtoTypeLoc getPrototypeLoc(Expr *Fn) { } if (auto F = Target.getAs()) { - return F; + // In some edge cases the AST can contain a "trivial" FunctionProtoTypeLoc + // which has null parameters. Avoid these as they don't contain useful + // information. + if (llvm::all_of(F.getParams(), llvm::identity())) + return F; } return {}; diff --git a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp index 77d78b8777fe3..8ed8401f9fce9 100644 --- a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp +++ b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp @@ -997,11 +997,16 @@ TEST(ParameterHints, FunctionPointer) { f3_t f3; using f4_t = void(__stdcall *)(int param); f4_t f4; + __attribute__((noreturn)) f4_t f5; void bar() { f1($f1[[42]]); f2($f2[[42]]); f3($f3[[42]]); f4($f4[[42]]); + // This one runs into an edge case in clang's type model + // and we can't extract the parameter name. But at least + // we shouldn't crash. + f5(42); } )cpp", ExpectedHint{"param: ", "f1"}, ExpectedHint{"param: ", "f2"}, From 22a3e6b194092182c592bdf48eef9d37c53b0cbf Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 29 May 2025 10:22:24 +0100 Subject: [PATCH 55/83] release/20.x: [AArch64] Handle XAR with v1i64 operand types (#141754) When converting ROTR(XOR(a, b)) to XAR(a, b), or ROTR(a, a) to XAR(a, zero) we were not handling v1i64 types, meaning illegal copies get generated. This addresses that by generating insert_subreg and extract_subreg for v1i64 to keep the values with the correct types. Fixes #141746 --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 24 ++++++++++++++++++- llvm/test/CodeGen/AArch64/xar.ll | 20 ++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 1387a224fa660..0aad7665f6216 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4608,9 +4608,31 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) { if (ShAmt + HsAmt != 64) return false; + // If the input is a v1i64, widen to a v2i64 to use XAR. 
+ assert((VT == MVT::v1i64 || VT == MVT::v2i64) && "Unexpected XAR type!"); + if (VT == MVT::v1i64) { + EVT SVT = MVT::v2i64; + SDValue Undef = + SDValue(CurDAG->getMachineNode(AArch64::IMPLICIT_DEF, DL, SVT), 0); + SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); + R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef, + R1, DSub), + 0); + if (R2.getValueType() == MVT::v1i64) + R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, + Undef, R2, DSub), + 0); + } + SDValue Ops[] = {R1, R2, Imm}; - CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops); + SDNode *XAR = CurDAG->getMachineNode(AArch64::XAR, DL, MVT::v2i64, Ops); + if (VT == MVT::v1i64) { + SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); + XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT, + SDValue(XAR, 0), DSub); + } + ReplaceNode(N, XAR); return true; } diff --git a/llvm/test/CodeGen/AArch64/xar.ll b/llvm/test/CodeGen/AArch64/xar.ll index d050eaf6646de..5666ab35cde48 100644 --- a/llvm/test/CodeGen/AArch64/xar.ll +++ b/llvm/test/CodeGen/AArch64/xar.ll @@ -19,4 +19,24 @@ define <2 x i64> @xar(<2 x i64> %x, <2 x i64> %y) { ret <2 x i64> %b } +define <1 x i64> @xar_v1i64(<1 x i64> %a, <1 x i64> %b) { +; SHA3-LABEL: xar_v1i64: +; SHA3: // %bb.0: +; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0 +; SHA3-NEXT: // kill: def $d1 killed $d1 def $q1 +; SHA3-NEXT: xar v0.2d, v0.2d, v1.2d, #63 +; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0 +; SHA3-NEXT: ret +; +; NOSHA3-LABEL: xar_v1i64: +; NOSHA3: // %bb.0: +; NOSHA3-NEXT: eor v1.8b, v0.8b, v1.8b +; NOSHA3-NEXT: shl d0, d1, #1 +; NOSHA3-NEXT: usra d0, d1, #63 +; NOSHA3-NEXT: ret + %v.val = xor <1 x i64> %a, %b + %fshl = tail call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %v.val, <1 x i64> %v.val, <1 x i64> splat (i64 1)) + ret <1 x i64> %fshl +} + declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) From 2481e590eec725f2ad6b4945eebb978f93578404 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Mon, 2 Jun 2025 10:52:10 +0100 Subject: [PATCH 56/83] [AArch64][SME] Fix accessing the emergency spill slot with hazard padding (#142190) This patch fixes an issue where when hazard padding was enabled locals, including the emergency spill slot, could not be directly addressed. Generally, this is fine, we can materialize the constant offset in a scratch register, but if there's no register free we need to spill, and if we can't even reach the emergency spill slot then we fail to compile. This patch fixes this by ensuring that if a function has variable-sized objects and is likely to have hazard padding we enable the base pointer. Then if we know a function has hazard padding, place the emergency spill slot next to the BP/SP, to ensure it can be directly accessed without stepping over any hazard padding. 
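As a rough illustration (hypothetical C++, not one of the tests added
below; assumes an SME-enabled AArch64 target and a non-zero
-aarch64-stack-hazard-size), the problematic shape is a
streaming-compatible function that also has a variable-sized object:

    // Sketch only: the dynamic alloca makes SP unusable for addressing
    // locals, and hazard padding can push GPR locals (including the
    // emergency spill slot) beyond the signed 9-bit offset reachable
    // from FP, so the base pointer is now enabled conservatively and
    // the emergency slot is placed next to SP/BP.
    void consume(void *p);
    void f(int n) __arm_streaming_compatible {
      void *buf = __builtin_alloca(n);  // variable-sized object
      consume(buf);
    }

The tests below express the same condition directly at the IR/MIR level
via the "aarch64_pstate_sm_compatible" attribute.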
(cherry picked from commit b5cf03033251a642b184b2e0ea6bdac171c17702) --- .../Target/AArch64/AArch64RegisterInfo.cpp | 19 +++- ...ramelayout-scavengingslot-stack-hazard.mir | 99 +++++++++++++++++++ llvm/test/CodeGen/AArch64/stack-hazard.ll | 30 +++--- 3 files changed, 130 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index e9730348ba58e..367f6b626b420 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -18,6 +18,7 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64InstPrinter.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/BitVector.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -615,14 +616,27 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return true; auto &ST = MF.getSubtarget(); + const AArch64FunctionInfo *AFI = MF.getInfo(); if (ST.hasSVE() || ST.isStreaming()) { - const AArch64FunctionInfo *AFI = MF.getInfo(); // Frames that have variable sized objects and scalable SVE objects, // should always use a basepointer. if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE()) return true; } + // Frames with hazard padding can have a large offset between the frame + // pointer and GPR locals, which includes the emergency spill slot. If the + // emergency spill slot is not within range of the load/store instructions + // (which have a signed 9-bit range), we will fail to compile if it is used. + // Since hasBasePointer() is called before we know if we have hazard padding + // or an emergency spill slot we need to enable the basepointer + // conservatively. + if (AFI->hasStackHazardSlotIndex() || + (ST.getStreamingHazardSize() && + !SMEAttrs(MF.getFunction()).hasNonStreamingInterfaceAndBody())) { + return true; + } + // Conservatively estimate whether the negative offset from the frame // pointer will be sufficient to reach. If a function has a smallish // frame, it's less likely to have lots of spills and callee saved @@ -747,7 +761,8 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { assert((!MF.getSubtarget().hasSVE() || AFI->hasCalculatedStackSizeSVE()) && "Expected SVE area to be calculated by this point"); - return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE(); + return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() && + !AFI->hasStackHazardSlotIndex(); } bool AArch64RegisterInfo::requiresFrameIndexScavenging( diff --git a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir new file mode 100644 index 0000000000000..52ac36f801854 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir @@ -0,0 +1,99 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-stack-hazard-size=1024 -run-pass=prologepilog %s -o - | FileCheck %s +--- | + + define void @stack_hazard_streaming_compat() "aarch64_pstate_sm_compatible" { entry: unreachable } + define void @stack_hazard_streaming_compat_emergency_spill_slot() "aarch64_pstate_sm_compatible" { entry: unreachable } + +... 
+ +# +------------------+ +# | GPR callee-saves | +# +------------------+ <- FP +# | | +# +------------------+ +# | FPR locals | +# | %stack.1 | +# +------------------+ +# | | +# +------------------+ +# | GPR locals | +# | %stack.2 | +# | | +# +------------------+ <- BP +# | | +# +------------------+ <- SP (can't be used due to VLA) + +# In this case without the base pointer we'd need the emergency spill slot to +# access both %stack.1 and %stack.2. With the base pointer we can reach both +# without spilling. + +name: stack_hazard_streaming_compat +# CHECK-LABEL: name: stack_hazard_streaming_compat +# CHECK: bb.0: +# CHECK: STRDui $d0, $x19, 131 +# CHECK-NEXT: STRXui $x0, $x19, 1 +# CHECK: bb.1: +tracksRegLiveness: true +frameInfo: + isFrameAddressTaken: true +stack: + - { id: 0, type: variable-sized, alignment: 1 } + - { id: 1, size: 8, alignment: 8 } + - { id: 2, size: 8, alignment: 8 } +body: | + bb.0: + liveins: $x0, $x8, $d0 + $x9 = LDRXui $x0, 0 :: (load (s64)) + STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1) + STRXui $x0, %stack.2, 0 :: (store (s64) into %stack.2) + B %bb.1 + bb.1: + liveins: $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr + RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr +... +--- +# +------------------+ +# | GPR callee-saves | +# +------------------+ <- FP +# | | +# +------------------+ +# | FPR locals | +# | %stack.1 | +# +------------------+ +# | | +# +------------------+ +# | GPR locals | +# | %stack.2 | (very large) +# | | +# +------------------+ <- BP +# | | +# +------------------+ <- SP (can't be used due to VLA) + +# In this case we need to use the emergency spill slot to access %stack.1 as it +# is too far from the frame pointer and the base pointer to directly address. +# Note: This also tests that the located near the SP/BP. 
+ +name: stack_hazard_streaming_compat_emergency_spill_slot +# CHECK-LABEL: name: stack_hazard_streaming_compat_emergency_spill_slot +# CHECK: bb.0: +# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $x19, 0 +# CHECK-NEXT: $[[SCRATCH]] = ADDXri $x19, 1056, 0 +# CHECK-NEXT: STRDui $d0, killed $[[SCRATCH]], 4095 +# CHECK-NEXT: $[[SCRATCH]] = LDRXui $x19, 0 +# CHECK: bb.1: +tracksRegLiveness: true +frameInfo: + isFrameAddressTaken: true +stack: + - { id: 0, type: variable-sized, alignment: 1 } + - { id: 1, size: 8, alignment: 8 } + - { id: 2, size: 32761, alignment: 8 } +body: | + bb.0: + liveins: $x0, $x8, $d0 + $x9 = LDRXui $x0, 0 :: (load (s64)) + STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1) + B %bb.1 + bb.1: + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr + RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll index a4c2b30566a95..df4918493edf8 100644 --- a/llvm/test/CodeGen/AArch64/stack-hazard.ll +++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll @@ -2911,12 +2911,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK64-NEXT: mov x9, sp ; CHECK64-NEXT: mov w20, w0 ; CHECK64-NEXT: msub x9, x8, x8, x9 +; CHECK64-NEXT: mov x19, sp ; CHECK64-NEXT: mov sp, x9 -; CHECK64-NEXT: stur x9, [x29, #-208] -; CHECK64-NEXT: sub x9, x29, #208 -; CHECK64-NEXT: sturh wzr, [x29, #-198] -; CHECK64-NEXT: stur wzr, [x29, #-196] -; CHECK64-NEXT: sturh w8, [x29, #-200] +; CHECK64-NEXT: str x9, [x19] +; CHECK64-NEXT: add x9, x19, #0 +; CHECK64-NEXT: strh wzr, [x19, #10] +; CHECK64-NEXT: str wzr, [x19, #12] +; CHECK64-NEXT: strh w8, [x19, #8] ; CHECK64-NEXT: msr TPIDR2_EL0, x9 ; CHECK64-NEXT: .cfi_offset vg, -32 ; CHECK64-NEXT: smstop sm @@ -2925,7 +2926,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK64-NEXT: .cfi_restore vg ; CHECK64-NEXT: smstart za ; CHECK64-NEXT: mrs x8, TPIDR2_EL0 -; CHECK64-NEXT: sub x0, x29, #208 +; CHECK64-NEXT: add x0, x19, #0 ; CHECK64-NEXT: cbnz x8, .LBB33_2 ; CHECK64-NEXT: // %bb.1: // %entry ; CHECK64-NEXT: bl __arm_tpidr2_restore @@ -2991,16 +2992,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK1024-NEXT: mov x9, sp ; CHECK1024-NEXT: mov w20, w0 ; CHECK1024-NEXT: msub x9, x8, x8, x9 +; CHECK1024-NEXT: mov x19, sp ; CHECK1024-NEXT: mov sp, x9 -; CHECK1024-NEXT: sub x10, x29, #1872 -; CHECK1024-NEXT: stur x9, [x10, #-256] -; CHECK1024-NEXT: sub x9, x29, #1862 -; CHECK1024-NEXT: sub x10, x29, #1860 -; CHECK1024-NEXT: sturh wzr, [x9, #-256] -; CHECK1024-NEXT: sub x9, x29, #2128 -; CHECK1024-NEXT: stur wzr, [x10, #-256] -; CHECK1024-NEXT: sub x10, x29, #1864 -; CHECK1024-NEXT: sturh w8, [x10, #-256] +; CHECK1024-NEXT: str x9, [x19] +; CHECK1024-NEXT: add x9, x19, #0 +; CHECK1024-NEXT: strh wzr, [x19, #10] +; CHECK1024-NEXT: str wzr, [x19, #12] +; CHECK1024-NEXT: strh w8, [x19, #8] ; CHECK1024-NEXT: msr TPIDR2_EL0, x9 ; CHECK1024-NEXT: .cfi_offset vg, -32 ; CHECK1024-NEXT: smstop sm @@ -3009,7 +3007,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK1024-NEXT: .cfi_restore vg ; CHECK1024-NEXT: smstart za ; CHECK1024-NEXT: mrs x8, TPIDR2_EL0 -; CHECK1024-NEXT: sub x0, 
x29, #2128 +; CHECK1024-NEXT: add x0, x19, #0 ; CHECK1024-NEXT: cbnz x8, .LBB33_2 ; CHECK1024-NEXT: // %bb.1: // %entry ; CHECK1024-NEXT: bl __arm_tpidr2_restore From acf86c5c4dbe8a65581f5438a61a8dddc4d02b6d Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 2 Jun 2025 17:42:02 +0800 Subject: [PATCH 57/83] [CVP] Keep `ReachableCaseCount` in sync with range of condition (#142302) https://github.com/llvm/llvm-project/pull/79993 assumes that a reachable case must be contained by `CR`. However, it doesn't hold for some edge cases. This patch adds additional checks to ensure `ReachableCaseCount` is correct. Note: Similar optimization in SCCP isn't affected by this bug because it uses `CR` to compute `ReachableCaseCount`. Closes https://github.com/llvm/llvm-project/issues/142286. --- .../Scalar/CorrelatedValuePropagation.cpp | 59 +++++++++++-------- .../CorrelatedValuePropagation/switch.ll | 36 +++++++++++ 2 files changed, 71 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 8e74b8645fad9..86c4170b9a977 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -370,15 +370,30 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, { // Scope for SwitchInstProfUpdateWrapper. It must not live during // ConstantFoldTerminator() as the underlying SwitchInst can be changed. SwitchInstProfUpdateWrapper SI(*I); + ConstantRange CR = + LVI->getConstantRangeAtUse(I->getOperandUse(0), /*UndefAllowed=*/false); unsigned ReachableCaseCount = 0; for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) { ConstantInt *Case = CI->getCaseValue(); - auto *Res = dyn_cast_or_null( - LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I, - /* UseBlockValue */ true)); + std::optional Predicate = std::nullopt; + if (!CR.contains(Case->getValue())) + Predicate = false; + else if (CR.isSingleElement() && + *CR.getSingleElement() == Case->getValue()) + Predicate = true; + if (!Predicate) { + // Handle missing cases, e.g., the range has a hole. + auto *Res = dyn_cast_or_null( + LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I, + /* UseBlockValue=*/true)); + if (Res && Res->isZero()) + Predicate = false; + else if (Res && Res->isOne()) + Predicate = true; + } - if (Res && Res->isZero()) { + if (Predicate && !*Predicate) { // This case never fires - remove it. BasicBlock *Succ = CI->getCaseSuccessor(); Succ->removePredecessor(BB); @@ -395,7 +410,7 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}}); continue; } - if (Res && Res->isOne()) { + if (Predicate && *Predicate) { // This case always fires. Arrange for the switch to be turned into an // unconditional branch by replacing the switch condition with the case // value. @@ -410,28 +425,24 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, ++ReachableCaseCount; } - BasicBlock *DefaultDest = SI->getDefaultDest(); - if (ReachableCaseCount > 1 && - !isa(DefaultDest->getFirstNonPHIOrDbg())) { - ConstantRange CR = LVI->getConstantRangeAtUse(I->getOperandUse(0), - /*UndefAllowed*/ false); - // The default dest is unreachable if all cases are covered. 
- if (!CR.isSizeLargerThan(ReachableCaseCount)) { - BasicBlock *NewUnreachableBB = - BasicBlock::Create(BB->getContext(), "default.unreachable", - BB->getParent(), DefaultDest); - new UnreachableInst(BB->getContext(), NewUnreachableBB); + // The default dest is unreachable if all cases are covered. + if (!SI->defaultDestUndefined() && + !CR.isSizeLargerThan(ReachableCaseCount)) { + BasicBlock *DefaultDest = SI->getDefaultDest(); + BasicBlock *NewUnreachableBB = + BasicBlock::Create(BB->getContext(), "default.unreachable", + BB->getParent(), DefaultDest); + new UnreachableInst(BB->getContext(), NewUnreachableBB); - DefaultDest->removePredecessor(BB); - SI->setDefaultDest(NewUnreachableBB); + DefaultDest->removePredecessor(BB); + SI->setDefaultDest(NewUnreachableBB); - if (SuccessorsCount[DefaultDest] == 1) - DTU.applyUpdates({{DominatorTree::Delete, BB, DefaultDest}}); - DTU.applyUpdates({{DominatorTree::Insert, BB, NewUnreachableBB}}); + if (SuccessorsCount[DefaultDest] == 1) + DTU.applyUpdates({{DominatorTree::Delete, BB, DefaultDest}}); + DTU.applyUpdates({{DominatorTree::Insert, BB, NewUnreachableBB}}); - ++NumDeadCases; - Changed = true; - } + ++NumDeadCases; + Changed = true; } } diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/switch.ll b/llvm/test/Transforms/CorrelatedValuePropagation/switch.ll index a0794d5efe932..7e6aa3eeebe20 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/switch.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/switch.ll @@ -294,6 +294,42 @@ cleanup: ret i32 %retval.0 } +; Make sure that we don't branch into unreachable. + +define void @pr142286() { +; CHECK-LABEL: define void @pr142286() { +; CHECK-NEXT: start: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br label [[LOOP2:%.*]] +; CHECK: loop2: +; CHECK-NEXT: br label [[LOOP3:%.*]] +; CHECK: loop3: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +start: + br label %loop + +loop: + %phi = phi i8 [ -1, %start ], [ 0, %loop3 ] + br label %loop2 + +loop2: + br label %loop3 + +loop3: + switch i8 %phi, label %exit [ + i8 0, label %loop3 + i8 1, label %loop2 + i8 2, label %loop + ] + +exit: + ret void +} + declare i32 @call0() declare i32 @call1() declare i32 @call2() From 253e9321c8b6bfcecee166312a5f7da81633764e Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 12 Jun 2025 02:18:21 +0200 Subject: [PATCH 58/83] [release/20.x] Update release notes for SystemZ changes (#140060) --- llvm/docs/ReleaseNotes.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index f34003eaf0fe2..ef4ec9b56f364 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -349,6 +349,15 @@ Changes to the RISC-V Backend extension. * Added ``Sdext`` and ``Sdtrig`` extensions. +Changes to the SystemZ Backend +------------------------------ + +* Added support for the IBM z17 processor and the arch15 cpu architecture. +* Added support for `__builtin_setjump` and `__builtin_longjmp`. +* Improve inlining heuristics to fix compile time explosion in certain cases. +* Improve various cost functions. +* Improve compatibility of the assembler parser with the GNU assembler. 
+ Changes to the WebAssembly Backend ---------------------------------- From 2da24c36c7df73a686b2d990963faf3f738a510f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 28 Feb 2025 20:43:46 -0100 Subject: [PATCH 59/83] [libcxx] Provide locale conversions to tests through lit substitution (#105651) There are 2 problems today that this PR resolves: libcxx tests assume the thousands separator for fr_FR locale is x00A0 on Windows. This currently fails when run on newer versions of Windows (it seems to have been updated to the new correct value of 0x202F around windows 11. The exact windows version where it changed doesn't seem to be documented anywhere). Depending the OS version, you need different values. There are several ifdefs to determine the environment/platform-specific locale conversion values and it leads to maintenance as things change over time. This PR includes the following changes: - Provide the environment's locale conversion values through a substitution. The test can opt in by placing the substitution value in a define flag. - Remove the platform ifdefs (the swapping of values between Windows, Linux, Apple, AIX). This is accomplished through a lit feature action that fetches the environment's locale conversions (lconv) for members like 'thousands_sep' that we need to provide. This should ensure that we don't lose the effectiveness of the test itself. In addition, as a result of the above, this PR: - Fixes a handful of locale tests which unexpectedly fail on newer Windows versions. - Resolves 3 XFAIL FIX-MEs. Originally submitted in https://github.com/llvm/llvm-project/pull/86649. Co-authored-by: Rodrigo Salazar <4rodrigosalazar@gmail.com> (cherry picked from commit f909b2229ac16ae3898d8b158bee85c384173dfa) --- .../get_long_double_fr_FR.pass.cpp | 5 +- .../get_long_double_ru_RU.pass.cpp | 5 +- .../put_long_double_fr_FR.pass.cpp | 5 +- .../put_long_double_ru_RU.pass.cpp | 5 +- .../thousands_sep.pass.cpp | 34 ++----- .../thousands_sep.pass.cpp | 20 ++-- .../time.duration.nonmember/ostream.pass.cpp | 24 ++--- libcxx/test/support/locale_helpers.h | 37 ++------ libcxx/utils/libcxx/test/features.py | 91 ++++++++++++++++++- 9 files changed, 138 insertions(+), 88 deletions(-) diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp index bbb67d694970a..f02241ad36a5b 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp @@ -13,6 +13,8 @@ // REQUIRES: locale.fr_FR.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP} + // // class money_get @@ -59,7 +61,8 @@ class my_facetw }; static std::wstring convert_thousands_sep(std::wstring const& in) { - return LocaleHelpers::convert_thousands_sep_fr_FR(in); + const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP); + return LocaleHelpers::convert_thousands_sep(in, fr_sep); } #endif // TEST_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp 
b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp index e680f2ea8816a..371cf0e90c8d3 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp @@ -11,6 +11,8 @@ // REQUIRES: locale.ru_RU.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP} + // XFAIL: glibc-old-ru_RU-decimal-point // @@ -52,7 +54,8 @@ class my_facetw }; static std::wstring convert_thousands_sep(std::wstring const& in) { - return LocaleHelpers::convert_thousands_sep_ru_RU(in); + const wchar_t ru_sep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP); + return LocaleHelpers::convert_thousands_sep(in, ru_sep); } #endif // TEST_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp index 47a48deb3368c..9ac95cc52ac07 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp @@ -13,6 +13,8 @@ // REQUIRES: locale.fr_FR.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP} + // // class money_put @@ -59,7 +61,8 @@ class my_facetw }; static std::wstring convert_thousands_sep(std::wstring const& in) { - return LocaleHelpers::convert_thousands_sep_fr_FR(in); + const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP); + return LocaleHelpers::convert_thousands_sep(in, fr_sep); } #endif // TEST_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp index 4aea1016e735b..be1e397488468 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp @@ -11,6 +11,8 @@ // REQUIRES: locale.ru_RU.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP} + // XFAIL: glibc-old-ru_RU-decimal-point // @@ -52,7 +54,8 @@ class my_facetw }; static std::wstring convert_thousands_sep(std::wstring const& in) { - return LocaleHelpers::convert_thousands_sep_ru_RU(in); + const wchar_t ru_sep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP); + return LocaleHelpers::convert_thousands_sep(in, ru_sep); } #endif // TEST_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp index 
2a70741d2a0fa..6b6570576a082 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp @@ -9,13 +9,14 @@ // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd -// XFAIL: LIBCXX-FREEBSD-FIXME - // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ru_RU.UTF-8 // REQUIRES: locale.zh_CN.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP} +// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP} + // // class moneypunct_byname @@ -27,6 +28,7 @@ #include #include "test_macros.h" +#include "locale_helpers.h" #include "platform_support.h" // locale name macros class Fnf @@ -110,17 +112,10 @@ int main(int, char**) Fnt f(LOCALE_fr_FR_UTF_8, 1); assert(f.thousands_sep() == ' '); } - // The below tests work around GLIBC's use of U202F as mon_thousands_sep. + #ifndef TEST_HAS_NO_WIDE_CHARACTERS -#if defined(_CS_GNU_LIBC_VERSION) - const wchar_t fr_sep = glibc_version_less_than("2.27") ? L' ' : L'\u202F'; -#elif defined(_WIN32) - const wchar_t fr_sep = L'\u00A0'; -#elif defined(_AIX) - const wchar_t fr_sep = L'\u202F'; -#else - const wchar_t fr_sep = L' '; -#endif + const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP); + { Fwf f(LOCALE_fr_FR_UTF_8, 1); assert(f.thousands_sep() == fr_sep); @@ -140,19 +135,8 @@ int main(int, char**) assert(f.thousands_sep() == sep); } #ifndef TEST_HAS_NO_WIDE_CHARACTERS - // The below tests work around GLIBC's use of U00A0 as mon_thousands_sep - // and U002E as mon_decimal_point. - // TODO: Fix thousands_sep for 'char'. - // related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006 -# if defined(_CS_GNU_LIBC_VERSION) - // FIXME libc++ specifically works around \u00A0 by translating it into - // a regular space. - const wchar_t wsep = glibc_version_less_than("2.27") ? L'\u00A0' : L'\u202F'; -# elif defined(_WIN32) || defined(_AIX) - const wchar_t wsep = L'\u00A0'; -# else - const wchar_t wsep = L' '; -# endif + const wchar_t wsep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP); + { Fwf f(LOCALE_ru_RU_UTF_8, 1); assert(f.thousands_sep() == wsep); diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp index 850352b3bc1ec..ccecd85f2ff87 100644 --- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp @@ -14,6 +14,8 @@ // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DFR_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP} + // // template class numpunct_byname; @@ -25,6 +27,7 @@ #include #include "test_macros.h" +#include "locale_helpers.h" #include "platform_support.h" // locale name macros int main(int, char**) @@ -74,18 +77,11 @@ int main(int, char**) } #ifndef TEST_HAS_NO_WIDE_CHARACTERS { -#if defined(_CS_GNU_LIBC_VERSION) - const wchar_t wsep = glibc_version_less_than("2.27") ? 
L' ' : L'\u202f'; -# elif defined(_AIX) - const wchar_t wsep = L'\u202F'; -# elif defined(_WIN32) - const wchar_t wsep = L'\u00A0'; -# else - const wchar_t wsep = L','; -# endif - typedef wchar_t C; - const std::numpunct& np = std::use_facet >(l); - assert(np.thousands_sep() == wsep); + const wchar_t wsep = LocaleHelpers::thousands_sep_or_default(FR_THOU_SEP); + + typedef wchar_t C; + const std::numpunct& np = std::use_facet >(l); + assert(np.thousands_sep() == wsep); } #endif // TEST_HAS_NO_WIDE_CHARACTERS } diff --git a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp index aecb96b58719e..ebf907a49c43e 100644 --- a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp +++ b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp @@ -16,6 +16,9 @@ // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 +// ADDITIONAL_COMPILE_FLAGS: -DFR_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP} +// ADDITIONAL_COMPILE_FLAGS: -DFR_DEC_POINT=%{LOCALE_CONV_FR_FR_UTF_8_DECIMAL_POINT} + // // template> class duration; @@ -33,6 +36,7 @@ #include #include "make_string.h" +#include "locale_helpers.h" #include "platform_support.h" // locale name macros #include "test_macros.h" @@ -88,21 +92,11 @@ static void test_values() { assert(stream_fr_FR_locale(1'000.123456s) == SV("1 000,1235s")); #endif } else { -#ifdef _WIN32 - assert(stream_fr_FR_locale(-1'000'000s) == SV("-1\u00A0000\u00A0000s")); - assert(stream_fr_FR_locale(1'000'000s) == SV("1\u00A0000\u00A0000s")); - assert(stream_fr_FR_locale(-1'000.123456s) == SV("-1\u00A0000,1235s")); - assert(stream_fr_FR_locale(1'000.123456s) == SV("1\u00A0000,1235s")); -#elif defined(__APPLE__) - assert(stream_fr_FR_locale(-1'000'000s) == SV("-1000000s")); - assert(stream_fr_FR_locale(1'000'000s) == SV("1000000s")); - assert(stream_fr_FR_locale(-1'000.123456s) == SV("-1000,1235s")); - assert(stream_fr_FR_locale(1'000.123456s) == SV("1000,1235s")); -#else - assert(stream_fr_FR_locale(-1'000'000s) == SV("-1\u202f000\u202f000s")); - assert(stream_fr_FR_locale(1'000'000s) == SV("1\u202f000\u202f000s")); - assert(stream_fr_FR_locale(-1'000.123456s) == SV("-1\u202f000,1235s")); - assert(stream_fr_FR_locale(1'000.123456s) == SV("1\u202f000,1235s")); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + assert(stream_fr_FR_locale(-1'000'000s) == L"-1" FR_THOU_SEP "000" FR_THOU_SEP "000s"); + assert(stream_fr_FR_locale(1'000'000s) == L"1" FR_THOU_SEP "000" FR_THOU_SEP "000s"); + assert(stream_fr_FR_locale(-1'000.123456s) == L"-1" FR_THOU_SEP "000" FR_DEC_POINT "1235s"); + assert(stream_fr_FR_locale(1'000.123456s) == L"1" FR_THOU_SEP "000" FR_DEC_POINT "1235s"); #endif } diff --git a/libcxx/test/support/locale_helpers.h b/libcxx/test/support/locale_helpers.h index 3eb24ebf28f52..946c2fed0f3a5 100644 --- a/libcxx/test/support/locale_helpers.h +++ b/libcxx/test/support/locale_helpers.h @@ -41,37 +41,6 @@ std::wstring convert_thousands_sep(std::wstring const& in, wchar_t sep) { return out; } -// GLIBC 2.27 and newer use U+202F NARROW NO-BREAK SPACE as a thousands separator. -// This function converts the spaces in string inputs to U+202F if need -// be. FreeBSD's locale data also uses U+202F, since 2018. -// Windows uses U+00A0 NO-BREAK SPACE. 
-std::wstring convert_thousands_sep_fr_FR(std::wstring const& in) { -#if defined(_CS_GNU_LIBC_VERSION) - if (glibc_version_less_than("2.27")) - return in; - else - return convert_thousands_sep(in, L'\u202F'); -#elif defined(__FreeBSD__) - return convert_thousands_sep(in, L'\u202F'); -#elif defined(_WIN32) - return convert_thousands_sep(in, L'\u00A0'); -#else - return in; -#endif -} - -// GLIBC 2.27 uses U+202F NARROW NO-BREAK SPACE as a thousands separator. -// FreeBSD, AIX and Windows use U+00A0 NO-BREAK SPACE. -std::wstring convert_thousands_sep_ru_RU(std::wstring const& in) { -#if defined(TEST_HAS_GLIBC) - return convert_thousands_sep(in, L'\u202F'); -# elif defined(__FreeBSD__) || defined(_WIN32) || defined(_AIX) - return convert_thousands_sep(in, L'\u00A0'); -# else - return in; -# endif -} - std::wstring negate_en_US(std::wstring s) { #if defined(_WIN32) return L"(" + s + L")"; @@ -80,6 +49,12 @@ std::wstring negate_en_US(std::wstring s) { #endif } +wchar_t thousands_sep_or_default(std::wstring s) { return !s.empty() ? s[0] : L','; } + +wchar_t mon_thousands_sep_or_default(std::wstring s) { return thousands_sep_or_default(s); } + +wchar_t decimal_point_or_default(std::wstring s) { return !s.empty() ? s[0] : L'.'; } + #endif // TEST_HAS_NO_WIDE_CHARACTERS std::string negate_en_US(std::string s) { diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index e4b413deff4db..a83dcd16b16f8 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -425,6 +425,10 @@ def _mingwSupportsModules(cfg): "fr_CA.ISO8859-1": ["fr_CA.ISO8859-1", "French_Canada.1252"], "cs_CZ.ISO8859-2": ["cs_CZ.ISO8859-2", "Czech_Czech Republic.1250"], } +provide_locale_conversions = { + "fr_FR.UTF-8": ["decimal_point", "mon_thousands_sep", "thousands_sep"], + "ru_RU.UTF-8": ["mon_thousands_sep"], +} for locale, alts in locales.items(): # Note: Using alts directly in the lambda body here will bind it to the value at the # end of the loop. Assigning it to a default argument works around this issue. @@ -432,10 +436,95 @@ def _mingwSupportsModules(cfg): Feature( name="locale.{}".format(locale), when=lambda cfg, alts=alts: hasAnyLocale(cfg, alts), - ) + actions=lambda cfg, locale=locale, alts=alts: _getLocaleFlagsAction( + cfg, locale, alts, provide_locale_conversions[locale] + ) + if locale in provide_locale_conversions + and "_LIBCPP_HAS_NO_WIDE_CHARACTERS" not in compilerMacros(cfg) + else [], + ), ) +# Provide environment locale conversions through substitutions to avoid platform specific +# maintenance. +def _getLocaleFlagsAction(cfg, locale, alts, members): + alts_list = ",".join([f'"{l}"' for l in alts]) + get_member_list = ",".join([f"lc->{m}" for m in members]) + + localeconv_info = programOutput( + cfg, + r""" + #if defined(_WIN32) && !defined(_CRT_SECURE_NO_WARNINGS) + #define _CRT_SECURE_NO_WARNINGS + #endif + #include + #include + #include + #include + + // Print each requested locale conversion member on separate lines. + int main() { + const char* locales[] = { %s }; + for (int loc_i = 0; loc_i < %d; ++loc_i) { + if (!setlocale(LC_ALL, locales[loc_i])) { + continue; // Choose first locale name that is recognized. 
+ } + + lconv* lc = localeconv(); + const char* members[] = { %s }; + for (size_t m_i = 0; m_i < %d; ++m_i) { + if (!members[m_i]) { + printf("\n"); // member value is an empty string + continue; + } + + size_t len = mbstowcs(nullptr, members[m_i], 0); + if (len == static_cast(-1)) { + fprintf(stderr, "mbstowcs failed unexpectedly\n"); + return 1; + } + // Include room for null terminator. Use malloc as these features + // are also used by lit configs that don't use -lc++ (libunwind tests). + wchar_t* dst = (wchar_t*)malloc((len + 1) * sizeof(wchar_t)); + size_t ret = mbstowcs(dst, members[m_i], len + 1); + if (ret == static_cast(-1)) { + fprintf(stderr, "mbstowcs failed unexpectedly\n"); + free(dst); + return 1; + } + + for (size_t i = 0; i < len; ++i) { + if (dst[i] > 0x7F) { + printf("\\u%%04x", dst[i]); + } else { + // c++03 does not allow basic ascii-range characters in UCNs + printf("%%c", (char)dst[i]); + } + } + printf("\n"); + free(dst); + } + return 0; + } + + return 1; + } + """ + % (alts_list, len(alts), get_member_list, len(members)), + ) + valid_define_name = re.sub(r"[.-]", "_", locale).upper() + return [ + # Provide locale conversion through a substitution. + # Example: %{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP} = L"\u202f" + AddSubstitution( + f"%{{LOCALE_CONV_{valid_define_name}_{member.upper()}}}", + lambda cfg, value=value: f"'L\"{value}\"'", + ) + for member, value in zip(members, localeconv_info.split("\n")) + ] + + # Add features representing the target platform name: darwin, linux, windows, etc... DEFAULT_FEATURES += [ Feature(name="darwin", when=lambda cfg: "__APPLE__" in compilerMacros(cfg)), From b8e10ca59b6ad9d0c2828b418b5a17391ae6cda8 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 17 Mar 2025 22:13:51 -0400 Subject: [PATCH 60/83] [libc++] Fix check for _LIBCPP_HAS_NO_WIDE_CHARACTERS in features.py (#131675) The patch that added the new locale Lit features was created before we switched to a 0-1 macro for _LIBCPP_HAS_WIDE_CHARACTERS, leading to that patch referring to the obsolete _LIBCPP_HAS_NO_WIDE_CHARACTERS macro that is never defined nowadays. (cherry picked from commit 297f6d9f6b215bd7f58cf500b979b94dedbba7bb) --- libcxx/utils/libcxx/test/features.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index a83dcd16b16f8..10fc4b0afde6b 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -440,7 +440,8 @@ def _mingwSupportsModules(cfg): cfg, locale, alts, provide_locale_conversions[locale] ) if locale in provide_locale_conversions - and "_LIBCPP_HAS_NO_WIDE_CHARACTERS" not in compilerMacros(cfg) + and ("_LIBCPP_HAS_WIDE_CHARACTERS" not in compilerMacros(cfg) or + compilerMacros(cfg)["_LIBCPP_HAS_WIDE_CHARACTERS"] == "1") else [], ), ) From 337beb73abfe05c2db1158f211042eb8763165ea Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 7 May 2025 16:09:40 +0200 Subject: [PATCH 61/83] [libc++] Add _LIBCPP_NO_UNIQUE_ADDRESS to flat_{,multi}map::value_compare (#137594) This breaks the ABI of `flat_{,multi}map::value_compare`, but this type has only been introduced in LLVM 20, so it should be very unlikely that we break anybody if we back-port this now. 
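A minimal standalone C++20 sketch of the layout effect (illustrative
names only, not the libc++ sources):

    #include <utility>

    struct Less {                        // stand-in for an empty key_compare
      bool operator()(int a, int b) const { return a < b; }
    };

    class ValueCompare {                 // mirrors flat_map::value_compare
      [[no_unique_address]] Less comp_;  // plain member before this change
    public:
      bool operator()(const std::pair<int, int>& x,
                      const std::pair<int, int>& y) const {
        return comp_(x.first, y.first);  // compare keys only
      }
    };

The attribute only matters when the comparator type is empty; in that
case the stored comparator no longer occupies a distinct byte, so
value_compare can be treated as empty for layout purposes when nested
elsewhere. That is the ABI-affecting part, which the message above
argues is low-risk because the type is new in LLVM 20.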
(cherry picked from commit ed0aa9961caa177098e9b7e69e98034d676f192e) --- libcxx/include/__flat_map/flat_map.h | 2 +- libcxx/include/__flat_map/flat_multimap.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/include/__flat_map/flat_map.h b/libcxx/include/__flat_map/flat_map.h index a0594ed9dc411..9cc39c0a1e067 100644 --- a/libcxx/include/__flat_map/flat_map.h +++ b/libcxx/include/__flat_map/flat_map.h @@ -113,7 +113,7 @@ class flat_map { class value_compare { private: - key_compare __comp_; + _LIBCPP_NO_UNIQUE_ADDRESS key_compare __comp_; _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : __comp_(__c) {} friend flat_map; diff --git a/libcxx/include/__flat_map/flat_multimap.h b/libcxx/include/__flat_map/flat_multimap.h index ea77fb5d79bd2..15fcd7995ad0a 100644 --- a/libcxx/include/__flat_map/flat_multimap.h +++ b/libcxx/include/__flat_map/flat_multimap.h @@ -115,7 +115,7 @@ class flat_multimap { class value_compare { private: - key_compare __comp_; + _LIBCPP_NO_UNIQUE_ADDRESS key_compare __comp_; _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : __comp_(__c) {} friend flat_multimap; From 6fa0cdf3720b8b9103f31009c089b56a9a176653 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 3 Jun 2025 09:51:37 -0700 Subject: [PATCH 62/83] release/20.x: [clang] Don't evaluate the initializer of constexpr-unknown parameters. (#142498) Backport 97885213bd4507b204b050c3cd570e365d21cc7d --- clang/lib/AST/ExprConstant.cpp | 7 ++++++- clang/test/SemaCXX/constant-expression-p2280r4.cpp | 12 ++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e0746f4532245..209b269122a8e 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3525,7 +3525,12 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E, // should begin within the evaluation of E // Used to be C++20 [expr.const]p5.12.2: // ... its lifetime began within the evaluation of E; - if (isa(VD) && !AllowConstexprUnknown) { + if (isa(VD)) { + if (AllowConstexprUnknown) { + Result = &Info.CurrentCall->createConstexprUnknownAPValues(VD, Base); + return true; + } + // Assume parameters of a potential constant expression are usable in // constant expressions. 
if (!Info.checkingPotentialConstantExpression() || diff --git a/clang/test/SemaCXX/constant-expression-p2280r4.cpp b/clang/test/SemaCXX/constant-expression-p2280r4.cpp index 87beeb4d3dc84..dbaebb81b93e8 100644 --- a/clang/test/SemaCXX/constant-expression-p2280r4.cpp +++ b/clang/test/SemaCXX/constant-expression-p2280r4.cpp @@ -200,3 +200,15 @@ int f() { return !get_value(); // contextually convert the function call result to bool } } + +namespace param_reference { + constexpr int arbitrary = -12345; + constexpr void f(const int &x = arbitrary) { // expected-note {{declared here}} + constexpr const int &v1 = x; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{reference to 'x' is not a constant expression}} + constexpr const int &v2 = (x, arbitrary); // expected-warning {{left operand of comma operator has no effect}} + constexpr int v3 = x; // expected-error {{must be initialized by a constant expression}} + static_assert(x==arbitrary); // expected-error {{static assertion expression is not an integral constant expression}} + static_assert(&x - &x == 0); + } +} From c4f257cb74b54c271bbff1649b71c40e8d8c50d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 10 Jun 2025 10:23:19 +0300 Subject: [PATCH 63/83] [llvm-rc] Allow ALT on non-virtkey accelerators (#143374) While https://learn.microsoft.com/en-us/windows/win32/menurc/accelerators-resource specifies that ALT only applies to virtkeys, this doesn't seem to be the case in reality. https://learn.microsoft.com/en-us/windows/win32/menurc/using-keyboard-accelerators contains an example that uses this combination: "B", ID_ACCEL5, ALT ; ALT_SHIFT+B Also Microsoft also includes such cases in their repo of test cases: https://github.com/microsoft/Windows-classic-samples/blob/263dd514ad215d0a40d1ec44b4df84b30ec11dcf/Samples/Win7Samples/begin/sdkdiff/sdkdiff.rc#L161-L164 Also MS rc.exe doesn't warn/error about this. However if applying SHIFT or CONTROL on a non-virtkey accelerator, MS rc.exe does produce this warning: warning RC4203 : SHIFT or CONTROL used without VIRTKEY Hence, keep the checks for SHIFT and CONTROL, but remove the checks for ALT, which seems to have been incorrect. This fixes one aspect of https://github.com/llvm/llvm-project/issues/143157. 
(cherry picked from commit 77347d6513de6a6f5dee8ade76e0a0ad1552c12b) --- .../llvm-rc/Inputs/tag-accelerators-ascii-alt.rc | 4 ---- .../test/tools/llvm-rc/Inputs/tag-accelerators.rc | 1 + llvm/test/tools/llvm-rc/tag-accelerators.test | 15 +++++---------- llvm/tools/llvm-rc/ResourceFileWriter.cpp | 4 ++-- 4 files changed, 8 insertions(+), 16 deletions(-) delete mode 100644 llvm/test/tools/llvm-rc/Inputs/tag-accelerators-ascii-alt.rc diff --git a/llvm/test/tools/llvm-rc/Inputs/tag-accelerators-ascii-alt.rc b/llvm/test/tools/llvm-rc/Inputs/tag-accelerators-ascii-alt.rc deleted file mode 100644 index 363263bfe4cf2..0000000000000 --- a/llvm/test/tools/llvm-rc/Inputs/tag-accelerators-ascii-alt.rc +++ /dev/null @@ -1,4 +0,0 @@ -2 ACCELERATORS { - "A", 15, ASCII, ALT -} - diff --git a/llvm/test/tools/llvm-rc/Inputs/tag-accelerators.rc b/llvm/test/tools/llvm-rc/Inputs/tag-accelerators.rc index 90e7f926cc087..bcfc35bdeab68 100644 --- a/llvm/test/tools/llvm-rc/Inputs/tag-accelerators.rc +++ b/llvm/test/tools/llvm-rc/Inputs/tag-accelerators.rc @@ -110,5 +110,6 @@ LANGUAGE 5, 1 "7", 71, VIRTKEY, NOINVERT, CONTROL, SHIFT, ALT "^j", 72, ASCII "^j", 73, ASCII, NOINVERT + "A", 15, ASCII, ALT } diff --git a/llvm/test/tools/llvm-rc/tag-accelerators.test b/llvm/test/tools/llvm-rc/tag-accelerators.test index 336727f617687..4f44aebc75011 100644 --- a/llvm/test/tools/llvm-rc/tag-accelerators.test +++ b/llvm/test/tools/llvm-rc/tag-accelerators.test @@ -37,7 +37,7 @@ ; ACCELERATORS-NEXT: Version (major): 0 ; ACCELERATORS-NEXT: Version (minor): 0 ; ACCELERATORS-NEXT: Characteristics: 0 -; ACCELERATORS-NEXT: Data size: 592 +; ACCELERATORS-NEXT: Data size: 600 ; ACCELERATORS-NEXT: Data: ( ; ACCELERATORS-NEXT: 0000: 00002A00 00000000 01002A00 01000000 |..*.......*.....| ; ACCELERATORS-NEXT: 0010: 02002A00 02000000 03002A00 03000000 |..*.......*.....| @@ -75,7 +75,8 @@ ; ACCELERATORS-NEXT: 0210: 15003700 42000000 0F003700 43000000 |..7.B.....7.C...| ; ACCELERATORS-NEXT: 0220: 1B003700 44000000 17003700 45000000 |..7.D.....7.E...| ; ACCELERATORS-NEXT: 0230: 1D003700 46000000 1F003700 47000000 |..7.F.....7.G...| -; ACCELERATORS-NEXT: 0240: 00000A00 48000000 82000A00 49000000 |....H.......I...| +; ACCELERATORS-NEXT: 0240: 00000A00 48000000 02000A00 49000000 |....H.......I...| +; ACCELERATORS-NEXT: 0250: 90004100 0F000000 |..A.....| ; ACCELERATORS-NEXT: ) @@ -94,19 +95,13 @@ ; RUN: not llvm-rc -no-preprocess /FO %t -- %p/Inputs/tag-accelerators-ascii-control.rc 2>&1 | FileCheck %s --check-prefix ASCII2 ; ASCII2: llvm-rc: Error in ACCELERATORS statement (ID 2): -; ASCII2-NEXT: Accelerator ID 15: Can only apply ALT, SHIFT or CONTROL to VIRTKEY accelerators +; ASCII2-NEXT: Accelerator ID 15: Can only apply SHIFT or CONTROL to VIRTKEY accelerators ; RUN: not llvm-rc -no-preprocess /FO %t -- %p/Inputs/tag-accelerators-ascii-shift.rc 2>&1 | FileCheck %s --check-prefix ASCII3 ; ASCII3: llvm-rc: Error in ACCELERATORS statement (ID 2): -; ASCII3-NEXT: Accelerator ID 15: Can only apply ALT, SHIFT or CONTROL to VIRTKEY accelerators - - -; RUN: not llvm-rc -no-preprocess /FO %t -- %p/Inputs/tag-accelerators-ascii-alt.rc 2>&1 | FileCheck %s --check-prefix ASCII4 - -; ASCII4: llvm-rc: Error in ACCELERATORS statement (ID 2): -; ASCII4-NEXT: Accelerator ID 15: Can only apply ALT, SHIFT or CONTROL to VIRTKEY accelerators +; ASCII3-NEXT: Accelerator ID 15: Can only apply SHIFT or CONTROL to VIRTKEY accelerators ; RUN: not llvm-rc -no-preprocess /FO %t -- %p/Inputs/tag-accelerators-bad-key-id.rc 2>&1 | FileCheck %s --check-prefix BADKEYID 
diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.cpp b/llvm/tools/llvm-rc/ResourceFileWriter.cpp index 85b59532bb83b..35c0768e33322 100644 --- a/llvm/tools/llvm-rc/ResourceFileWriter.cpp +++ b/llvm/tools/llvm-rc/ResourceFileWriter.cpp @@ -631,8 +631,8 @@ Error ResourceFileWriter::writeSingleAccelerator( if (IsASCII && IsVirtKey) return createAccError("Accelerator can't be both ASCII and VIRTKEY"); - if (!IsVirtKey && (Obj.Flags & (Opt::ALT | Opt::SHIFT | Opt::CONTROL))) - return createAccError("Can only apply ALT, SHIFT or CONTROL to VIRTKEY" + if (!IsVirtKey && (Obj.Flags & (Opt::SHIFT | Opt::CONTROL))) + return createAccError("Can only apply SHIFT or CONTROL to VIRTKEY" " accelerators"); if (Obj.Event.isInt()) { From 02aec86e4d0d1740fd6ca5a01b3154938682910d Mon Sep 17 00:00:00 2001 From: fleeting-xx Date: Thu, 5 Jun 2025 20:33:11 -0500 Subject: [PATCH 64/83] [clangd] [Modules] Fix to correctly handle module dependencies (#142828) This is a re-application of llvm/llvm-project#142090 without the unit test changes. A subsequent PR will follow that adds a unit test for module dependencies. - Fix dangling string references in the return value of getAllRequiredModules() - Change a couple of calls in getOrBuildModuleFile() to use the loop variable instead of the ModuleName parameter. --- clang-tools-extra/clangd/ModulesBuilder.cpp | 18 ++-- clang-tools-extra/clangd/ProjectModules.h | 2 +- .../clangd/ScanningProjectModules.cpp | 6 +- .../clangd/test/module_dependencies.test | 96 +++++++++++++++++++ 4 files changed, 110 insertions(+), 12 deletions(-) create mode 100644 clang-tools-extra/clangd/test/module_dependencies.test diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp index bee31fe51555e..2d2f0f6374486 100644 --- a/clang-tools-extra/clangd/ModulesBuilder.cpp +++ b/clang-tools-extra/clangd/ModulesBuilder.cpp @@ -360,9 +360,9 @@ void ModuleFileCache::remove(StringRef ModuleName) { /// Collect the directly and indirectly required module names for \param /// ModuleName in topological order. The \param ModuleName is guaranteed to /// be the last element in \param ModuleNames. -llvm::SmallVector getAllRequiredModules(ProjectModules &MDB, +llvm::SmallVector getAllRequiredModules(ProjectModules &MDB, StringRef ModuleName) { - llvm::SmallVector ModuleNames; + llvm::SmallVector ModuleNames; llvm::StringSet<> ModuleNamesSet; auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void { @@ -373,7 +373,7 @@ llvm::SmallVector getAllRequiredModules(ProjectModules &MDB, if (ModuleNamesSet.insert(RequiredModuleName).second) Visitor(RequiredModuleName, Visitor); - ModuleNames.push_back(ModuleName); + ModuleNames.push_back(ModuleName.str()); }; VisitDeps(ModuleName, VisitDeps); @@ -418,13 +418,13 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( // Get Required modules in topological order. 
auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName); for (llvm::StringRef ReqModuleName : ReqModuleNames) { - if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName)) + if (BuiltModuleFiles.isModuleUnitBuilt(ReqModuleName)) continue; if (auto Cached = Cache.getModule(ReqModuleName)) { if (IsModuleFileUpToDate(Cached->getModuleFilePath(), BuiltModuleFiles, TFS.view(std::nullopt))) { - log("Reusing module {0} from {1}", ModuleName, + log("Reusing module {0} from {1}", ReqModuleName, Cached->getModuleFilePath()); BuiltModuleFiles.addModuleFile(std::move(Cached)); continue; @@ -432,14 +432,16 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( Cache.remove(ReqModuleName); } + std::string ReqFileName = + MDB.getSourceForModuleName(ReqModuleName); llvm::Expected MF = buildModuleFile( - ModuleName, ModuleUnitFileName, getCDB(), TFS, BuiltModuleFiles); + ReqModuleName, ReqFileName, getCDB(), TFS, BuiltModuleFiles); if (llvm::Error Err = MF.takeError()) return Err; - log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath()); + log("Built module {0} to {1}", ReqModuleName, MF->getModuleFilePath()); auto BuiltModuleFile = std::make_shared(std::move(*MF)); - Cache.add(ModuleName, BuiltModuleFile); + Cache.add(ReqModuleName, BuiltModuleFile); BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile)); } diff --git a/clang-tools-extra/clangd/ProjectModules.h b/clang-tools-extra/clangd/ProjectModules.h index 48d52ac9deb89..5296508e0584d 100644 --- a/clang-tools-extra/clangd/ProjectModules.h +++ b/clang-tools-extra/clangd/ProjectModules.h @@ -42,7 +42,7 @@ class ProjectModules { llvm::unique_function; virtual std::vector getRequiredModules(PathRef File) = 0; - virtual PathRef + virtual std::string getSourceForModuleName(llvm::StringRef ModuleName, PathRef RequiredSrcFile = PathRef()) = 0; diff --git a/clang-tools-extra/clangd/ScanningProjectModules.cpp b/clang-tools-extra/clangd/ScanningProjectModules.cpp index e4dc11c1c2895..859aba3673dc4 100644 --- a/clang-tools-extra/clangd/ScanningProjectModules.cpp +++ b/clang-tools-extra/clangd/ScanningProjectModules.cpp @@ -66,7 +66,7 @@ class ModuleDependencyScanner { /// /// TODO: We should handle the case that there are multiple source files /// declaring the same module. - PathRef getSourceForModuleName(llvm::StringRef ModuleName) const; + std::string getSourceForModuleName(llvm::StringRef ModuleName) const; /// Return the direct required modules. Indirect required modules are not /// included. @@ -140,7 +140,7 @@ void ModuleDependencyScanner::globalScan( GlobalScanned = true; } -PathRef ModuleDependencyScanner::getSourceForModuleName( +std::string ModuleDependencyScanner::getSourceForModuleName( llvm::StringRef ModuleName) const { assert( GlobalScanned && @@ -189,7 +189,7 @@ class ScanningAllProjectModules : public ProjectModules { /// RequiredSourceFile is not used intentionally. See the comments of /// ModuleDependencyScanner for detail. - PathRef + std::string getSourceForModuleName(llvm::StringRef ModuleName, PathRef RequiredSourceFile = PathRef()) override { Scanner.globalScan(Mangler); diff --git a/clang-tools-extra/clangd/test/module_dependencies.test b/clang-tools-extra/clangd/test/module_dependencies.test new file mode 100644 index 0000000000000..1023b2363c9fa --- /dev/null +++ b/clang-tools-extra/clangd/test/module_dependencies.test @@ -0,0 +1,96 @@ +# A smoke test to check that a simple dependency chain for modules can work. +# +# FIXME: This fails on the Windows ARM64 build server. 
Not entirely sure why as it has been tested on +# an ARM64 Windows VM and appears to work there. +# UNSUPPORTED: host=aarch64-pc-windows-msvc +# +# RUN: rm -fr %t +# RUN: mkdir -p %t +# RUN: split-file %s %t +# +# RUN: sed -e "s|DIR|%/t|g" %t/compile_commands.json.tmpl > %t/compile_commands.json.tmp +# RUN: sed -e "s|CLANG_CC|%clang|g" %t/compile_commands.json.tmp > %t/compile_commands.json +# RUN: sed -e "s|DIR|%/t|g" %t/definition.jsonrpc.tmpl > %t/definition.jsonrpc.tmp +# +# On Windows, we need the URI in didOpen to look like "uri":"file:///C:/..." +# (with the extra slash in the front), so we add it here. +# RUN: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' %/t/definition.jsonrpc.tmp > %/t/definition.jsonrpc +# +# RUN: clangd -experimental-modules-support -lit-test < %t/definition.jsonrpc \ +# RUN: | FileCheck -strict-whitespace %t/definition.jsonrpc + +#--- A-frag.cppm +export module A:frag; +export void printA() {} + +#--- A.cppm +export module A; +export import :frag; + +#--- Use.cpp +import A; +void foo() { + print +} + +#--- compile_commands.json.tmpl +[ + { + "directory": "DIR", + "command": "CLANG_CC -fprebuilt-module-path=DIR -std=c++20 -o DIR/main.cpp.o -c DIR/Use.cpp", + "file": "DIR/Use.cpp" + }, + { + "directory": "DIR", + "command": "CLANG_CC -std=c++20 DIR/A.cppm --precompile -o DIR/A.pcm", + "file": "DIR/A.cppm" + }, + { + "directory": "DIR", + "command": "CLANG_CC -std=c++20 DIR/A-frag.cppm --precompile -o DIR/A-frag.pcm", + "file": "DIR/A-frag.cppm" + } +] + +#--- definition.jsonrpc.tmpl +{ + "jsonrpc": "2.0", + "id": 0, + "method": "initialize", + "params": { + "processId": 123, + "rootPath": "clangd", + "capabilities": { + "textDocument": { + "completion": { + "completionItem": { + "snippetSupport": true + } + } + } + }, + "trace": "off" + } +} +--- +{ + "jsonrpc": "2.0", + "method": "textDocument/didOpen", + "params": { + "textDocument": { + "uri": "file://DIR/Use.cpp", + "languageId": "cpp", + "version": 1, + "text": "import A;\nvoid foo() {\n print\n}\n" + } + } +} + +# CHECK: "message"{{.*}}printA{{.*}}(fix available) + +--- +{"jsonrpc":"2.0","id":1,"method":"textDocument/completion","params":{"textDocument":{"uri":"file://DIR/Use.cpp"},"context":{"triggerKind":1},"position":{"line":2,"character":6}}} +--- +{"jsonrpc":"2.0","id":2,"method":"shutdown"} +--- +{"jsonrpc":"2.0","method":"exit"} From 199e02a3643318a16a0a7fdcc677ac55cf769fdb Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 10 Jun 2025 13:04:51 +0200 Subject: [PATCH 65/83] Disable clangd/test/module_dependencies.test on Windows The test fails (sometimes); see discussion on https://github.com/llvm/llvm-project/pull/142828 --- clang-tools-extra/clangd/test/module_dependencies.test | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/test/module_dependencies.test b/clang-tools-extra/clangd/test/module_dependencies.test index 1023b2363c9fa..79306a73da435 100644 --- a/clang-tools-extra/clangd/test/module_dependencies.test +++ b/clang-tools-extra/clangd/test/module_dependencies.test @@ -1,8 +1,7 @@ # A smoke test to check that a simple dependency chain for modules can work. # -# FIXME: This fails on the Windows ARM64 build server. Not entirely sure why as it has been tested on -# an ARM64 Windows VM and appears to work there. 
-# UNSUPPORTED: host=aarch64-pc-windows-msvc +# FIXME: The test fails on Windows; see comments on https://github.com/llvm/llvm-project/pull/142828 +# UNSUPPORTED: system-windows # # RUN: rm -fr %t # RUN: mkdir -p %t From 9ba132be8eea545845cb22344ace56cdd43637d5 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Mon, 2 Jun 2025 20:14:28 +0530 Subject: [PATCH 66/83] [clan-reply] Backport PTU error recovery to 20.x This cherry-picks 3b4c51bb3243a02526313c51207a674139b67a00 and beffd1509af7b12eeab0d5ae85b2f8322e039287 to 20.x. Which are: [clang-repl] Fix error recovery while PTU cleanup (#127467) [clang][Interpreter] Disable part of lambda test on Windows The latter commit avoids a test failure seen in Windows builds. On main, I turned off one of the RUN lines for Windows, but reviewers on the cherry-pick preferred UNSUPPORTED to disable the whole test. So I have used UNSUPPORTED in this version for 20.x. --- clang/lib/Interpreter/IncrementalParser.cpp | 2 +- clang/test/Interpreter/lambda.cpp | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp index e43cea1baf43a..1d223e230669c 100644 --- a/clang/lib/Interpreter/IncrementalParser.cpp +++ b/clang/lib/Interpreter/IncrementalParser.cpp @@ -175,7 +175,7 @@ void IncrementalParser::CleanUpPTU(TranslationUnitDecl *MostRecentTU) { // FIXME: We should de-allocate MostRecentTU for (Decl *D : MostRecentTU->decls()) { auto *ND = dyn_cast(D); - if (!ND) + if (!ND || ND->getDeclName().isEmpty()) continue; // Check if we need to clean up the IdResolver chain. if (ND->getDeclName().getFETokenInfo() && !D->getLangOpts().ObjC && diff --git a/clang/test/Interpreter/lambda.cpp b/clang/test/Interpreter/lambda.cpp index df75274a050b2..fee6c73bf95cb 100644 --- a/clang/test/Interpreter/lambda.cpp +++ b/clang/test/Interpreter/lambda.cpp @@ -1,7 +1,11 @@ // REQUIRES: host-supports-jit // UNSUPPORTED: system-aix +// At -O2, somehow "x = 42" appears first when piped into FileCheck, +// see https://github.com/llvm/llvm-project/issues/143547. +// UNSUPPORTED: system-windows // RUN: cat %s | clang-repl | FileCheck %s -// RUN: cat %s | clang-repl -Xcc -O2 | FileCheck %s +// RUN: cat %s | clang-repl -Xcc -Xclang -Xcc -verify -Xcc -O2 | FileCheck %s + extern "C" int printf(const char *, ...); auto l1 = []() { printf("ONE\n"); return 42; }; @@ -14,4 +18,14 @@ auto r2 = l2(); auto r3 = l2(); // CHECK: TWO -%quit +// Verify non-local lambda capture error is correctly reported +int x = 42; + +// expected-error {{non-local lambda expression cannot have a capture-default}} +auto capture = [&]() { return x * 2; }; + +// Ensure interpreter continues and x is still valid +printf("x = %d\n", x); +// CHECK: x = 42 + +%quit \ No newline at end of file From 6146a88f60492b520a36f8f8f3231e15f3cc6082 Mon Sep 17 00:00:00 2001 From: Ami-zhang Date: Thu, 12 Jun 2025 20:11:14 +0800 Subject: [PATCH 67/83] [LoongArch] Fix '-mno-lsx' option not disabling LASX feature (#143821) When '-march' with LASX feature and '-mno-lsx' options are used together, '-mno-lsx' fails to disable LASX, leaving 'HasFeatureLASX=true' and causing incorrect '__loongarch_sx/asx=1' macro definition. 
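
A quick way to observe the bug is a translation unit that only checks
the predefined macros (the file name is invented; the driver line
mirrors the RUN lines added to init-loongarch.c below):

```cpp
// check.cpp, compiled with:
//   clang --target=loongarch64 -march=la464 -mno-lsx -c check.cpp
// Before this fix the #error fires, because the LASX feature (and with
// it the __loongarch_asx / __loongarch_sx macros) survived -mno-lsx.
#if defined(__loongarch_asx) || defined(__loongarch_sx)
#error "LSX/LASX macros are still defined despite -mno-lsx"
#endif

int main() { return 0; }
```
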
Fixes https://github.com/loongson-community/discussions/issues/95 (cherry picked from commit 2ecbfc0beb42abbbd2c3d28bfd576b38c44a5b46) --- clang/lib/Driver/ToolChains/Arch/LoongArch.cpp | 1 + clang/test/Preprocessor/init-loongarch.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index 0575a1ebef3a6..1666253db54cb 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -253,6 +253,7 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, Features.push_back("+lsx"); } else /*-mno-lsx*/ { Features.push_back("-lsx"); + Features.push_back("-lasx"); } } diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c index ac461b371162f..71a266b8a9157 100644 --- a/clang/test/Preprocessor/init-loongarch.c +++ b/clang/test/Preprocessor/init-loongarch.c @@ -946,6 +946,10 @@ // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s // RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ // RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -march=la464 -mno-lsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mno-lsx -march=la464 -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s // MNO-LSX-NOT: #define __loongarch_asx // MNO-LSX-NOT: #define __loongarch_simd_width // MNO-LSX-NOT: #define __loongarch_sx From b83658b7e2c876a627a06c56196eff06b2441c9d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 7 Jul 2025 16:43:58 -0700 Subject: [PATCH 68/83] Bump version to 20.1.8 --- cmake/Modules/LLVMVersion.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index 1887043b7612a..34900b999a4ae 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 7) + set(LLVM_VERSION_PATCH 8) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) From 5ac3ce8196886f245cdaa58e87036e19e7857081 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 2 Jul 2025 05:26:30 +0200 Subject: [PATCH 69/83] [WebAssembly] Fix inline assembly with vector types (#146574) This commit fixes using inline assembly with v128 results. Previously this failed with an internal assertion about a failure to legalize a `CopyFromReg` where the source register was typed `v8f16`. It looks like the type used for the destination register was whatever was listed first in the `def V128 : WebAssemblyRegClass` listing, so the types were shuffled around to have a default-supported type. A small test was added as well which failed to generate previously and should now pass in generation. This test passed on LLVM 18 additionally and regressed by accident in #93228 which was first included in LLVM 19. 
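
A rough C-level counterpart of the IR test added below (an untested
sketch; the function and file names are invented, and it assumes
clang's <wasm_simd128.h> together with GCC-style inline asm):

```cpp
// repro.cpp, built with: clang --target=wasm32 -msimd128 -c repro.cpp
#include <wasm_simd128.h>

// Inline assembly whose result is a v128 value. Before this change,
// selecting the resulting CopyFromReg asserted because v8f16 was the
// first (default) type listed for the V128 register class.
v128_t load_via_asm(const void *p) {
  v128_t v;
  __asm__("local.get %1\n"
          "v128.load 0\n"
          "local.set %0"
          : "=r"(v)
          : "r"(p));
  return v;
}
```
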
(cherry picked from commit a8a9a7f95a695c02bdf3d5821d1c62cc8e08c2ff) --- .../lib/Target/WebAssembly/WebAssemblyRegisterInfo.td | 4 ++-- llvm/test/CodeGen/WebAssembly/inline-asm.ll | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td index 17889dacc868c..31a33c1e7365b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -64,8 +64,8 @@ def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32, I32_0)>; def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64, I64_0)>; def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>; def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>; -def V128 : WebAssemblyRegClass<[v8f16, v4f32, v2f64, v2i64, v4i32, v16i8, - v8i16], +def V128 : WebAssemblyRegClass<[v2i64, v4i32, v16i8, v8i16, + v8f16, v4f32, v2f64], 128, (add V128_0)>; def FUNCREF : WebAssemblyRegClass<[funcref], 0, (add FUNCREF_0)>; def EXTERNREF : WebAssemblyRegClass<[externref], 0, (add EXTERNREF_0)>; diff --git a/llvm/test/CodeGen/WebAssembly/inline-asm.ll b/llvm/test/CodeGen/WebAssembly/inline-asm.ll index 4462cfb7aa0c4..c378fd953a555 100644 --- a/llvm/test/CodeGen/WebAssembly/inline-asm.ll +++ b/llvm/test/CodeGen/WebAssembly/inline-asm.ll @@ -129,7 +129,18 @@ entry: ret i32 %ret } +; CHECK-LABEL: v128_load +; CHECK: local.get 0 +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: local.set 1 +define <4 x i32> @v128_load(ptr %v) #1 { +entry: + %0 = tail call <4 x i32> asm "local.get $1\0Av128.load 0\0Alocal.set $0", "=r,r"(ptr %v) + ret <4 x i32> %0 +} + attributes #0 = { nounwind } +attributes #1 = { "target-features"="+simd128" } !0 = !{i32 47} !1 = !{i32 145} From 5532d5b745e4c89b1c0fd0a4e67529d13e1a282b Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Thu, 26 Jun 2025 13:26:33 +0100 Subject: [PATCH 70/83] [AArch64] Ensure the LR is preserved if we must call __arm_get_current_vg (#145760) Fixes #145635 (cherry picked from commit af7166a3f126ce4e4d2a05eccc1358bd0427cf0f) --- .../Target/AArch64/AArch64FrameLowering.cpp | 10 +++- .../AArch64/sme-must-save-lr-for-vg.ll | 49 +++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index d3abd79b85a75..74b80438a28b2 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3792,6 +3792,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, CSStackSize += SpillSize; } + // Save number of saved regs, so we can easily update CSStackSize later to + // account for any additional 64-bit GPR saves. Note: After this point + // only 64-bit GPRs can be added to SavedRegs. + unsigned NumSavedRegs = SavedRegs.count(); + // Increase the callee-saved stack size if the function has streaming mode // changes, as we will need to spill the value of the VG register. // For locally streaming functions, we spill both the streaming and @@ -3811,8 +3816,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (AFI->hasStackHazardSlotIndex()) CSStackSize += getStackHazardSize(MF); - // Save number of saved regs, so we can easily update CSStackSize later. 
- unsigned NumSavedRegs = SavedRegs.count(); + // If we must call __arm_get_current_vg in the prologue preserve the LR. + if (requiresSaveVG(MF) && !Subtarget.hasSVE()) + SavedRegs.set(AArch64::LR); // The frame record needs to be created by saving the appropriate registers uint64_t EstimatedStackSize = MFI.estimateStackSize(MF); diff --git a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll new file mode 100644 index 0000000000000..69f603458670c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -O0 < %s | FileCheck %s + +; Example of locally streaming function that (at -O0) must preserve the LR (X30) +; before calling __arm_get_current_vg. +define void @foo() "aarch64_pstate_sm_body" { +; CHECK-LABEL: foo: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: lsr x9, x9, #3 +; CHECK-NEXT: str x9, [sp, #72] // 8-byte Folded Spill +; CHECK-NEXT: bl __arm_get_current_vg +; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 +; CHECK-NEXT: smstart sm +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: ret + ret void +} From 65ce78f338cf44aee7ecfa23686851f89e036b25 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Thu, 26 Jun 2025 18:42:25 +0800 Subject: [PATCH 71/83] [LoongArch] Pre-commit test for fixing xvshuf instructions. NFC For this test, the `xvshuf.d` instruction should not be generated. This will be fixed later. 
(cherry picked from commit a19ddff980136835fead07b346bd83e9211124a0) --- .../LoongArch/lasx/ir-instruction/fix-xvshuf.ll | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll new file mode 100644 index 0000000000000..21067031cb7bb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; Fix https://github.com/llvm/llvm-project/issues/137000. + +define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: shufflevector_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) +; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0) +; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 +; CHECK-NEXT: xvori.b $xr0, $xr2, 0 +; CHECK-NEXT: ret +entry: + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} From da18fb9f04ceee5e78b19c93ae755800b586e958 Mon Sep 17 00:00:00 2001 From: ZhaoQi Date: Fri, 27 Jun 2025 10:29:32 +0800 Subject: [PATCH 72/83] [LoongArch] Fix xvshuf instructions lowering (#145868) Fix https://github.com/llvm/llvm-project/issues/137000. (cherry picked from commit 30e519e1ad185701eb9593f6c727c808d7590d1b) --- .../LoongArch/LoongArchISelLowering.cpp | 2 +- .../lasx/ir-instruction/fix-xvshuf.ll | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 4ed3c3cf92e3e..98b7a1126e560 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -1209,7 +1209,7 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef Mask, if (*it < 0) // UNDEF MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64)); else if ((*it >= 0 && *it < HalfSize) || - (*it >= MaskSize && *it <= MaskSize + HalfSize)) { + (*it >= MaskSize && *it < MaskSize + HalfSize)) { int M = *it < HalfSize ? 
*it : *it - HalfSize; MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64)); } else diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll index 21067031cb7bb..f3bec11810e9b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll @@ -6,9 +6,22 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-LABEL: shufflevector_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) -; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0) -; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: movgr2fr.d $fa2, $a0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 2 +; CHECK-NEXT: movgr2fr.d $fa3, $a0 +; CHECK-NEXT: movfr2gr.d $a0, $fa2 +; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 0 +; CHECK-NEXT: movfr2gr.d $a0, $fa3 +; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 1 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 3 +; CHECK-NEXT: movgr2fr.d $fa1, $a0 +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 2 +; CHECK-NEXT: movfr2gr.d $a0, $fa1 +; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 3 ; CHECK-NEXT: xvori.b $xr0, $xr2, 0 ; CHECK-NEXT: ret entry: From b21155f97a0a9682fdea0cb074c8f4687a4cbd35 Mon Sep 17 00:00:00 2001 From: Weining Lu Date: Sat, 7 Jun 2025 15:10:24 +0800 Subject: [PATCH 73/83] [LoongArch] Precommit test case to show bug in LoongArchISelDagToDag The optimization level should not be restored into O2. (cherry picked from commit fcc82cfa9394b2bd4380acdcf0e2854caee5a47a) --- llvm/test/CodeGen/LoongArch/isel-optnone.ll | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/isel-optnone.ll diff --git a/llvm/test/CodeGen/LoongArch/isel-optnone.ll b/llvm/test/CodeGen/LoongArch/isel-optnone.ll new file mode 100644 index 0000000000000..d44f1405d0c18 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/isel-optnone.ll @@ -0,0 +1,13 @@ +; REQUIRES: asserts +; RUN: llc %s -O0 -mtriple=loongarch64 -o /dev/null -debug-only=isel 2>&1 | FileCheck %s + +define void @fooOptnone() #0 { +; CHECK: Changing optimization level for Function fooOptnone +; CHECK: Before: -O2 ; After: -O0 + +; CHECK: Restoring optimization level for Function fooOptnone +; CHECK: Before: -O0 ; After: -O2 + ret void +} + +attributes #0 = { nounwind optnone noinline } From 1daceb20611fe04313cb024aa427793039267f8c Mon Sep 17 00:00:00 2001 From: Weining Lu Date: Sat, 7 Jun 2025 11:45:39 +0800 Subject: [PATCH 74/83] [LoongArch] Pass OptLevel to LoongArchDAGToDAGISel correctly Like many other targets did. And see RISCV for similar fix. 
Fix https://github.com/llvm/llvm-project/issues/143239 (cherry picked from commit 90a52f4942961a5c32afc69d69470c6b7e5bcb8a) --- llvm/lib/Target/LoongArch/LoongArch.h | 3 ++- llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp | 10 ++++++---- llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h | 8 +++++--- llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp | 2 +- llvm/test/CodeGen/LoongArch/O0-pipeline.ll | 8 -------- llvm/test/CodeGen/LoongArch/isel-optnone.ll | 7 ++----- llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll | 1 + 7 files changed, 17 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h index db60523738880..6635c57ff0476 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.h +++ b/llvm/lib/Target/LoongArch/LoongArch.h @@ -35,7 +35,8 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, FunctionPass *createLoongArchDeadRegisterDefinitionsPass(); FunctionPass *createLoongArchExpandAtomicPseudoPass(); -FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM); +FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM, + CodeGenOptLevel OptLevel); FunctionPass *createLoongArchMergeBaseOffsetOptPass(); FunctionPass *createLoongArchOptWInstrsPass(); FunctionPass *createLoongArchPreRAExpandPseudoPass(); diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp index cb0fb9bc9c7f9..7169cdc9a2bf9 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp @@ -25,8 +25,9 @@ using namespace llvm; char LoongArchDAGToDAGISelLegacy::ID; LoongArchDAGToDAGISelLegacy::LoongArchDAGToDAGISelLegacy( - LoongArchTargetMachine &TM) - : SelectionDAGISelLegacy(ID, std::make_unique(TM)) {} + LoongArchTargetMachine &TM, CodeGenOptLevel OptLevel) + : SelectionDAGISelLegacy( + ID, std::make_unique(TM, OptLevel)) {} INITIALIZE_PASS(LoongArchDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false) @@ -456,6 +457,7 @@ bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, // This pass converts a legalized DAG into a LoongArch-specific DAG, ready // for instruction scheduling. 
-FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) { - return new LoongArchDAGToDAGISelLegacy(TM); +FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM, + CodeGenOptLevel OptLevel) { + return new LoongArchDAGToDAGISelLegacy(TM, OptLevel); } diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h index 8a7eba418d804..2e6bc9951e9e7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h @@ -26,8 +26,9 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel { public: LoongArchDAGToDAGISel() = delete; - explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM) - : SelectionDAGISel(TM) {} + explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM, + CodeGenOptLevel OptLevel) + : SelectionDAGISel(TM, OptLevel) {} bool runOnMachineFunction(MachineFunction &MF) override { Subtarget = &MF.getSubtarget(); @@ -71,7 +72,8 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel { class LoongArchDAGToDAGISelLegacy : public SelectionDAGISelLegacy { public: static char ID; - explicit LoongArchDAGToDAGISelLegacy(LoongArchTargetMachine &TM); + explicit LoongArchDAGToDAGISelLegacy(LoongArchTargetMachine &TM, + CodeGenOptLevel OptLevel); }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 62b08be5435cd..27f97b2cebb0c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -188,7 +188,7 @@ void LoongArchPassConfig::addCodeGenPrepare() { } bool LoongArchPassConfig::addInstSelector() { - addPass(createLoongArchISelDag(getLoongArchTargetMachine())); + addPass(createLoongArchISelDag(getLoongArchTargetMachine(), getOptLevel())); return false; } diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll index 24bd4c75a9821..73d0bda895de0 100644 --- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll @@ -34,15 +34,7 @@ ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier -; CHECK-NEXT: Dominator Tree Construction -; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) -; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Natural Loop Information -; CHECK-NEXT: Post-Dominator Tree Construction -; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Assignment Tracking Analysis -; CHECK-NEXT: Lazy Branch Probability Analysis -; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: LoongArch DAG->DAG Pattern Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: Local Stack Slot Allocation diff --git a/llvm/test/CodeGen/LoongArch/isel-optnone.ll b/llvm/test/CodeGen/LoongArch/isel-optnone.ll index d44f1405d0c18..4d2528a3148ac 100644 --- a/llvm/test/CodeGen/LoongArch/isel-optnone.ll +++ b/llvm/test/CodeGen/LoongArch/isel-optnone.ll @@ -2,11 +2,8 @@ ; RUN: llc %s -O0 -mtriple=loongarch64 -o /dev/null -debug-only=isel 2>&1 | FileCheck %s define void @fooOptnone() #0 { -; CHECK: Changing optimization level for Function fooOptnone -; CHECK: Before: -O2 ; After: -O0 - -; CHECK: Restoring optimization level for Function fooOptnone -; CHECK: Before: -O0 ; After: -O2 +; CHECK-NOT: Changing optimization level for Function fooOptnone +; CHECK-NOT: Restoring optimization level for Function fooOptnone ret 
void } diff --git a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll index 08534e307e4e0..c1b1c1f7568bb 100644 --- a/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll +++ b/llvm/test/CodeGen/LoongArch/spill-ra-without-kill.ll @@ -39,6 +39,7 @@ define dso_local ptr @f(i32 noundef signext %i) "frame-pointer"="all" { ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .LBB0_3: # %if.end ; CHECK-NEXT: ld.d $a0, $fp, -48 # 8-byte Folded Reload +; CHECK-NEXT: addi.w $a0, $a0, 0 ; CHECK-NEXT: ori $a1, $zero, 1 ; CHECK-NEXT: bne $a0, $a1, .LBB0_6 ; CHECK-NEXT: b .LBB0_4 From 9af763f038f7cf74335cf74eadc5eea4b2026f44 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Fri, 27 Jun 2025 16:56:17 -0400 Subject: [PATCH 75/83] [gtest] Fix building on OpenBSD/sparc64 (#145225) Cherry pick a patch from 1.15.0 Add missing include for raise(3) https://github.com/google/googletest/commit/7f036c5563af7d0329f20e8bb42effb04629f0c0 (cherry picked from commit 68239b76f139e44d24f3949383e3fd4bf389e1c9) --- third-party/unittest/googletest/src/gtest.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/third-party/unittest/googletest/src/gtest.cc b/third-party/unittest/googletest/src/gtest.cc index 30a5cc3f83a7e..37d380a789831 100644 --- a/third-party/unittest/googletest/src/gtest.cc +++ b/third-party/unittest/googletest/src/gtest.cc @@ -43,6 +43,7 @@ #include #include // NOLINT #include +#include #include #include #include From 0de59a293f7a7b941a17c1222d8ec5a3a0cad9ae Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Tue, 18 Mar 2025 13:04:23 +0100 Subject: [PATCH 76/83] [X86] Ignore NSW when DstSVT is i32 (#131755) We don't have PACKSS for i64->i32. Fixes: https://godbolt.org/z/qb8nxnPbK, which was introduced by ddd2f57b (cherry picked from commit 3d631914677b58a5479b310f480ac76e27d41e7e) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +- llvm/test/CodeGen/X86/vector-trunc-nowrap.ll | 88 ++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4413fbb77f415..12c40b501f627 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20889,7 +20889,8 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT, return SDValue(); unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits; - if (Flags.hasNoSignedWrap() || MinSignBits < NumSignBits) { + if ((Flags.hasNoSignedWrap() && DstSVT != MVT::i32) || + MinSignBits < NumSignBits) { PackOpcode = X86ISD::PACKSS; return In; } diff --git a/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll index 2b8eedfbbdc9c..863f30e03d2d6 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll @@ -1592,3 +1592,91 @@ entry: %1 = bitcast <8 x i8> %0 to i64 ret i64 %1 } + +define void @foo(<4 x i64> %a, <4 x i64> %b, ptr %p) "min-legal-vector-width"="256" "prefer-vector-width"="256" { +; SSE-LABEL: foo: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE-NEXT: movaps %xmm2, 16(%rdi) +; SSE-NEXT: movaps %xmm0, (%rdi) +; SSE-NEXT: retq +; +; AVX1-LABEL: foo: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX1-NEXT: 
vmovaps %xmm1, 16(%rdi) +; AVX1-NEXT: vmovaps %xmm0, (%rdi) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: foo: +; AVX2-SLOW: # %bb.0: # %entry +; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] +; AVX2-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX2-SLOW-NEXT: vmovaps %xmm1, 16(%rdi) +; AVX2-SLOW-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-SLOW-NEXT: vzeroupper +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-ALL-LABEL: foo: +; AVX2-FAST-ALL: # %bb.0: # %entry +; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 +; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-FAST-ALL-NEXT: vmovaps %xmm1, 16(%rdi) +; AVX2-FAST-ALL-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-FAST-ALL-NEXT: vzeroupper +; AVX2-FAST-ALL-NEXT: retq +; +; AVX2-FAST-PERLANE-LABEL: foo: +; AVX2-FAST-PERLANE: # %bb.0: # %entry +; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] +; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm1, 16(%rdi) +; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-FAST-PERLANE-NEXT: vzeroupper +; AVX2-FAST-PERLANE-NEXT: retq +; +; AVX512F-LABEL: foo: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: foo: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovqd %ymm1, 16(%rdi) +; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: foo: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: foo: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovqd %ymm1, 16(%rdi) +; AVX512BWVL-NEXT: vpmovqd %ymm0, (%rdi) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> + %1 = trunc nsw <8 x i64> %0 to <8 x i32> + store <8 x i32> %1, ptr %p, align 16 + ret void +} From ce455b382c08425d4ed44c03c8129a6f150decb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Rodr=C3=ADguez=20Troiti=C3=B1o?= Date: Mon, 16 Jun 2025 12:06:25 -0700 Subject: [PATCH 77/83] [objcopy][MachO] Revert special handling of encryptable binaries (#144058) Code originally added in #120995 and later corrected in #130517 but apparently still not correct according to #141494 and rust-lang/rust#141913. Revert the special handling because the test written in #120995 and #130517 still passes without those changes. Kept the test and improved it with a `__DATA` section to keep the current behaviour checked in case other changes modify the behaviour and break this edge case. 
(cherry picked from commit a0662ceba83cf8782da4047b8ee6d175591f168f) --- llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp | 8 - llvm/lib/ObjCopy/MachO/MachOObject.cpp | 4 - llvm/lib/ObjCopy/MachO/MachOObject.h | 3 - llvm/lib/ObjCopy/MachO/MachOReader.cpp | 4 - .../MachO/strip-with-encryption-info.test | 156 ++++++++++++------ 5 files changed, 106 insertions(+), 69 deletions(-) diff --git a/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp index 8ecd669e67178..93bc6631e64c8 100644 --- a/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp @@ -116,10 +116,6 @@ uint64_t MachOLayoutBuilder::layoutSegments() { const bool IsObjectFile = O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0; - // If we are emitting an encryptable binary, our load commands must have a - // separate (non-encrypted) page to themselves. - bool RequiresFirstSectionOutsideFirstPage = - O.EncryptionInfoCommandIndex.has_value(); for (LoadCommand &LC : O.LoadCommands) { auto &MLC = LC.MachOLoadCommand; StringRef Segname; @@ -173,10 +169,6 @@ uint64_t MachOLayoutBuilder::layoutSegments() { if (!Sec->hasValidOffset()) { Sec->Offset = 0; } else { - if (RequiresFirstSectionOutsideFirstPage) { - SectOffset = alignToPowerOf2(SectOffset, PageSize); - RequiresFirstSectionOutsideFirstPage = false; - } Sec->Offset = SegOffset + SectOffset; Sec->Size = Sec->Content.size(); SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size); diff --git a/llvm/lib/ObjCopy/MachO/MachOObject.cpp b/llvm/lib/ObjCopy/MachO/MachOObject.cpp index e0819d89d24ff..8d2c02dc37c99 100644 --- a/llvm/lib/ObjCopy/MachO/MachOObject.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOObject.cpp @@ -98,10 +98,6 @@ void Object::updateLoadCommandIndexes() { case MachO::LC_DYLD_EXPORTS_TRIE: ExportsTrieCommandIndex = Index; break; - case MachO::LC_ENCRYPTION_INFO: - case MachO::LC_ENCRYPTION_INFO_64: - EncryptionInfoCommandIndex = Index; - break; } } } diff --git a/llvm/lib/ObjCopy/MachO/MachOObject.h b/llvm/lib/ObjCopy/MachO/MachOObject.h index 79eb0133c2802..a454c4f502fd6 100644 --- a/llvm/lib/ObjCopy/MachO/MachOObject.h +++ b/llvm/lib/ObjCopy/MachO/MachOObject.h @@ -341,9 +341,6 @@ struct Object { /// The index of the LC_SEGMENT or LC_SEGMENT_64 load command /// corresponding to the __TEXT segment. std::optional TextSegmentCommandIndex; - /// The index of the LC_ENCRYPTION_INFO or LC_ENCRYPTION_INFO_64 load command - /// if present. 
- std::optional EncryptionInfoCommandIndex; BumpPtrAllocator Alloc; StringSaver NewSectionsContents; diff --git a/llvm/lib/ObjCopy/MachO/MachOReader.cpp b/llvm/lib/ObjCopy/MachO/MachOReader.cpp index ef0e0262f9395..2b344f36d8e78 100644 --- a/llvm/lib/ObjCopy/MachO/MachOReader.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOReader.cpp @@ -184,10 +184,6 @@ Error MachOReader::readLoadCommands(Object &O) const { case MachO::LC_DYLD_CHAINED_FIXUPS: O.ChainedFixupsCommandIndex = O.LoadCommands.size(); break; - case MachO::LC_ENCRYPTION_INFO: - case MachO::LC_ENCRYPTION_INFO_64: - O.EncryptionInfoCommandIndex = O.LoadCommands.size(); - break; } #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ case MachO::LCName: \ diff --git a/llvm/test/tools/llvm-objcopy/MachO/strip-with-encryption-info.test b/llvm/test/tools/llvm-objcopy/MachO/strip-with-encryption-info.test index 2b2bd670613de..d6f6fe10d88c2 100644 --- a/llvm/test/tools/llvm-objcopy/MachO/strip-with-encryption-info.test +++ b/llvm/test/tools/llvm-objcopy/MachO/strip-with-encryption-info.test @@ -16,7 +16,11 @@ # CHECK: fileoff: 0 # The YAML below is the following code +# ``` +# static int foo = 12345; +# int bar = 4567; # int main(int argc, char **argv) { return 0; } +# ``` # Compiled on macOS against the macOS SDK and passing `-Wl,-encryptable` # Contents are removed, since they are not important for the test. We need a # small text segment (smaller than a page). @@ -26,8 +30,8 @@ FileHeader: cputype: 0x100000C cpusubtype: 0x0 filetype: 0x2 - ncmds: 15 - sizeofcmds: 696 + ncmds: 18 + sizeofcmds: 920 flags: 0x200085 reserved: 0x0 LoadCommands: @@ -69,7 +73,7 @@ LoadCommands: - sectname: __unwind_info segname: __TEXT addr: 0x100004020 - size: 4152 + size: 88 offset: 0x4020 align: 2 reloff: 0x0 @@ -79,37 +83,61 @@ LoadCommands: reserved2: 0x0 reserved3: 0x0 - cmd: LC_SEGMENT_64 - cmdsize: 72 - segname: __LINKEDIT + cmdsize: 152 + segname: __DATA vmaddr: 4295000064 - vmsize: 592 + vmsize: 16384 fileoff: 32768 - filesize: 592 + filesize: 16384 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 0 + Sections: + - sectname: __data + segname: __DATA + addr: 0x100008000 + size: 4 + offset: 0x8000 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4295016448 + vmsize: 16384 + fileoff: 49152 + filesize: 768 maxprot: 1 initprot: 1 nsects: 0 flags: 0 - cmd: LC_DYLD_CHAINED_FIXUPS cmdsize: 16 - dataoff: 32768 - datasize: 48 + dataoff: 49152 + datasize: 56 - cmd: LC_DYLD_EXPORTS_TRIE cmdsize: 16 - dataoff: 32816 - datasize: 48 + dataoff: 49208 + datasize: 64 - cmd: LC_SYMTAB cmdsize: 24 - symoff: 32872 - nsyms: 2 - stroff: 32904 - strsize: 32 + symoff: 49280 + nsyms: 3 + stroff: 49328 + strsize: 40 - cmd: LC_DYSYMTAB cmdsize: 80 ilocalsym: 0 nlocalsym: 0 iextdefsym: 0 - nextdefsym: 2 - iundefsym: 2 + nextdefsym: 3 + iundefsym: 3 nundefsym: 0 tocoff: 0 ntoc: 0 @@ -123,12 +151,6 @@ LoadCommands: nextrel: 0 locreloff: 0 nlocrel: 0 - - cmd: LC_ENCRYPTION_INFO_64 - cmdsize: 24 - cryptoff: 16384 - cryptsize: 16384 - cryptid: 0 - pad: 0 - cmd: LC_LOAD_DYLINKER cmdsize: 32 name: 12 @@ -136,32 +158,50 @@ LoadCommands: ZeroPadBytes: 7 - cmd: LC_UUID cmdsize: 24 - uuid: 4C4C4447-5555-3144-A18A-01E9EB7E7D92 + uuid: ADDA943C-657A-3A49-9580-168E17A40FFB - cmd: LC_BUILD_VERSION cmdsize: 32 platform: 1 minos: 983040 - sdk: 983552 + sdk: 984320 ntools: 1 Tools: - - tool: 4 - version: 1310720 + - tool: 3 + version: 76481537 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 
+ version: 0 - cmd: LC_MAIN cmdsize: 24 entryoff: 16384 stacksize: 0 + - cmd: LC_ENCRYPTION_INFO_64 + cmdsize: 24 + cryptoff: 16384 + cryptsize: 16384 + cryptid: 0 + pad: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 88539136 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 - cmd: LC_FUNCTION_STARTS cmdsize: 16 - dataoff: 32864 + dataoff: 49272 datasize: 8 - cmd: LC_DATA_IN_CODE cmdsize: 16 - dataoff: 32872 + dataoff: 49280 datasize: 0 - cmd: LC_CODE_SIGNATURE cmdsize: 16 - dataoff: 32944 - datasize: 416 + dataoff: 49376 + datasize: 544 LinkEditData: ExportTrie: TerminalSize: 0 @@ -173,51 +213,67 @@ LinkEditData: ImportName: '' Children: - TerminalSize: 0 - NodeOffset: 5 + NodeOffset: 25 Name: _ Flags: 0x0 Address: 0x0 Other: 0x0 ImportName: '' Children: + - TerminalSize: 2 + NodeOffset: 9 + Name: _mh_execute_header + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' - TerminalSize: 4 - NodeOffset: 33 - Name: main + NodeOffset: 13 + Name: bar Flags: 0x0 - Address: 0x4000 + Address: 0x8000 Other: 0x0 ImportName: '' - - TerminalSize: 2 - NodeOffset: 39 - Name: _mh_execute_header + - TerminalSize: 4 + NodeOffset: 19 + Name: main Flags: 0x0 - Address: 0x0 + Address: 0x4000 Other: 0x0 ImportName: '' NameList: - n_strx: 2 n_type: 0xF n_sect: 1 + n_desc: 16 + n_value: 4294967296 + - n_strx: 22 + n_type: 0xF + n_sect: 3 n_desc: 0 - n_value: 4294983680 - - n_strx: 8 + n_value: 4295000064 + - n_strx: 27 n_type: 0xF n_sect: 1 - n_desc: 16 - n_value: 4294967296 + n_desc: 0 + n_value: 4294983680 StringTable: - ' ' - - _main - __mh_execute_header + - _bar + - _main + - '' + - '' + - '' - '' - '' - '' - '' FunctionStarts: [ 0x4000 ] - ChainedFixups: [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x30, 0x0, - 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ] + ChainedFixups: [ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x34, 0x0, + 0x0, 0x0, 0x34, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ] ... - From fa792cd4c630b1b85a599f19204d1f36e0fa3a41 Mon Sep 17 00:00:00 2001 From: dianqk Date: Wed, 25 Jun 2025 18:39:36 +0800 Subject: [PATCH 78/83] [AsmPrinter] Always emit global equivalents if there is non-global uses (#145648) A case found from https://github.com/rust-lang/rust/issues/142752: https://llvm.godbolt.org/z/T7ce9saWh. 
We should emit `@bar_0` for the following code:

```llvm
target triple = "x86_64-unknown-linux-gnu"

@rel_0 = private unnamed_addr constant [1 x i32] [
  i32 trunc (i64 sub (i64 ptrtoint (ptr @bar_0 to i64), i64 ptrtoint (ptr @rel_0 to i64)) to i32)]
@bar_0 = internal unnamed_addr constant ptr @foo_0, align 8
@foo_0 = external global ptr, align 8

define void @foo(ptr %arg0) {
  store ptr @bar_0, ptr %arg0, align 8
  ret void
}
```

(cherry picked from commit 630d55cce45f8b409367914ef372047c8c43c511)
---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 24 ++++++++++++-------
 llvm/test/MC/X86/gotpcrel-non-globals.ll   | 36 ++++++++++++++++++++++
 2 files changed, 53 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/MC/X86/gotpcrel-non-globals.ll

diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e77abf429e6b4..c8f567e5f4195 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2139,16 +2139,20 @@ void AsmPrinter::emitFunctionBody() {
 }
 
 /// Compute the number of Global Variables that uses a Constant.
-static unsigned getNumGlobalVariableUses(const Constant *C) {
-  if (!C)
+static unsigned getNumGlobalVariableUses(const Constant *C,
+                                         bool &HasNonGlobalUsers) {
+  if (!C) {
+    HasNonGlobalUsers = true;
     return 0;
+  }
 
   if (isa<GlobalVariable>(C))
     return 1;
 
   unsigned NumUses = 0;
   for (const auto *CU : C->users())
-    NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU));
+    NumUses +=
+        getNumGlobalVariableUses(dyn_cast<Constant>(CU), HasNonGlobalUsers);
 
   return NumUses;
 }
@@ -2159,7 +2163,8 @@ static unsigned getNumGlobalVariableUses(const Constant *C) {
 /// candidates are skipped and are emitted later in case at least one cstexpr
 /// isn't replaced by a PC relative GOT entry access.
 static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
-                                     unsigned &NumGOTEquivUsers) {
+                                     unsigned &NumGOTEquivUsers,
+                                     bool &HasNonGlobalUsers) {
   // Global GOT equivalents are unnamed private globals with a constant
   // pointer initializer to another global symbol. They must point to a
   // GlobalVariable or Function, i.e., as GlobalValue.
@@ -2171,7 +2176,8 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
   // To be a got equivalent, at least one of its users need to be a constant
   // expression used by another global variable.
   for (const auto *U : GV->users())
-    NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U));
+    NumGOTEquivUsers +=
+        getNumGlobalVariableUses(dyn_cast<Constant>(U), HasNonGlobalUsers);
 
   return NumGOTEquivUsers > 0;
 }
@@ -2189,9 +2195,13 @@ void AsmPrinter::computeGlobalGOTEquivs(Module &M) {
 
   for (const auto &G : M.globals()) {
     unsigned NumGOTEquivUsers = 0;
-    if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers))
+    bool HasNonGlobalUsers = false;
+    if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers, HasNonGlobalUsers))
       continue;
-
+    // If non-global variables use it, we still need to emit it.
+    // Add 1 here, then emit it in `emitGlobalGOTEquivs`.
+    if (HasNonGlobalUsers)
+      NumGOTEquivUsers += 1;
     const MCSymbol *GOTEquivSym = getSymbol(&G);
     GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers);
   }
diff --git a/llvm/test/MC/X86/gotpcrel-non-globals.ll b/llvm/test/MC/X86/gotpcrel-non-globals.ll
new file mode 100644
index 0000000000000..222d2d73ff728
--- /dev/null
+++ b/llvm/test/MC/X86/gotpcrel-non-globals.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that we emit the `@bar_*` symbols, and that we don't emit multiple symbols.
+
+; CHECK-LABEL: .Lrel_0:
+; CHECK: .long foo_0@GOTPCREL+0
+; CHECK-LABEL: .Lrel_1_failed:
+; CHECK: .long bar_1-foo_0
+; CHECK-LABEL: .Lrel_2:
+; CHECK: .long foo_2@GOTPCREL+0
+
+; CHECK: bar_0:
+; CHECK: bar_1:
+; CHECK: bar_2_indirect:
+
+@rel_0 = private unnamed_addr constant [1 x i32] [
+  i32 trunc (i64 sub (i64 ptrtoint (ptr @bar_0 to i64), i64 ptrtoint (ptr @rel_0 to i64)) to i32)]
+@rel_1_failed = private unnamed_addr constant [1 x i32] [
+  i32 trunc (i64 sub (i64 ptrtoint (ptr @bar_1 to i64), i64 ptrtoint (ptr @foo_0 to i64)) to i32)]
+@rel_2 = private unnamed_addr constant [1 x i32] [
+  i32 trunc (i64 sub (i64 ptrtoint (ptr @bar_2_indirect to i64), i64 ptrtoint (ptr @rel_2 to i64)) to i32)]
+@bar_0 = internal unnamed_addr constant ptr @foo_0, align 8
+@bar_1 = internal unnamed_addr constant ptr @foo_1, align 8
+@bar_2_indirect = internal unnamed_addr constant ptr @foo_2, align 8
+@foo_0 = external global ptr, align 8
+@foo_1 = external global ptr, align 8
+@foo_2 = external global ptr, align 8
+
+define void @foo(ptr %arg0, ptr %arg1) {
+  store ptr @bar_0, ptr %arg0, align 8
+  store ptr @bar_1, ptr %arg1, align 8
+  store ptr getelementptr (i8, ptr @bar_2_indirect, i32 1), ptr %arg1, align 8
+  ret void
+}
From 0c9f909b7976dc4a1643bf708484dc63725ecb21 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Wed, 18 Jun 2025 12:58:17 +0100
Subject: [PATCH 79/83] [AArch64][SME] Fix restoring callee-saves from FP with
 hazard padding (#143371)

Currently, when hazard-padding is enabled, a (fixed-size) hazard slot is
placed in the CS area, just after the frame record. The size of this
slot is part of the "CalleeSaveBaseToFrameRecordOffset". The SVE
epilogue emission code assumed this offset was always zero and
incorrectly set the stack pointer, resulting in all SVE registers being
reloaded from incorrect offsets.

```
| prev_lr                           |
| prev_fp                           |
| (a.k.a. "frame record")           |
|-----------------------------------| <- fp(=x29)
|                                   |
|-----------------------------------| <- callee-saved base
|                                   |
| callee-saved fp/simd/SVE regs     |
|                                   |
|-----------------------------------| <- SVE callee-save base
```

i.e. in the above diagram, the code assumed `fp == callee-saved base`.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |   37 +-
 llvm/test/CodeGen/AArch64/stack-hazard.ll     | 1173 +++++++++++++++++
 2 files changed, 1198 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 74b80438a28b2..ce947951b26ef 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2501,20 +2501,33 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
 
   // Deallocate the SVE area.
   if (SVEStackSize) {
-    // If we have stack realignment or variable sized objects on the stack,
-    // restore the stack pointer from the frame pointer prior to SVE CSR
-    // restoration.
-    if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
-      if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
-        // Set SP to start of SVE callee-save area from which they can
-        // be reloaded. The code below will deallocate the stack space
-        // space by moving FP -> SP.
-        emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
-                        StackOffset::getScalable(-CalleeSavedSize), TII,
+    int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
+    // If we have stack realignment or variable-sized objects we must use the
+    // FP to restore SVE callee saves (as there is an unknown amount of
+    // data/padding between the SP and SVE CS area).
+    Register BaseForSVEDealloc =
+        (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
+                                                              : AArch64::SP;
+    if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
+      Register CalleeSaveBase = AArch64::FP;
+      if (int64_t CalleeSaveBaseOffset =
+              AFI->getCalleeSaveBaseToFrameRecordOffset()) {
+        // If we have a non-zero offset to the non-SVE CS base we need to
+        // compute the base address by subtracting the offset in a temporary
+        // register first (to avoid briefly deallocating the SVE CS).
+        CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
+            &AArch64::GPR64RegClass);
+        emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
+                        StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
                         MachineInstr::FrameDestroy);
       }
-    } else {
-      if (AFI->getSVECalleeSavedStackSize()) {
+      // The code below will deallocate the stack space by moving the
+      // SP to the start of the SVE callee-save area.
+      emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
+                      StackOffset::getScalable(-SVECalleeSavedSize), TII,
+                      MachineInstr::FrameDestroy);
+    } else if (BaseForSVEDealloc == AArch64::SP) {
+      if (SVECalleeSavedSize) {
         // Deallocate the non-SVE locals first before we can deallocate (and
         // restore callee saves) from the SVE area.
         emitFrameOffset(
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll
index df4918493edf8..fcedcb8e24222 100644
--- a/llvm/test/CodeGen/AArch64/stack-hazard.ll
+++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll
@@ -3152,3 +3152,1176 @@ entry:
   call void @bar(ptr noundef nonnull %b)
   ret i32 0
 }
+
+
+define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
+; CHECK0-LABEL: svecc_call_dynamic_alloca:
+; CHECK0:       // %bb.0: // %entry
+; CHECK0-NEXT:    stp x29, x30, [sp, #-64]!
// 16-byte Folded Spill +; CHECK0-NEXT: .cfi_def_cfa_offset 64 +; CHECK0-NEXT: cntd x9 +; CHECK0-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill +; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w20, -16 +; CHECK0-NEXT: .cfi_offset w26, -24 +; CHECK0-NEXT: .cfi_offset w27, -32 +; CHECK0-NEXT: .cfi_offset w28, -40 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; 
CHECK0-NEXT: mov w9, w0 +; CHECK0-NEXT: mov x8, sp +; CHECK0-NEXT: mov w2, w1 +; CHECK0-NEXT: add x9, x9, #15 +; CHECK0-NEXT: mov x19, sp +; CHECK0-NEXT: and x9, x9, #0x1fffffff0 +; CHECK0-NEXT: sub x8, x8, x9 +; CHECK0-NEXT: mov sp, x8 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: bl __arm_sme_state +; CHECK0-NEXT: and x20, x0, #0x1 +; CHECK0-NEXT: .cfi_offset vg, -48 +; CHECK0-NEXT: tbz w20, #0, .LBB35_2 +; CHECK0-NEXT: // %bb.1: // %entry +; CHECK0-NEXT: smstop sm +; CHECK0-NEXT: .LBB35_2: // %entry +; CHECK0-NEXT: mov x0, x8 +; CHECK0-NEXT: mov w1, #45 // =0x2d +; CHECK0-NEXT: bl memset +; CHECK0-NEXT: tbz w20, #0, .LBB35_4 +; CHECK0-NEXT: // %bb.3: // %entry +; CHECK0-NEXT: smstart sm +; CHECK0-NEXT: .LBB35_4: // %entry +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: .cfi_restore vg +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: .cfi_restore z8 +; CHECK0-NEXT: .cfi_restore z9 +; CHECK0-NEXT: .cfi_restore z10 +; CHECK0-NEXT: .cfi_restore z11 +; CHECK0-NEXT: .cfi_restore z12 +; CHECK0-NEXT: .cfi_restore z13 +; CHECK0-NEXT: .cfi_restore z14 +; CHECK0-NEXT: .cfi_restore z15 +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: .cfi_def_cfa wsp, 64 +; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload +; CHECK0-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: .cfi_def_cfa_offset 0 +; CHECK0-NEXT: .cfi_restore w19 +; CHECK0-NEXT: .cfi_restore w20 +; CHECK0-NEXT: .cfi_restore w26 +; CHECK0-NEXT: .cfi_restore w27 +; CHECK0-NEXT: .cfi_restore 
w28 +; CHECK0-NEXT: .cfi_restore w30 +; CHECK0-NEXT: .cfi_restore w29 +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_dynamic_alloca: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 128 +; CHECK64-NEXT: cntd x9 +; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -8 +; CHECK64-NEXT: .cfi_offset w20, -16 +; CHECK64-NEXT: .cfi_offset w26, -24 +; CHECK64-NEXT: .cfi_offset w27, -32 +; CHECK64-NEXT: .cfi_offset w28, -40 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 
0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: sub sp, sp, #64 +; CHECK64-NEXT: mov w9, w0 +; CHECK64-NEXT: mov x8, sp +; CHECK64-NEXT: mov w2, w1 +; CHECK64-NEXT: add x9, x9, #15 +; CHECK64-NEXT: mov x19, sp +; CHECK64-NEXT: and x9, x9, #0x1fffffff0 +; CHECK64-NEXT: sub x8, x8, x9 +; CHECK64-NEXT: mov sp, x8 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: bl __arm_sme_state +; CHECK64-NEXT: and x20, x0, #0x1 +; CHECK64-NEXT: .cfi_offset vg, -48 +; CHECK64-NEXT: tbz w20, #0, .LBB35_2 +; CHECK64-NEXT: // %bb.1: // %entry +; CHECK64-NEXT: smstop sm +; CHECK64-NEXT: .LBB35_2: // %entry +; CHECK64-NEXT: mov x0, x8 +; CHECK64-NEXT: mov w1, #45 // =0x2d +; CHECK64-NEXT: bl memset +; CHECK64-NEXT: tbz w20, #0, .LBB35_4 +; CHECK64-NEXT: // %bb.3: // %entry +; CHECK64-NEXT: smstart sm +; CHECK64-NEXT: .LBB35_4: // %entry +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: .cfi_restore vg +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: .cfi_restore z8 +; CHECK64-NEXT: .cfi_restore z9 +; CHECK64-NEXT: .cfi_restore z10 +; CHECK64-NEXT: .cfi_restore z11 +; CHECK64-NEXT: .cfi_restore z12 +; CHECK64-NEXT: .cfi_restore z13 +; CHECK64-NEXT: .cfi_restore z14 +; CHECK64-NEXT: .cfi_restore z15 +; CHECK64-NEXT: sub sp, x29, 
#64 +; CHECK64-NEXT: .cfi_def_cfa wsp, 128 +; CHECK64-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK64-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload +; CHECK64-NEXT: ldp x27, x26, [sp, #96] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 0 +; CHECK64-NEXT: .cfi_restore w19 +; CHECK64-NEXT: .cfi_restore w20 +; CHECK64-NEXT: .cfi_restore w26 +; CHECK64-NEXT: .cfi_restore w27 +; CHECK64-NEXT: .cfi_restore w28 +; CHECK64-NEXT: .cfi_restore w30 +; CHECK64-NEXT: .cfi_restore w29 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_dynamic_alloca: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 1088 +; CHECK1024-NEXT: cntd x9 +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x20, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1080] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -8 +; CHECK1024-NEXT: .cfi_offset w20, -16 +; CHECK1024-NEXT: .cfi_offset w26, -24 +; CHECK1024-NEXT: .cfi_offset w27, -32 +; CHECK1024-NEXT: .cfi_offset w28, -40 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul 
vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: sub sp, sp, #1024 +; CHECK1024-NEXT: mov w9, w0 +; CHECK1024-NEXT: mov x8, sp +; CHECK1024-NEXT: mov w2, w1 +; CHECK1024-NEXT: add x9, x9, #15 +; CHECK1024-NEXT: mov x19, sp +; CHECK1024-NEXT: and x9, x9, #0x1fffffff0 +; CHECK1024-NEXT: sub x8, x8, x9 +; CHECK1024-NEXT: mov sp, x8 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: bl __arm_sme_state +; CHECK1024-NEXT: and x20, x0, #0x1 +; CHECK1024-NEXT: .cfi_offset vg, -48 +; CHECK1024-NEXT: tbz w20, #0, .LBB35_2 +; CHECK1024-NEXT: // %bb.1: // %entry +; CHECK1024-NEXT: smstop sm +; CHECK1024-NEXT: .LBB35_2: // %entry +; CHECK1024-NEXT: mov x0, x8 +; CHECK1024-NEXT: mov w1, #45 // =0x2d +; CHECK1024-NEXT: bl memset +; CHECK1024-NEXT: tbz w20, #0, .LBB35_4 +; CHECK1024-NEXT: // %bb.3: // %entry +; CHECK1024-NEXT: smstart sm +; CHECK1024-NEXT: .LBB35_4: // %entry +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: .cfi_restore vg +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul 
vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: .cfi_restore z8 +; CHECK1024-NEXT: .cfi_restore z9 +; CHECK1024-NEXT: .cfi_restore z10 +; CHECK1024-NEXT: .cfi_restore z11 +; CHECK1024-NEXT: .cfi_restore z12 +; CHECK1024-NEXT: .cfi_restore z13 +; CHECK1024-NEXT: .cfi_restore z14 +; CHECK1024-NEXT: .cfi_restore z15 +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088 +; CHECK1024-NEXT: ldr x19, [sp, #1080] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x20, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-NEXT: .cfi_restore w19 +; CHECK1024-NEXT: .cfi_restore w20 +; CHECK1024-NEXT: .cfi_restore w26 +; CHECK1024-NEXT: .cfi_restore w27 +; CHECK1024-NEXT: .cfi_restore w28 +; CHECK1024-NEXT: .cfi_restore w30 +; CHECK1024-NEXT: .cfi_restore w29 +; CHECK1024-NEXT: ret +entry: + %ptr = alloca i8, i32 %P1 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + %call = call ptr @memset(ptr noundef nonnull %ptr, i32 noundef 45, i32 noundef %P2) + ret i32 -396142473 +} + + +define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { +; CHECK0-LABEL: svecc_call_realign: +; CHECK0: // %bb.0: // %entry +; CHECK0-NEXT: stp x29, x30, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK0-NEXT: .cfi_def_cfa_offset 64 +; CHECK0-NEXT: cntd x9 +; CHECK0-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w26, -16 +; CHECK0-NEXT: .cfi_offset w27, -24 +; CHECK0-NEXT: .cfi_offset w28, -32 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; CHECK0-NEXT: sub x9, sp, #1024 +; CHECK0-NEXT: 
and sp, x9, #0xffffffffffffffe0 +; CHECK0-NEXT: mov w2, w1 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: bl __arm_sme_state +; CHECK0-NEXT: and x19, x0, #0x1 +; CHECK0-NEXT: .cfi_offset vg, -48 +; CHECK0-NEXT: tbz w19, #0, .LBB36_2 +; CHECK0-NEXT: // %bb.1: // %entry +; CHECK0-NEXT: smstop sm +; CHECK0-NEXT: .LBB36_2: // %entry +; CHECK0-NEXT: mov x0, sp +; CHECK0-NEXT: mov w1, #45 // =0x2d +; CHECK0-NEXT: bl memset +; CHECK0-NEXT: tbz w19, #0, .LBB36_4 +; CHECK0-NEXT: // %bb.3: // %entry +; CHECK0-NEXT: smstart sm +; CHECK0-NEXT: .LBB36_4: // %entry +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: .cfi_restore vg +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: .cfi_restore z8 +; CHECK0-NEXT: .cfi_restore z9 +; CHECK0-NEXT: .cfi_restore z10 +; CHECK0-NEXT: .cfi_restore z11 +; CHECK0-NEXT: .cfi_restore z12 +; CHECK0-NEXT: .cfi_restore z13 +; CHECK0-NEXT: .cfi_restore z14 +; CHECK0-NEXT: .cfi_restore z15 +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: .cfi_def_cfa wsp, 64 +; CHECK0-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: .cfi_def_cfa_offset 0 +; CHECK0-NEXT: .cfi_restore w19 +; CHECK0-NEXT: .cfi_restore w26 +; CHECK0-NEXT: .cfi_restore w27 +; CHECK0-NEXT: .cfi_restore w28 +; CHECK0-NEXT: .cfi_restore w30 +; CHECK0-NEXT: .cfi_restore w29 +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_realign: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 128 +; CHECK64-NEXT: cntd x9 +; 
CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -16 +; CHECK64-NEXT: .cfi_offset w26, -24 +; CHECK64-NEXT: .cfi_offset w27, -32 +; CHECK64-NEXT: .cfi_offset w28, -40 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 
0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: sub x9, sp, #1088 +; CHECK64-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK64-NEXT: mov w2, w1 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: bl __arm_sme_state +; CHECK64-NEXT: and x19, x0, #0x1 +; CHECK64-NEXT: .cfi_offset vg, -48 +; CHECK64-NEXT: tbz w19, #0, .LBB36_2 +; CHECK64-NEXT: // %bb.1: // %entry +; CHECK64-NEXT: smstop sm +; CHECK64-NEXT: .LBB36_2: // %entry +; CHECK64-NEXT: mov x0, sp +; CHECK64-NEXT: mov w1, #45 // =0x2d +; CHECK64-NEXT: bl memset +; CHECK64-NEXT: tbz w19, #0, .LBB36_4 +; CHECK64-NEXT: // %bb.3: // %entry +; CHECK64-NEXT: smstart sm +; CHECK64-NEXT: .LBB36_4: // %entry +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: .cfi_restore vg +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: .cfi_restore z8 +; CHECK64-NEXT: .cfi_restore z9 +; CHECK64-NEXT: .cfi_restore z10 +; CHECK64-NEXT: .cfi_restore z11 +; CHECK64-NEXT: .cfi_restore z12 +; CHECK64-NEXT: .cfi_restore z13 +; CHECK64-NEXT: .cfi_restore z14 +; CHECK64-NEXT: .cfi_restore z15 +; CHECK64-NEXT: sub sp, x29, #64 +; CHECK64-NEXT: .cfi_def_cfa wsp, 128 +; CHECK64-NEXT: ldp x26, x19, [sp, #104] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x28, x27, [sp, #88] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 0 +; CHECK64-NEXT: .cfi_restore w19 +; CHECK64-NEXT: .cfi_restore w26 +; CHECK64-NEXT: .cfi_restore w27 +; CHECK64-NEXT: .cfi_restore w28 +; CHECK64-NEXT: .cfi_restore w30 +; 
CHECK64-NEXT: .cfi_restore w29 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_realign: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 1088 +; CHECK1024-NEXT: cntd x9 +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -16 +; CHECK1024-NEXT: .cfi_offset w26, -24 +; CHECK1024-NEXT: .cfi_offset w27, -32 +; CHECK1024-NEXT: .cfi_offset w28, -40 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: 
.cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: sub x9, sp, #2048 +; CHECK1024-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK1024-NEXT: mov w2, w1 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: bl __arm_sme_state +; CHECK1024-NEXT: and x19, x0, #0x1 +; CHECK1024-NEXT: .cfi_offset vg, -48 +; CHECK1024-NEXT: tbz w19, #0, .LBB36_2 +; CHECK1024-NEXT: // %bb.1: // %entry +; CHECK1024-NEXT: smstop sm +; CHECK1024-NEXT: .LBB36_2: // %entry +; CHECK1024-NEXT: mov x0, sp +; CHECK1024-NEXT: mov w1, #45 // =0x2d +; CHECK1024-NEXT: bl memset +; CHECK1024-NEXT: tbz w19, #0, .LBB36_4 +; CHECK1024-NEXT: // %bb.3: // %entry +; CHECK1024-NEXT: smstart sm +; CHECK1024-NEXT: .LBB36_4: // %entry +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: .cfi_restore vg +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: .cfi_restore z8 +; CHECK1024-NEXT: .cfi_restore z9 +; CHECK1024-NEXT: .cfi_restore z10 +; CHECK1024-NEXT: .cfi_restore z11 +; 
CHECK1024-NEXT: .cfi_restore z12 +; CHECK1024-NEXT: .cfi_restore z13 +; CHECK1024-NEXT: .cfi_restore z14 +; CHECK1024-NEXT: .cfi_restore z15 +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088 +; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-NEXT: .cfi_restore w19 +; CHECK1024-NEXT: .cfi_restore w26 +; CHECK1024-NEXT: .cfi_restore w27 +; CHECK1024-NEXT: .cfi_restore w28 +; CHECK1024-NEXT: .cfi_restore w30 +; CHECK1024-NEXT: .cfi_restore w29 +; CHECK1024-NEXT: ret +entry: + %ptr = alloca i8, i32 1000, align 32 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + %call = call ptr @memset(ptr noundef nonnull %ptr, i32 noundef 45, i32 noundef %P2) + ret i32 -396142473 +} + + +define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { +; CHECK0-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK0: // %bb.0: // %entry +; CHECK0-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill +; CHECK0-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: sub sp, 
sp, #48 +; CHECK0-NEXT: addvl sp, sp, #-1 +; CHECK0-NEXT: mov x19, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w20, -16 +; CHECK0-NEXT: .cfi_offset w26, -24 +; CHECK0-NEXT: .cfi_offset w27, -32 +; CHECK0-NEXT: .cfi_offset w28, -48 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; CHECK0-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK0-NEXT: ubfiz x8, x0, #2, #32 +; CHECK0-NEXT: mov x9, sp +; CHECK0-NEXT: add x8, x8, #15 +; CHECK0-NEXT: and x8, x8, #0x7fffffff0 +; CHECK0-NEXT: sub x20, x9, x8 +; CHECK0-NEXT: mov sp, x20 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: add x0, x19, #8 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: addvl x0, x29, #-19 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: mov x0, x20 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload 
+; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK0-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x26, x20, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill +; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: sub sp, sp, #112 +; CHECK64-NEXT: addvl sp, sp, #-1 +; CHECK64-NEXT: mov x19, sp +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -16 +; CHECK64-NEXT: .cfi_offset w20, -24 +; CHECK64-NEXT: .cfi_offset w26, -32 +; CHECK64-NEXT: .cfi_offset w27, -40 +; CHECK64-NEXT: .cfi_offset w28, -48 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 
0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK64-NEXT: ubfiz x8, x0, #2, #32 +; CHECK64-NEXT: mov x9, sp +; CHECK64-NEXT: add x8, x8, #15 +; CHECK64-NEXT: and x8, x8, #0x7fffffff0 +; CHECK64-NEXT: sub x20, x9, x8 +; CHECK64-NEXT: mov sp, x20 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: add x0, x19, #8 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: sub x0, x29, #64 +; CHECK64-NEXT: addvl x0, x0, #-19 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: mov x0, x20 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: sub sp, x29, #64 +; CHECK64-NEXT: ldp 
x20, x19, [sp, #104] // 16-byte Folded Reload +; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK64-NEXT: ldp x27, x26, [sp, #88] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x30, x28, [sp, #72] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x20, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: sub sp, sp, #1072 +; CHECK1024-NEXT: addvl sp, sp, #-1 +; CHECK1024-NEXT: mov x19, sp +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -16 +; CHECK1024-NEXT: .cfi_offset w20, -24 +; CHECK1024-NEXT: .cfi_offset w26, -32 +; CHECK1024-NEXT: .cfi_offset w27, -40 +; CHECK1024-NEXT: .cfi_offset w28, -48 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // 
$d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK1024-NEXT: ubfiz x8, x0, #2, #32 +; CHECK1024-NEXT: mov x9, sp +; CHECK1024-NEXT: add x8, x8, #15 +; CHECK1024-NEXT: and x8, x8, #0x7fffffff0 +; CHECK1024-NEXT: sub x20, x9, x8 +; CHECK1024-NEXT: mov sp, x20 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: add x0, x19, #8 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: sub x0, x29, #1024 +; CHECK1024-NEXT: addvl x0, x0, #-19 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: mov x0, x20 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded 
Reload +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x20, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: ret +entry: + %a = alloca i32, i32 10 + %b = alloca + %c = alloca i32, i32 %P1, align 4 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + call void @bar(ptr noundef nonnull %a) + call void @bar(ptr noundef nonnull %b) + call void @bar(ptr noundef nonnull %c) + ret i32 -396142473 +} From 6fb913d3e2ec82ac3b351f9652c311cd12088a8f Mon Sep 17 00:00:00 2001 From: dianqk Date: Sat, 28 Jun 2025 06:42:42 +0800 Subject: [PATCH 80/83] [RelLookupTableConverter] Drop unnamed_addr for GVs in entries to avoid generating GOTPCREL relocations (#146068) The entry in a relative lookup table is a global variable with a constant offset, such as `@gv`, `GEP @gv, 1`, and so on. We cannot only consider the case of a trivial global variable. This PR handles all cases using the existing `IsConstantOffsetFromGlobal` function. (cherry picked from commit c43282ab69d7ff1b64f8ef5c84eab46e57553075) --- .../Utils/RelLookupTableConverter.cpp | 37 ++++++++------- .../RelLookupTableConverter/unnamed_addr.ll | 45 +++++++++++++++++++ 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp index 4486cba2bf6c0..fe1fe391f5982 100644 --- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp +++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp @@ -67,6 +67,20 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { if (!ElemType->isPointerTy() || DL.getPointerTypeSizeInBits(ElemType) != 64) return false; + SmallVector GVOps; + Triple TT(M.getTargetTriple()); + // FIXME: This should be removed in the future. + bool ShouldDropUnnamedAddr = + // Drop unnamed_addr to avoid matching pattern in + // `handleIndirectSymViaGOTPCRel`, which generates GOTPCREL relocations + // not supported by the GNU linker and LLD versions below 18 on aarch64. + TT.isAArch64() + // Apple's ld64 (and ld-prime on Xcode 15.2) miscompile something on + // x86_64-apple-darwin. See + // https://github.com/rust-lang/rust/issues/140686 and + // https://github.com/rust-lang/rust/issues/141306. + || (TT.isX86() && TT.isOSDarwin()); + for (const Use &Op : Array->operands()) { Constant *ConstOp = cast(&Op); GlobalValue *GVOp; @@ -86,8 +100,15 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { !GlovalVarOp->isDSOLocal() || !GlovalVarOp->isImplicitDSOLocal()) return false; + + if (ShouldDropUnnamedAddr) + GVOps.push_back(GlovalVarOp); } + if (ShouldDropUnnamedAddr) + for (auto *GVOp : GVOps) + GVOp->setUnnamedAddr(GlobalValue::UnnamedAddr::None); + return true; } @@ -109,24 +130,8 @@ static GlobalVariable *createRelLookupTable(Function &Func, uint64_t Idx = 0; SmallVector RelLookupTableContents(NumElts); - Triple TT(M.getTargetTriple()); - // FIXME: This should be removed in the future. 
- bool ShouldDropUnnamedAddr = - // Drop unnamed_addr to avoid matching pattern in - // `handleIndirectSymViaGOTPCRel`, which generates GOTPCREL relocations - // not supported by the GNU linker and LLD versions below 18 on aarch64. - TT.isAArch64() - // Apple's ld64 (and ld-prime on Xcode 15.2) miscompile something on - // x86_64-apple-darwin. See - // https://github.com/rust-lang/rust/issues/140686 and - // https://github.com/rust-lang/rust/issues/141306. - || (TT.isX86() && TT.isOSDarwin()); - for (Use &Operand : LookupTableArr->operands()) { Constant *Element = cast(Operand); - if (ShouldDropUnnamedAddr) - if (auto *GlobalElement = dyn_cast(Element)) - GlobalElement->setUnnamedAddr(GlobalValue::UnnamedAddr::None); Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext()); Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy); Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy); diff --git a/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll b/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll index 78b8a4aa126c9..322c38d090fe1 100644 --- a/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll +++ b/llvm/test/Transforms/RelLookupTableConverter/unnamed_addr.ll @@ -20,6 +20,14 @@ @y3 = internal unnamed_addr constant ptr @x0 @load_relative_2.table = private unnamed_addr constant [4 x ptr] [ptr @y3, ptr @y2, ptr @y1, ptr @y0] +@b0 = private unnamed_addr constant [8 x i8] c"00000000" +@b1 = private unnamed_addr constant [8 x i8] c"11111111" +@b2 = private unnamed_addr constant [8 x i8] c"22222222" +@load_relative_3.table = private unnamed_addr constant [3 x ptr] [ + ptr getelementptr inbounds (i8, ptr @b0, i64 8), + ptr getelementptr inbounds (i8, ptr @b1, i64 8), + ptr getelementptr inbounds (i8, ptr @b2, i64 8)] + ;. ; x86_64-apple-darwin: @a0 = private constant i32 0 ; x86_64-apple-darwin: @a1 = private constant i32 1 @@ -34,6 +42,10 @@ ; x86_64-apple-darwin: @y2 = internal constant ptr @x1 ; x86_64-apple-darwin: @y3 = internal constant ptr @x0 ; x86_64-apple-darwin: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +; x86_64-apple-darwin: @b0 = private constant [8 x i8] c"00000000" +; x86_64-apple-darwin: @b1 = private constant [8 x i8] c"11111111" +; x86_64-apple-darwin: @b2 = private constant [8 x i8] c"22222222" +; x86_64-apple-darwin: @load_relative_3.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b0, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b1, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b2, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32)], align 4 ;. 
; aarch64: @a0 = private constant i32 0 ; aarch64: @a1 = private constant i32 1 @@ -48,6 +60,10 @@ ; aarch64: @y2 = internal constant ptr @x1 ; aarch64: @y3 = internal constant ptr @x0 ; aarch64: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +; aarch64: @b0 = private constant [8 x i8] c"00000000" +; aarch64: @b1 = private constant [8 x i8] c"11111111" +; aarch64: @b2 = private constant [8 x i8] c"22222222" +; aarch64: @load_relative_3.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b0, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b1, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b2, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32)], align 4 ;. ; x86_64: @a0 = private unnamed_addr constant i32 0 ; x86_64: @a1 = private unnamed_addr constant i32 1 @@ -62,6 +78,10 @@ ; x86_64: @y2 = internal unnamed_addr constant ptr @x1 ; x86_64: @y3 = internal unnamed_addr constant ptr @x0 ; x86_64: @load_relative_2.table.rel = private unnamed_addr constant [4 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @y3 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y2 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y1 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @y0 to i64), i64 ptrtoint (ptr @load_relative_2.table.rel to i64)) to i32)], align 4 +; x86_64: @b0 = private unnamed_addr constant [8 x i8] c"00000000" +; x86_64: @b1 = private unnamed_addr constant [8 x i8] c"11111111" +; x86_64: @b2 = private unnamed_addr constant [8 x i8] c"22222222" +; x86_64: @load_relative_3.table.rel = private unnamed_addr constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b0, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b1, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @b2, i64 8) to i64), i64 ptrtoint (ptr @load_relative_3.table.rel to i64)) to i32)], align 4 ;. 
define ptr @load_relative_1(i64 %offset) { ; x86_64-apple-darwin-LABEL: define ptr @load_relative_1( @@ -110,6 +130,31 @@ define ptr @load_relative_2(i64 %offset) { %load = load ptr, ptr %gep ret ptr %load } + +define ptr @load_relative_3(i64 %offset) { +; x86_64-apple-darwin-LABEL: define ptr @load_relative_3( +; x86_64-apple-darwin-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-apple-darwin-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-apple-darwin-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_3.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-apple-darwin-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; aarch64-LABEL: define ptr @load_relative_3( +; aarch64-SAME: i64 [[OFFSET:%.*]]) { +; aarch64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; aarch64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_3.table.rel, i64 [[RELTABLE_SHIFT]]) +; aarch64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; +; x86_64-LABEL: define ptr @load_relative_3( +; x86_64-SAME: i64 [[OFFSET:%.*]]) { +; x86_64-NEXT: [[RELTABLE_SHIFT:%.*]] = shl i64 [[OFFSET]], 2 +; x86_64-NEXT: [[RELTABLE_INTRINSIC:%.*]] = call ptr @llvm.load.relative.i64(ptr @load_relative_3.table.rel, i64 [[RELTABLE_SHIFT]]) +; x86_64-NEXT: ret ptr [[RELTABLE_INTRINSIC]] +; + %gep = getelementptr inbounds [3 x ptr], ptr @load_relative_3.table, i64 0, i64 %offset + %load = load ptr, ptr %gep + ret ptr %load +} + ;. ; x86_64-apple-darwin: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } ;. From 25bcf1145fd78e945eda6d9c72bde9b8e294b6b3 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Fri, 18 Apr 2025 09:12:52 -0700 Subject: [PATCH 81/83] [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) We can't assume MBBI is still pointing at MBB if we've already expanded a probe. We need to re-query the MBB from MBBI. Fixes #135206 Co-authored-by: Craig Topper (cherry picked from commit b3d2dc321c5c78b7204696afe07fe6ef3375acfd) --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 8 +- llvm/test/CodeGen/RISCV/pr135206.ll | 84 ++++++++++++++++++++ 2 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/pr135206.ll diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index bb2e5781c34db..6f4c1e16190f4 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -2135,11 +2135,13 @@ TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const { } // Synthesize the probe loop. 
-static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, +static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL, Register TargetReg, bool IsRVV) { assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP"); + MachineBasicBlock &MBB = *MBBI->getParent(); + MachineFunction &MF = *MBB.getParent(); + auto &Subtarget = MF.getSubtarget(); const RISCVInstrInfo *TII = Subtarget.getInstrInfo(); bool IsRV64 = Subtarget.is64Bit(); @@ -2228,7 +2230,7 @@ void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MI->getIterator(); DebugLoc DL = MBB.findDebugLoc(MBBI); Register TargetReg = MI->getOperand(1).getReg(); - emitStackProbeInline(MF, MBB, MBBI, DL, TargetReg, + emitStackProbeInline(MBBI, DL, TargetReg, (MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV)); MBBI->eraseFromParent(); } diff --git a/llvm/test/CodeGen/RISCV/pr135206.ll b/llvm/test/CodeGen/RISCV/pr135206.ll new file mode 100644 index 0000000000000..859179f62d704 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr135206.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple riscv64 < %s -o - | FileCheck %s + +%"buff" = type { [4096 x i64] } + +declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) +declare void @bar() + +define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -2032 +; CHECK-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 2008(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 2000(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s3, 1992(sp) # 8-byte Folded Spill +; CHECK-NEXT: lui a0, 7 +; CHECK-NEXT: sub t1, sp, a0 +; CHECK-NEXT: lui t2, 1 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: sub sp, sp, t2 +; CHECK-NEXT: sd zero, 0(sp) +; CHECK-NEXT: bne sp, t1, .LBB0_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: addi sp, sp, -2048 +; CHECK-NEXT: addi sp, sp, -96 +; CHECK-NEXT: csrr t1, vlenb +; CHECK-NEXT: lui t2, 1 +; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: sub sp, sp, t2 +; CHECK-NEXT: sd zero, 0(sp) +; CHECK-NEXT: sub t1, t1, t2 +; CHECK-NEXT: bge t1, t2, .LBB0_3 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: sub sp, sp, t1 +; CHECK-NEXT: li a0, 86 +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: addi s1, sp, 32 +; CHECK-NEXT: addi s2, sp, 16 +; CHECK-NEXT: lui a1, 353637 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addiw a0, a0, 32 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addiw a0, a1, 1622 +; CHECK-NEXT: vse8.v v8, (s0) +; CHECK-NEXT: vse8.v v8, (s1) +; CHECK-NEXT: vse8.v v8, (s2) +; CHECK-NEXT: slli a1, a0, 32 +; CHECK-NEXT: add s3, a0, a1 +; CHECK-NEXT: sd s3, 64(sp) +; CHECK-NEXT: call bar +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addiw a0, a0, 32 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vse8.v v8, (s0) +; CHECK-NEXT: vse8.v v8, (s1) +; CHECK-NEXT: vse8.v v8, (s2) +; CHECK-NEXT: sd s3, 64(sp) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add sp, sp, a1 +; CHECK-NEXT: lui a1, 8 +; CHECK-NEXT: addiw a1, a1, -1952 +; CHECK-NEXT: add 
sp, sp, a1 +; CHECK-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 2008(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s2, 2000(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s3, 1992(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 2032 +; CHECK-NEXT: ret + %1 = alloca %"buff", align 8 + call void @llvm.memset.p0.i64(ptr %1, i8 86, i64 56, i1 false) + call void @bar() + call void @llvm.memset.p0.i64(ptr %1, i8 86, i64 56, i1 false) + ret i1 false +} + From df43f93388b7587c9843838a237dd57a9bd19b52 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 6 Jun 2025 17:50:16 +0200 Subject: [PATCH 82/83] [PhaseOrdering] Add test for #139050 (NFC) (cherry picked from commit cef5a3155bab9a2db5389f782471d56f1dd15b61) --- .../PhaseOrdering/X86/vector-reductions.ll | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index 254136b0b841a..f8450766037b2 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -325,3 +325,53 @@ cleanup: %retval.0 = phi i1 [ false, %if.then ], [ true, %if.end ] ret i1 %retval.0 } + +; From https://github.com/llvm/llvm-project/issues/139050. +; FIXME: This should be vectorized. +define i8 @masked_min_reduction(ptr %data, ptr %mask) { +; CHECK-LABEL: @masked_min_reduction( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[DATA:%.*]] = getelementptr i8, ptr [[DATA1:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[DATA]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MASK:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[M:%.*]] = load i8, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[M]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[VAL]]) +; CHECK-NEXT: [[TMP21]] = select i1 [[COND]], i8 [[TMP0]], i8 [[ACC]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP20]], label [[EXIT:%.*]], label [[VECTOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: ret i8 [[TMP21]] +; +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %next, %loop ] + %acc = phi i8 [ 255, %entry ], [ %acc_next, %loop ] + + %ptr_i = getelementptr i8, ptr %data, i64 %i + %val = load i8, ptr %ptr_i, align 1 + + %mask_ptr = getelementptr i8, ptr %mask, i64 %i + %m = load i8, ptr %mask_ptr, align 1 + %cond = icmp eq i8 %m, 0 + + ; Use select to implement masking + %masked_val = select i1 %cond, i8 %val, i8 255 + + ; min reduction + %acc_next = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) + + %next = add i64 %i, 1 + %cmp = icmp ult i64 %next, 1024 + br i1 %cmp, label %loop, label %exit + +exit: + ret i8 %acc_next +} From 87f0227cb60147a26a1eeb4fb06e3b505e9c7261 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Sat, 14 Jun 2025 09:32:54 +0300 Subject: [PATCH 83/83] [InstCombine] Avoid folding `select(umin(X, Y), X)` with min/max values in false arm (#143020) Fixes https://github.com/llvm/llvm-project/issues/139050. 
This patch adds a check to avoid folding min/max reduction into select, which may block loop vectorization. The issue is that the following snippet: ``` declare i8 @llvm.umin.i8(i8, i8) define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) { ; CHECK-LABEL: @masked_min_fold_bug( ; CHECK: %cond = icmp eq i8 %mask, 0 ; CHECK: %masked_val = select i1 %cond, i8 %val, i8 255 ; CHECK: call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) ; %cond = icmp eq i8 %mask, 0 %masked_val = select i1 %cond, i8 %val, i8 255 %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) ret i8 %res } ``` is being optimized to the following code, which can not be vectorized later. ``` declare i8 @llvm.umin.i8(i8, i8) #0 define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) { %cond = icmp eq i8 %mask, 0 %1 = call i8 @llvm.umin.i8(i8 %acc, i8 %val) %res = select i1 %cond, i8 %1, i8 %acc ret i8 %res } attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ``` Expected: ``` declare i8 @llvm.umin.i8(i8, i8) #0 define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) { %cond = icmp eq i8 %mask, 0 %masked_val = select i1 %cond, i8 %val, i8 -1 %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) ret i8 %res } attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ``` https://godbolt.org/z/cYMheKE5r (cherry picked from commit 07fa6d1d90c714fa269529c3e5004a063d814c4a) --- .../InstCombine/InstructionCombining.cpp | 9 ++++ llvm/test/Transforms/InstCombine/select.ll | 47 +++++++++++++++++ .../PhaseOrdering/X86/vector-reductions.ll | 50 ++++++++++++++----- 3 files changed, 94 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index a64c188575e6c..0f5e867877da2 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1697,6 +1697,15 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, if (SI->getType()->isIntOrIntVectorTy(1)) return nullptr; + // Avoid breaking min/max reduction pattern, + // which is necessary for vectorization later. + if (isa(&Op)) + for (Value *IntrinOp : Op.operands()) + if (auto *PN = dyn_cast(IntrinOp)) + for (Value *PhiOp : PN->operands()) + if (PhiOp == &Op) + return nullptr; + // Test if a FCmpInst instruction is used exclusively by a select as // part of a minimum or maximum operation. If so, refrain from doing // any other folding. 
This helps out other analyses which understand diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 3c3111492fc68..e8a32cb1697a5 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -4901,3 +4901,50 @@ define i32 @src_simplify_2x_at_once_and(i32 %x, i32 %y) { %cond = select i1 %and0, i32 %sub, i32 %xor ret i32 %cond } + +define void @no_fold_masked_min_loop(ptr nocapture readonly %vals, ptr nocapture readonly %masks, ptr nocapture %out, i64 %n) { +; CHECK-LABEL: @no_fold_masked_min_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[RES:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[VAL_PTR:%.*]] = getelementptr inbounds i8, ptr [[VALS:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[MASK_PTR:%.*]] = getelementptr inbounds i8, ptr [[MASKS:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[VAL_PTR]], align 1 +; CHECK-NEXT: [[MASK:%.*]] = load i8, ptr [[MASK_PTR]], align 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK]], 0 +; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND]], i8 [[VAL]], i8 -1 +; CHECK-NEXT: [[RES]] = call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[MASKED_VAL]]) +; CHECK-NEXT: [[NEXT_INDEX]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXT_INDEX]], [[N:%.*]] +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: store i8 [[RES]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %index = phi i64 [0, %entry], [%next_index, %loop] + %acc = phi i8 [255, %entry], [%res, %loop] + + %val_ptr = getelementptr inbounds i8, ptr %vals, i64 %index + %mask_ptr = getelementptr inbounds i8, ptr %masks, i64 %index + + %val = load i8, ptr %val_ptr, align 1 + %mask = load i8, ptr %mask_ptr, align 1 + + %cond = icmp eq i8 %mask, 0 + %masked_val = select i1 %cond, i8 %val, i8 -1 + %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) + + %next_index = add i64 %index, 1 + %done = icmp eq i64 %next_index, %n + br i1 %done, label %exit, label %loop + +exit: + store i8 %res, ptr %out, align 1 + ret void +} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index f8450766037b2..2ec48a8637dae 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -326,26 +326,52 @@ cleanup: ret i1 %retval.0 } -; From https://github.com/llvm/llvm-project/issues/139050. -; FIXME: This should be vectorized. 
define i8 @masked_min_reduction(ptr %data, ptr %mask) { ; CHECK-LABEL: @masked_min_reduction( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: loop: +; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[DATA:%.*]] = getelementptr i8, ptr [[DATA1:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[DATA]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DATA]], i64 32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DATA]], i64 64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DATA]], i64 96 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[DATA]], align 1 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <32 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MASK:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[M:%.*]] = load i8, ptr [[TMP7]], align 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[M]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[VAL]]) -; CHECK-NEXT: [[TMP21]] = select i1 [[COND]], i8 [[TMP0]], i8 [[ACC]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP7]], i64 32 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP7]], i64 64 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP7]], i64 96 +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <32 x i8>, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <32 x i8>, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <32 x i8>, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]]) +; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]]) +; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]]) +; CHECK-NEXT: [[TMP19]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> 
[[VEC_PHI3]], <32 x i8> [[TMP15]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP20]], label [[EXIT:%.*]], label [[VECTOR_BODY]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[TMP16]], <32 x i8> [[TMP17]]) +; CHECK-NEXT: [[RDX_MINMAX11:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[RDX_MINMAX]], <32 x i8> [[TMP18]]) +; CHECK-NEXT: [[RDX_MINMAX12:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[RDX_MINMAX11]], <32 x i8> [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = tail call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> [[RDX_MINMAX12]]) ; CHECK-NEXT: ret i8 [[TMP21]] ; entry: