From ebc0e072950b48ab784a65ae146e66e3a7f35977 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 14 Nov 2025 09:54:56 -0800 Subject: [PATCH 01/56] [AMDGPU] Use std::variant in ArgDescriptor. (#167992) This replaces the 2 bool flags and the anonymous union. This also removes an implicit conversion from Register to unsigned and a call to MCRegister::id(). The ArgDescriptor constructor was always assigning the union through the MCRegister field even for stack offsets. The change to SIMachineFunctionInfo.h fixes a case where getRegister was being called on an unset ArgDescriptor. Since it was only this case, it seemed cleaner to fix it at the caller. The other option would be to make getRegister() return MCRegister() for an unset ArgDescriptor. --- .../Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 44 +++++++------------ .../lib/Target/AMDGPU/SIMachineFunctionInfo.h | 4 +- 2 files changed, 20 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index 8838a94a639eb..cb7f63639aee3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/Register.h" #include "llvm/Pass.h" +#include <variant> namespace llvm { @@ -27,55 +28,44 @@ struct ArgDescriptor { friend struct AMDGPUFunctionArgInfo; friend class AMDGPUArgumentUsageInfo; - union { - MCRegister Reg; - unsigned StackOffset; - }; + std::variant<std::monostate, MCRegister, unsigned> Val; // Bitmask to locate argument within the register. 
unsigned Mask; - bool IsStack : 1; - bool IsSet : 1; - public: - ArgDescriptor(unsigned Val = 0, unsigned Mask = ~0u, bool IsStack = false, - bool IsSet = false) - : Reg(Val), Mask(Mask), IsStack(IsStack), IsSet(IsSet) {} + ArgDescriptor(unsigned Mask = ~0u) : Mask(Mask) {} static ArgDescriptor createRegister(Register Reg, unsigned Mask = ~0u) { - return ArgDescriptor(Reg, Mask, false, true); + ArgDescriptor Ret(Mask); + Ret.Val = Reg.asMCReg(); + return Ret; } static ArgDescriptor createStack(unsigned Offset, unsigned Mask = ~0u) { - return ArgDescriptor(Offset, Mask, true, true); + ArgDescriptor Ret(Mask); + Ret.Val = Offset; + return Ret; } static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask) { - return ArgDescriptor(Arg.Reg.id(), Mask, Arg.IsStack, Arg.IsSet); + // Copy the descriptor, then change the mask. + ArgDescriptor Ret(Arg); + Ret.Mask = Mask; + return Ret; } - bool isSet() const { - return IsSet; - } + bool isSet() const { return !std::holds_alternative<std::monostate>(Val); } explicit operator bool() const { return isSet(); } - bool isRegister() const { - return !IsStack; - } + bool isRegister() const { return std::holds_alternative<MCRegister>(Val); } - MCRegister getRegister() const { - assert(!IsStack); - return Reg; - } + MCRegister getRegister() const { return std::get<MCRegister>(Val); } - unsigned getStackOffset() const { - assert(IsStack); - return StackOffset; - } + unsigned getStackOffset() const { return std::get<unsigned>(Val); } unsigned getMask() const { // None of the target SGPRs or VGPRs are expected to have a 'zero' mask. 
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 019c3b79e5fe5..ca3c35067a923 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -1014,7 +1014,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, void setNumWaveDispatchVGPRs(unsigned Count) { NumWaveDispatchVGPRs = Count; } Register getPrivateSegmentWaveByteOffsetSystemSGPR() const { - return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); + if (ArgInfo.PrivateSegmentWaveByteOffset) + return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); + return MCRegister(); } /// Returns the physical register reserved for use as the resource From 739a5a4685590b51cab5a5fd0bb9b7d89a7754ef Mon Sep 17 00:00:00 2001 From: agozillon Date: Fri, 14 Nov 2025 19:03:33 +0100 Subject: [PATCH 02/56] [Flang][OpenMP] Fix defaultmap(none) being overly aggressive with symbol checks (#167806) Currently we're picking up and complaining about builtin (and procedure) symbols like null() when defaultmap(none) is set, so I've relaxed the restriction a bit to allow for procedures and named constants to bypass the restriction. It might be the case that we want to tighten it up again in certain aspects in the future. --- flang/lib/Semantics/resolve-directives.cpp | 9 ++++- .../OpenMP/defaultmap-clause-none.f90 | 37 +++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index f1658943ab2e1..7b1a3ba493f5f 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -3054,8 +3054,13 @@ void OmpAttributeVisitor::Post(const parser::Name &name) { // place for the types specified. 
if (Symbol * found{currScope().FindSymbol(name.source)}) { // If the variable has declare target applied to it (enter or link) it - // is exempt from defaultmap(none) restrictions - if (!symbol->GetUltimate().test(Symbol::Flag::OmpDeclareTarget)) { + // is exempt from defaultmap(none) restrictions. + // We also exempt procedures and named constants from defaultmap(none) + // checking. + if (!symbol->GetUltimate().test(Symbol::Flag::OmpDeclareTarget) && + !(IsProcedure(*symbol) && + !semantics::IsProcedurePointer(*symbol)) && + !IsNamedConstant(*symbol)) { auto &dMap = GetContext().defaultMap; for (auto defaults : dMap) { if (defaults.second == diff --git a/flang/test/Semantics/OpenMP/defaultmap-clause-none.f90 b/flang/test/Semantics/OpenMP/defaultmap-clause-none.f90 index 08e8ebc995097..0b74e3412e472 100644 --- a/flang/test/Semantics/OpenMP/defaultmap-clause-none.f90 +++ b/flang/test/Semantics/OpenMP/defaultmap-clause-none.f90 @@ -94,3 +94,40 @@ subroutine defaultmap_aggregate_none end do !$omp end target end subroutine defaultmap_aggregate_none + +! Verify we do not catch null in defaultmap(none) +subroutine defaultmap_builtin_none + implicit none + integer, pointer :: ptr(:) + + !$omp target defaultmap(none) map(ptr) +!CHECK-NOT: The DEFAULTMAP(NONE) clause requires that 'null' must be listed in a data-sharing attribute, data-mapping attribute, or is_device_ptr clause + ptr => null() + !$omp end target +end subroutine defaultmap_builtin_none + +module pro + implicit none +contains + + function test_procedure() result(ret) + integer :: ret + ret = 1 + end function test_procedure + +! Verify we do not catch a function symbol in defaultmap(none) +! 
but do catch procedure pointers +subroutine defaultmap_func_and_procedure_pointer() + implicit none + procedure(test_procedure), pointer :: f1 + integer :: i + + f1 => test_procedure + + !$omp target defaultmap(none) map(i) +!ERROR: The DEFAULTMAP(NONE) clause requires that 'f1' must be listed in a data-sharing attribute, data-mapping attribute, or is_device_ptr clause + i = f1() + i = test_procedure() + !$omp end target +end subroutine defaultmap_func_and_procedure_pointer +end module From 306f49a2549428db165f4eea585e983aa2468c05 Mon Sep 17 00:00:00 2001 From: Jun Wang Date: Fri, 14 Nov 2025 10:14:12 -0800 Subject: [PATCH 03/56] [AMDGPU][MC] Disallow nogds in ds_gws_* instructions (#166873) The ds_gws_* instructions require gds as an operand. However, when nogds is given, it is treated the same as gds. This patch fixes this to disallow nogds. --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 6 +++ llvm/test/MC/AMDGPU/gfx10_asm_ds_err.s | 38 +++++++++++++++++++ llvm/test/MC/AMDGPU/gfx11_asm_ds_err.s | 37 ++++++++++++++++++ llvm/test/MC/AMDGPU/gfx7_asm_ds_err.s | 37 ++++++++++++++++++ llvm/test/MC/AMDGPU/gfx8_asm_ds_err.s | 37 ++++++++++++++++++ llvm/test/MC/AMDGPU/gfx9_asm_ds_err.s | 37 ++++++++++++++++++ 6 files changed, 192 insertions(+) create mode 100644 llvm/test/MC/AMDGPU/gfx10_asm_ds_err.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_ds_err.s create mode 100644 llvm/test/MC/AMDGPU/gfx7_asm_ds_err.s create mode 100644 llvm/test/MC/AMDGPU/gfx8_asm_ds_err.s create mode 100644 llvm/test/MC/AMDGPU/gfx9_asm_ds_err.s diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 5e0486aa1dd49..68060553e558c 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -7044,6 +7044,12 @@ ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, if (Name == "a16" && !hasA16()) return Error(S, "a16 modifier is not supported on this 
GPU"); + if (Bit == 0 && Name == "gds") { + StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); + if (Mnemo.starts_with("ds_gws")) + return Error(S, "nogds is not allowed"); + } + if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) ImmTy = AMDGPUOperand::ImmTyR128A16; diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_ds_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_ds_err.s new file mode 100644 index 0000000000000..dcf3f1be4139f --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx10_asm_ds_err.s @@ -0,0 +1,38 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s + +ds_gws_sema_release_all nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_release_all offset:4660 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_init v0 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_init v0 offset:0 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_v nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_v offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_br v0 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_br v0 offset:4660 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_p nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_p offset:0 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_barrier v0 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_barrier v0 offset:0 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_ds_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_ds_err.s new file 
mode 100644 index 0000000000000..c7c92fe51238a --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_ds_err.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --implicit-check-not=error: %s + +ds_gws_barrier v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_barrier v1 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_init v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_init v1 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_br v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_br v1 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_p nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_p offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_release_all nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_release_all offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_v nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_v offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed diff --git a/llvm/test/MC/AMDGPU/gfx7_asm_ds_err.s b/llvm/test/MC/AMDGPU/gfx7_asm_ds_err.s new file mode 100644 index 0000000000000..5596bf5b6ea30 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx7_asm_ds_err.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --implicit-check-not=error: %s + +ds_gws_sema_release_all offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_release_all nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_init v1 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds 
is not allowed + +ds_gws_init v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_v offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_v nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_br v1 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_br v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_p offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_p nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_barrier v255 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_barrier v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed diff --git a/llvm/test/MC/AMDGPU/gfx8_asm_ds_err.s b/llvm/test/MC/AMDGPU/gfx8_asm_ds_err.s new file mode 100644 index 0000000000000..27df0b8eacf43 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx8_asm_ds_err.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck --implicit-check-not=error: %s + +ds_gws_sema_release_all offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_release_all nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_init v1 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_init v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_v offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_v nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_br v1 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_br v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_p offset:65535 
nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_p nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_barrier v255 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_barrier v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_ds_err.s b/llvm/test/MC/AMDGPU/gfx9_asm_ds_err.s new file mode 100644 index 0000000000000..e9c71cc3d000c --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx9_asm_ds_err.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --implicit-check-not=error: %s + +ds_gws_sema_release_all offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_release_all nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_init v1 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_init v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_v offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_v nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_br v1 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_br v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_p offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_sema_p nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_barrier v1 offset:65535 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed + +ds_gws_barrier v1 nogds +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: nogds is not allowed From 420d56a3945588ca2141602c387c0feb3f358f47 Mon Sep 17 00:00:00 2001 From: jimingham Date: Fri, 14 Nov 2025 10:14:28 -0800 Subject: [PATCH 04/56] Clean up 
MachTask.mm's handling of m_exception_thread. (#167994) This was getting joined in ShutDownExcecptionThread (sic) but not cleared. So this function was not safe to call twice, since you aren't supposed to join a thread twice. Sadly, this was called in MachTask::Clear and MachProcess::Destroy, which are both called when you tell debugserver to detach. This didn't seem to cause problems IRL, but the most recent ASAN detects this as an error and calls ASAN::Die, which was causing all the tests that ran detach to fail. I fixed that by moving the clear & test for m_exception_thread to ShutDownExceptionThread. I also fixed the spelling of that routine. And that routine was claiming to return a kern_return_t which no one was checking. It actually returns a kern_return_t if there was a Mach failure and a Posix error if there was a join failure. Since there's really nothing you can do but exit if this fails, which is always what you are in the process of doing when you call this, and since we have already done all the useful logging in ShutDownExceptionThread, I just removed the return value. 
--- lldb/tools/debugserver/source/MacOSX/MachProcess.mm | 2 +- lldb/tools/debugserver/source/MacOSX/MachTask.h | 2 +- lldb/tools/debugserver/source/MacOSX/MachTask.mm | 13 ++++++++----- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm index 8df3f29a7e825..3b875e61a268d 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm +++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm @@ -1739,7 +1739,7 @@ static uint64_t bits(uint64_t value, uint32_t msbit, uint32_t lsbit) { ReplyToAllExceptions(); } - m_task.ShutDownExcecptionThread(); + m_task.ShutDownExceptionThread(); // Detach from our process errno = 0; diff --git a/lldb/tools/debugserver/source/MacOSX/MachTask.h b/lldb/tools/debugserver/source/MacOSX/MachTask.h index 915f65a8160ee..40fdbe9eeeb24 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachTask.h +++ b/lldb/tools/debugserver/source/MacOSX/MachTask.h @@ -68,7 +68,7 @@ class MachTask { bool ExceptionPortIsValid() const; kern_return_t SaveExceptionPortInfo(); kern_return_t RestoreExceptionPortInfo(); - kern_return_t ShutDownExcecptionThread(); + void ShutDownExceptionThread(); bool StartExceptionThread( const RNBContext::IgnoredExceptions &ignored_exceptions, DNBError &err); diff --git a/lldb/tools/debugserver/source/MacOSX/MachTask.mm b/lldb/tools/debugserver/source/MacOSX/MachTask.mm index e5bbab830b187..22ad0c407de42 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachTask.mm +++ b/lldb/tools/debugserver/source/MacOSX/MachTask.mm @@ -145,10 +145,8 @@ //---------------------------------------------------------------------- void MachTask::Clear() { // Do any cleanup needed for this task - if (m_exception_thread) - ShutDownExcecptionThread(); + ShutDownExceptionThread(); m_task = TASK_NULL; - m_exception_thread = 0; m_exception_port = MACH_PORT_NULL; m_exec_will_be_suspended = false; m_do_double_resume = false; @@ -685,8 
+683,11 @@ static void get_threads_profile_data(DNBProfileDataScanType scanType, return false; } -kern_return_t MachTask::ShutDownExcecptionThread() { +void MachTask::ShutDownExceptionThread() { DNBError err; + + if (!m_exception_thread) + return; err = RestoreExceptionPortInfo(); @@ -702,6 +703,8 @@ static void get_threads_profile_data(DNBProfileDataScanType scanType, if (DNBLogCheckLogBit(LOG_TASK) || err.Fail()) err.LogThreaded("::pthread_join ( thread = %p, value_ptr = NULL)", m_exception_thread); + + m_exception_thread = nullptr; // Deallocate our exception port that we used to track our child process mach_port_t task_self = mach_task_self(); @@ -713,7 +716,7 @@ static void get_threads_profile_data(DNBProfileDataScanType scanType, m_exec_will_be_suspended = false; m_do_double_resume = false; - return err.Status(); + return; } void *MachTask::ExceptionThread(void *arg) { From 52f2a9445d420841ba503550c77fa4e2b6d88ae9 Mon Sep 17 00:00:00 2001 From: Tony Tao Date: Fri, 14 Nov 2025 13:19:26 -0500 Subject: [PATCH 05/56] [Support] Prevent loss of file type flags when creating temporary (#167939) Non-binary output files from the compiler need the `OF_Text` flag set for encoding conversion to be performed correctly on z/OS. --------- Co-authored-by: Tony Tao --- clang/test/CodeGen/SystemZ/encoding.c | 9 ++++++++ llvm/lib/Support/VirtualOutputBackends.cpp | 25 +++++++++++++++------- 2 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/SystemZ/encoding.c diff --git a/clang/test/CodeGen/SystemZ/encoding.c b/clang/test/CodeGen/SystemZ/encoding.c new file mode 100644 index 0000000000000..d018a9c37852a --- /dev/null +++ b/clang/test/CodeGen/SystemZ/encoding.c @@ -0,0 +1,9 @@ +// Checks encoding of output file +// This is only required for z/OS. 
+// +// REQUIRES: system-zos, systemz-registered-target +// RUN: %clang_cc1 -triple s390x-ibm-zos -S %s -o %t.s +// RUN: ls -T %t.s | FileCheck %s + +// CHECK: t IBM-1047 T=on +void foo() { return; } diff --git a/llvm/lib/Support/VirtualOutputBackends.cpp b/llvm/lib/Support/VirtualOutputBackends.cpp index de59b8ab63a53..f9c8f1302e6c5 100644 --- a/llvm/lib/Support/VirtualOutputBackends.cpp +++ b/llvm/lib/Support/VirtualOutputBackends.cpp @@ -254,6 +254,20 @@ static Error createDirectoriesOnDemand(StringRef OutputPath, }); } +static sys::fs::OpenFlags generateFlagsFromConfig(OutputConfig Config) { + sys::fs::OpenFlags OF = sys::fs::OF_None; + if (Config.getTextWithCRLF()) + OF |= sys::fs::OF_TextWithCRLF; + else if (Config.getText()) + OF |= sys::fs::OF_Text; + // Don't pass OF_Append if writing to temporary since OF_Append is + // not Atomic Append + if (Config.getAppend() && !Config.getAtomicWrite()) + OF |= sys::fs::OF_Append; + + return OF; +} + Error OnDiskOutputFile::tryToCreateTemporary(std::optional &FD) { // Create a temporary file. // Insert -%%%%%%%% before the extension (if any), and because some tools @@ -269,8 +283,9 @@ Error OnDiskOutputFile::tryToCreateTemporary(std::optional &FD) { return createDirectoriesOnDemand(OutputPath, Config, [&]() -> Error { int NewFD; SmallString<128> UniquePath; + sys::fs::OpenFlags OF = generateFlagsFromConfig(Config); if (std::error_code EC = - sys::fs::createUniqueFile(ModelPath, NewFD, UniquePath)) + sys::fs::createUniqueFile(ModelPath, NewFD, UniquePath, OF)) return make_error(ModelPath, OutputPath, EC); if (Config.getDiscardOnSignal()) @@ -312,13 +327,7 @@ Error OnDiskOutputFile::initializeFile(std::optional &FD) { // Not using a temporary file. Open the final output file. 
return createDirectoriesOnDemand(OutputPath, Config, [&]() -> Error { int NewFD; - sys::fs::OpenFlags OF = sys::fs::OF_None; - if (Config.getTextWithCRLF()) - OF |= sys::fs::OF_TextWithCRLF; - else if (Config.getText()) - OF |= sys::fs::OF_Text; - if (Config.getAppend()) - OF |= sys::fs::OF_Append; + sys::fs::OpenFlags OF = generateFlagsFromConfig(Config); if (std::error_code EC = sys::fs::openFileForWrite( OutputPath, NewFD, sys::fs::CD_CreateAlways, OF)) return convertToOutputError(OutputPath, EC); From 8e4209a2905b3a698424e199db12330656ee9faa Mon Sep 17 00:00:00 2001 From: "Henrik G. Olsson" Date: Fri, 14 Nov 2025 10:19:42 -0800 Subject: [PATCH 06/56] [utils] don't warn when setting rlimit fails on Solaris (#167921) Solaris doesn't define RLIMIT_NPROC, so this is expected to fail there. This fixes a test failure in llvm/utils/lit/tests/verbosity.py on Solaris due to this unexpected warning being included in the lit output. --- llvm/utils/lit/lit/run.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/lit/lit/run.py b/llvm/utils/lit/lit/run.py index 55de914d4d2e0..3fc4a1b9b40bd 100644 --- a/llvm/utils/lit/lit/run.py +++ b/llvm/utils/lit/lit/run.py @@ -137,10 +137,11 @@ def _increase_process_limit(self): "Raised process limit from %d to %d" % (soft_limit, desired_limit) ) except Exception as ex: - # Warn, unless this is Windows, z/OS, or Cygwin in which case this is expected. + # Warn, unless this is Windows, z/OS, Solaris or Cygwin in which case this is expected. 
if ( os.name != "nt" and platform.system() != "OS/390" + and platform.system() != "SunOS" and platform.sys.platform != "cygwin" ): self.lit_config.warning("Failed to raise process limit: %s" % ex) From 3f0ef2765c7f08c397c73d5a21481cf215b4f9f3 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Fri, 14 Nov 2025 10:22:56 -0800 Subject: [PATCH 07/56] [MLIR][LLVM] Debug info: import debug records directly (#167812) Effectively means we don't need to call into `llvmModule->convertFromNewDbgValues()` anymore. Added a flag to allow users to access the old behavior. --- .../include/mlir/Target/LLVMIR/ModuleImport.h | 30 ++- mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 14 +- mlir/lib/Target/LLVMIR/ModuleImport.cpp | 242 +++++++++++++----- .../LLVMIR/Import/debug-info-records.ll | 87 +++++++ 4 files changed, 308 insertions(+), 65 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/Import/debug-info-records.ll diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleImport.h b/mlir/include/mlir/Target/LLVMIR/ModuleImport.h index 09d819a05618b..a4a7df985b681 100644 --- a/mlir/include/mlir/Target/LLVMIR/ModuleImport.h +++ b/mlir/include/mlir/Target/LLVMIR/ModuleImport.h @@ -163,9 +163,10 @@ class ModuleImport { /// Converts `value` to a float attribute. Asserts if the matching fails. FloatAttr matchFloatAttr(llvm::Value *value); - /// Converts `value` to a local variable attribute. Asserts if the matching - /// fails. - DILocalVariableAttr matchLocalVariableAttr(llvm::Value *value); + /// Converts `valOrVariable` to a local variable attribute. Asserts if the + /// matching fails. + DILocalVariableAttr matchLocalVariableAttr( + llvm::PointerUnion valOrVariable); /// Converts `value` to a label attribute. Asserts if the matching fails. DILabelAttr matchLabelAttr(llvm::Value *value); @@ -281,6 +282,9 @@ class ModuleImport { /// after the function conversion has finished. 
void addDebugIntrinsic(llvm::CallInst *intrinsic); + /// Similar to `addDebugIntrinsic`, but for debug records. + void addDebugRecord(llvm::DbgRecord *debugRecord); + /// Converts the LLVM values for an intrinsic to mixed MLIR values and /// attributes for LLVM_IntrOpBase. Attributes correspond to LLVM immargs. The /// list `immArgPositions` contains the positions of immargs on the LLVM @@ -339,9 +343,26 @@ class ModuleImport { /// Converts all debug intrinsics in `debugIntrinsics`. Assumes that the /// function containing the intrinsics has been fully converted to MLIR. LogicalResult processDebugIntrinsics(); + /// Converts all debug records in `debugRecords`. Assumes that the + /// function containing the record has been fully converted to MLIR. + LogicalResult processDebugRecords(); /// Converts a single debug intrinsic. LogicalResult processDebugIntrinsic(llvm::DbgVariableIntrinsic *dbgIntr, DominanceInfo &domInfo); + /// Converts a single debug record. + LogicalResult processDebugRecord(llvm::DbgRecord &debugRecord, + DominanceInfo &domInfo); + /// Process arguments for declare/value operation insertion. `localVarAttr` + /// and `localExprAttr` are the attained attributes after importing the debug + /// variable and expressions. This also sets the builder insertion point to be + /// used by these operations. + std::tuple + processDebugOpArgumentsAndInsertionPt( + Location loc, bool hasArgList, bool isKillLocation, + llvm::function_ref()> convertArgOperandToValue, + llvm::Value *address, + llvm::PointerUnion variable, + llvm::DIExpression *expression, DominanceInfo &domInfo); /// Converts LLMV IR asm inline call operand's attributes into an array of /// MLIR attributes to be utilized in `llvm.inline_asm`. ArrayAttr convertAsmInlineOperandAttrs(const llvm::CallBase &llvmCall); @@ -485,6 +506,9 @@ class ModuleImport { /// Function-local list of debug intrinsics that need to be imported after the /// function conversion has finished. 
SetVector debugIntrinsics; + /// Function-local list of debug records that need to be imported after the + /// function conversion has finished. + SetVector debugRecords; /// Mapping between LLVM alias scope and domain metadata nodes and /// attributes in the LLVM dialect corresponding to these nodes. DenseMap aliasScopeMapping; diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index 2dd0640f794e5..ba80f6294bd9b 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -30,6 +30,14 @@ void registerFromLLVMIRTranslation() { llvm::cl::desc("Emit expensive warnings during LLVM IR import " "(discouraged: testing only!)"), llvm::cl::init(false)); + static llvm::cl::opt convertDebugRecToIntrinsics( + "convert-debug-rec-to-intrinsics", + llvm::cl::desc("Change the input LLVM module to use old debug intrinsics " + "instead of records " + "via convertFromNewDbgValues, this happens " + "before importing the debug information " + "(discouraged: to be removed soon!)"), + llvm::cl::init(false)); static llvm::cl::opt dropDICompositeTypeElements( "drop-di-composite-type-elements", llvm::cl::desc( @@ -69,8 +77,10 @@ void registerFromLLVMIRTranslation() { if (llvm::verifyModule(*llvmModule, &llvm::errs())) return nullptr; - // Debug records are not currently supported in the LLVM IR translator. - llvmModule->convertFromNewDbgValues(); + // Now that the translation supports importing debug records directly, + // make it the default, but allow the user to override to old behavior.
+ if (convertDebugRecToIntrinsics) + llvmModule->convertFromNewDbgValues(); return translateLLVMIRToModule( std::move(llvmModule), context, emitExpensiveWarnings, diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index d9891e3168820..b8106101692b8 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -34,12 +34,14 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugProgramInstruction.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" +#include "llvm/Support/LogicalResult.h" #include "llvm/Support/ModRef.h" #include @@ -522,6 +524,11 @@ void ModuleImport::addDebugIntrinsic(llvm::CallInst *intrinsic) { debugIntrinsics.insert(intrinsic); } +void ModuleImport::addDebugRecord(llvm::DbgRecord *debugRecord) { + if (!debugRecords.contains(debugRecord)) + debugRecords.insert(debugRecord); +} + static Attribute convertCGProfileModuleFlagValue(ModuleOp mlirModule, llvm::MDTuple *mdTuple) { auto getLLVMFunction = @@ -2003,9 +2010,15 @@ FloatAttr ModuleImport::matchFloatAttr(llvm::Value *value) { return floatAttr; } -DILocalVariableAttr ModuleImport::matchLocalVariableAttr(llvm::Value *value) { - auto *nodeAsVal = cast(value); - auto *node = cast(nodeAsVal->getMetadata()); +DILocalVariableAttr ModuleImport::matchLocalVariableAttr( + llvm::PointerUnion valOrVariable) { + llvm::DILocalVariable *node = nullptr; + if (auto *value = dyn_cast(valOrVariable)) { + auto *nodeAsVal = cast(value); + node = cast(nodeAsVal->getMetadata()); + } else { + node = cast(valOrVariable); + } return debugImporter->translate(node); } @@ -2544,6 +2557,11 @@ LogicalResult ModuleImport::processInstruction(llvm::Instruction *inst) { if (auto *intrinsic = dyn_cast(inst)) return
convertIntrinsic(intrinsic); + // Capture instruction with attached debug markers for later processing. + if (inst->DebugMarker) + for (llvm::DbgRecord &debugRecord : inst->DebugMarker->getDbgRecordRange()) + addDebugRecord(&debugRecord); + // Convert all remaining LLVM instructions to MLIR operations. return convertInstruction(inst); } @@ -3007,76 +3025,50 @@ LogicalResult ModuleImport::processFunction(llvm::Function *func) { if (failed(processDebugIntrinsics())) return failure(); + // Process the debug records that require a delayed conversion after + // everything else was converted. + if (failed(processDebugRecords())) + return failure(); + return success(); } -/// Checks if `dbgIntr` is a kill location that holds metadata instead of an SSA -/// value. -static bool isMetadataKillLocation(llvm::DbgVariableIntrinsic *dbgIntr) { - if (!dbgIntr->isKillLocation()) +/// Checks if a kill location holds metadata instead of an SSA value. +static bool isMetadataKillLocation(bool isKillLocation, llvm::Value *value) { + if (!isKillLocation) return false; - llvm::Value *value = dbgIntr->getArgOperand(0); auto *nodeAsVal = dyn_cast(value); if (!nodeAsVal) return false; return !isa(nodeAsVal->getMetadata()); } -LogicalResult -ModuleImport::processDebugIntrinsic(llvm::DbgVariableIntrinsic *dbgIntr, - DominanceInfo &domInfo) { - Location loc = translateLoc(dbgIntr->getDebugLoc()); - auto emitUnsupportedWarning = [&]() { - if (emitExpensiveWarnings) - emitWarning(loc) << "dropped intrinsic: " << diag(*dbgIntr); - return success(); - }; - // Drop debug intrinsics with arg lists. - // TODO: Support debug intrinsics that have arg lists. - if (dbgIntr->hasArgList()) - return emitUnsupportedWarning(); - // Kill locations can have metadata nodes as location operand. This - // cannot be converted to poison as the type cannot be reconstructed. - // TODO: find a way to support this case. 
- if (isMetadataKillLocation(dbgIntr)) - return emitUnsupportedWarning(); - // Drop debug intrinsics if the associated variable information cannot be - // translated due to cyclic debug metadata. - // TODO: Support cyclic debug metadata. - DILocalVariableAttr localVariableAttr = - matchLocalVariableAttr(dbgIntr->getArgOperand(1)); - if (!localVariableAttr) - return emitUnsupportedWarning(); - FailureOr argOperand = convertMetadataValue(dbgIntr->getArgOperand(0)); - if (failed(argOperand)) - return emitError(loc) << "failed to convert a debug intrinsic operand: " - << diag(*dbgIntr); - - // Ensure that the debug intrinsic is inserted right after its operand is - // defined. Otherwise, the operand might not necessarily dominate the - // intrinsic. If the defining operation is a terminator, insert the intrinsic - // into a dominated block. - OpBuilder::InsertionGuard guard(builder); - if (Operation *op = argOperand->getDefiningOp(); +/// Ensure that the debug intrinsic is inserted right after the operand +/// definition. Otherwise, the operand might not necessarily dominate the +/// intrinsic. If the defining operation is a terminator, insert the intrinsic +/// into a dominated block. +static LogicalResult setDebugIntrinsicBuilderInsertionPoint( + mlir::OpBuilder &builder, DominanceInfo &domInfo, Value argOperand) { + if (Operation *op = argOperand.getDefiningOp(); op && op->hasTrait()) { // Find a dominated block that can hold the debug intrinsic. auto dominatedBlocks = domInfo.getNode(op->getBlock())->children(); // If no block is dominated by the terminator, this intrinisc cannot be // converted. if (dominatedBlocks.empty()) - return emitUnsupportedWarning(); + return failure(); // Set insertion point before the terminator, to avoid inserting something // before landingpads. 
Block *dominatedBlock = (*dominatedBlocks.begin())->getBlock(); builder.setInsertionPoint(dominatedBlock->getTerminator()); } else { - Value insertPt = *argOperand; - if (auto blockArg = dyn_cast(*argOperand)) { + Value insertPt = argOperand; + if (auto blockArg = dyn_cast(argOperand)) { // The value might be coming from a phi node and is now a block argument, // which means the insertion point is set to the start of the block. If // this block is a target destination of an invoke, the insertion point // must happen after the landing pad operation. - Block *insertionBlock = argOperand->getParentBlock(); + Block *insertionBlock = argOperand.getParentBlock(); if (!insertionBlock->empty() && isa(insertionBlock->front())) insertPt = cast(insertionBlock->front()).getRes(); @@ -3084,23 +3076,143 @@ ModuleImport::processDebugIntrinsic(llvm::DbgVariableIntrinsic *dbgIntr, builder.setInsertionPointAfterValue(insertPt); } - auto locationExprAttr = - debugImporter->translateExpression(dbgIntr->getExpression()); - Operation *op = - llvm::TypeSwitch(dbgIntr) - .Case([&](llvm::DbgDeclareInst *) { - return LLVM::DbgDeclareOp::create( - builder, loc, *argOperand, localVariableAttr, locationExprAttr); - }) - .Case([&](llvm::DbgValueInst *) { - return LLVM::DbgValueOp::create( - builder, loc, *argOperand, localVariableAttr, locationExprAttr); - }); + return success(); +} + +std::tuple +ModuleImport::processDebugOpArgumentsAndInsertionPt( + Location loc, bool hasArgList, bool isKillLocation, + llvm::function_ref()> convertArgOperandToValue, + llvm::Value *address, + llvm::PointerUnion variable, + llvm::DIExpression *expression, DominanceInfo &domInfo) { + // Drop debug intrinsics with arg lists. + // TODO: Support debug intrinsics that have arg lists. + if (hasArgList) + return {}; + // Kill locations can have metadata nodes as location operand. This + // cannot be converted to poison as the type cannot be reconstructed. + // TODO: find a way to support this case. 
+ if (isMetadataKillLocation(isKillLocation, address)) + return {}; + // Drop debug intrinsics if the associated variable information cannot be + // translated due to cyclic debug metadata. + // TODO: Support cyclic debug metadata. + DILocalVariableAttr localVarAttr = matchLocalVariableAttr(variable); + if (!localVarAttr) + return {}; + FailureOr argOperand = convertArgOperandToValue(); + if (failed(argOperand)) { + emitError(loc) << "failed to convert a debug operand: " << diag(*address); + return {}; + } + + if (setDebugIntrinsicBuilderInsertionPoint(builder, domInfo, *argOperand) + .failed()) + return {}; + + return {localVarAttr, debugImporter->translateExpression(expression), + *argOperand}; +} + +LogicalResult +ModuleImport::processDebugIntrinsic(llvm::DbgVariableIntrinsic *dbgIntr, + DominanceInfo &domInfo) { + Location loc = translateLoc(dbgIntr->getDebugLoc()); + auto emitUnsupportedWarning = [&]() { + if (emitExpensiveWarnings) + emitWarning(loc) << "dropped intrinsic: " << diag(*dbgIntr); + return success(); + }; + + OpBuilder::InsertionGuard guard(builder); + auto convertArgOperandToValue = [&]() { + return convertMetadataValue(dbgIntr->getArgOperand(0)); + }; + + auto [localVariableAttr, locationExprAttr, locVal] = + processDebugOpArgumentsAndInsertionPt( + loc, dbgIntr->hasArgList(), dbgIntr->isKillLocation(), + convertArgOperandToValue, dbgIntr->getArgOperand(0), + dbgIntr->getArgOperand(1), dbgIntr->getExpression(), domInfo); + + if (!localVariableAttr) + return emitUnsupportedWarning(); + + if (!locVal) // Expected if localVariableAttr is present. 
+ return failure(); + + Operation *op = nullptr; + if (isa(dbgIntr)) + op = LLVM::DbgDeclareOp::create(builder, loc, locVal, localVariableAttr, + locationExprAttr); + else if (isa(dbgIntr)) + op = LLVM::DbgValueOp::create(builder, loc, locVal, localVariableAttr, + locationExprAttr); + else + return emitUnsupportedWarning(); + mapNoResultOp(dbgIntr, op); setNonDebugMetadataAttrs(dbgIntr, op); return success(); } +LogicalResult ModuleImport::processDebugRecord(llvm::DbgRecord &debugRecord, + DominanceInfo &domInfo) { + Location loc = translateLoc(debugRecord.getDebugLoc()); + auto emitUnsupportedWarning = [&]() { + if (!emitExpensiveWarnings) + return success(); + std::string options; + llvm::raw_string_ostream optionsStream(options); + debugRecord.print(optionsStream); + emitWarning(loc) << "unhandled debug record " << optionsStream.str(); + return success(); + }; + + OpBuilder::InsertionGuard guard(builder); + auto *dbgVar = dyn_cast(&debugRecord); + if (!dbgVar) + return emitUnsupportedWarning(); + + auto convertArgOperandToValue = [&]() -> FailureOr { + llvm::Value *value = dbgVar->getAddress(); + + // Return the mapped value if it has been converted before. + auto it = valueMapping.find(value); + if (it != valueMapping.end()) + return it->getSecond(); + + // Convert constants such as immediate values that have no mapping yet. + if (auto *constant = dyn_cast(value)) + return convertConstantExpr(constant); + return failure(); + }; + + auto [localVariableAttr, locationExprAttr, locVal] = + processDebugOpArgumentsAndInsertionPt( + loc, dbgVar->hasArgList(), dbgVar->isKillLocation(), + convertArgOperandToValue, dbgVar->getAddress(), dbgVar->getVariable(), + dbgVar->getExpression(), domInfo); + + if (!localVariableAttr) + return emitUnsupportedWarning(); + + if (!locVal) // Expected if localVariableAttr is present. 
+ return failure(); + + if (dbgVar->isDbgDeclare()) + LLVM::DbgDeclareOp::create(builder, loc, locVal, localVariableAttr, + locationExprAttr); + else if (dbgVar->isDbgValue()) + LLVM::DbgValueOp::create(builder, loc, locVal, localVariableAttr, + locationExprAttr); + else // isDbgAssign + return emitUnsupportedWarning(); + + return success(); +} + LogicalResult ModuleImport::processDebugIntrinsics() { DominanceInfo domInfo; for (llvm::Instruction *inst : debugIntrinsics) { @@ -3111,6 +3223,16 @@ LogicalResult ModuleImport::processDebugIntrinsics() { return success(); } +LogicalResult ModuleImport::processDebugRecords() { + DominanceInfo domInfo; + for (llvm::DbgRecord *debugRecord : debugRecords) { + if (failed(processDebugRecord(*debugRecord, domInfo))) + return failure(); + } + debugRecords.clear(); + return success(); +} + LogicalResult ModuleImport::processBasicBlock(llvm::BasicBlock *bb, Block *block) { builder.setInsertionPointToStart(block); diff --git a/mlir/test/Target/LLVMIR/Import/debug-info-records.ll b/mlir/test/Target/LLVMIR/Import/debug-info-records.ll new file mode 100644 index 0000000000000..077871e356774 --- /dev/null +++ b/mlir/test/Target/LLVMIR/Import/debug-info-records.ll @@ -0,0 +1,87 @@ +; RUN: mlir-translate -import-llvm -mlir-print-debuginfo -convert-debug-rec-to-intrinsics -emit-expensive-warnings -split-input-file %s 2>&1 | FileCheck %s +; RUN: mlir-translate -import-llvm -mlir-print-debuginfo -emit-expensive-warnings -split-input-file %s 2>&1 | FileCheck %s + +; CHECK: #[[LOCAL_VAR0:.*]] = #llvm.di_local_variable +; CHECK: #[[LOCAL_VAR1:.*]] = #llvm.di_local_variable = %[[ARG0]] : i64 + ; CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32 + ; CHECK: %[[ADDR:.*]] = llvm.alloca %[[CST]] x i64 + ; CHECK: llvm.intr.dbg.declare #[[LOCAL_VAR2]] #llvm.di_expression<[DW_OP_deref, DW_OP_LLVM_convert(4, DW_ATE_signed)]> = %[[ADDR]] : !llvm.ptr + %2 = alloca i64, align 8, !dbg !19 + #dbg_value(i64 %0, !20, !DIExpression(DW_OP_LLVM_fragment, 0, 
1), !22) + #dbg_declare(ptr %2, !23, !DIExpression(DW_OP_deref, DW_OP_LLVM_convert, 4, DW_ATE_signed), !25) + #dbg_value(i64 %0, !26, !DIExpression(), !27) + call void @func_no_debug(), !dbg !28 + %3 = add i64 %0, %0, !dbg !32 + ret void, !dbg !37 +} + +define void @empty_types() !dbg !38 { + ret void, !dbg !44 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "MLIR", isOptimized: true, runtimeVersion: 0, splitDebugFilename: "test.dwo", emissionKind: FullDebug, nameTableKind: None) +!1 = !DIFile(filename: "foo.mlir", directory: "/test/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "func_with_debug", linkageName: "func_with_debug", scope: !4, file: !1, line: 3, type: !6, scopeLine: 3, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!4 = !DINamespace(name: "nested", scope: !5) +!5 = !DINamespace(name: "toplevel", scope: null, exportSymbols: true) +!6 = !DISubroutineType(cc: DW_CC_normal, types: !7) +!7 = !{null, !8, !9, !11, !12, !13, !16} +!8 = !DIBasicType(name: "si64") +!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 32, offset: 8, extraData: !10) +!10 = !DIBasicType(name: "si32", size: 32, encoding: DW_ATE_signed) +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "named", baseType: !10) +!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 32, offset: 8, dwarfAddressSpace: 3) +!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "composite", file: !1, line: 42, size: 64, align: 32, elements: !14) +!14 = !{!15} +!15 = !DISubrange(count: 4) +!16 = !DICompositeType(tag: DW_TAG_array_type, name: "array", file: !1, baseType: !8, flags: DIFlagVector, elements: !17) +!17 = !{!18} +!18 = !DISubrange(lowerBound: 0, upperBound: 4, stride: 1) +!19 = !DILocation(line: 100, column: 12, scope: !3) +!20 = !DILocalVariable(name: "arg", arg: 1, scope: !21, file: !1, line: 6, type: !8, 
align: 32) +!21 = distinct !DILexicalBlockFile(scope: !3, file: !1, discriminator: 0) +!22 = !DILocation(line: 103, column: 3, scope: !3) +!23 = !DILocalVariable(name: "alloc", scope: !24) +!24 = distinct !DILexicalBlock(scope: !3) +!25 = !DILocation(line: 106, column: 3, scope: !3) +!26 = !DILocalVariable(scope: !24) +!27 = !DILocation(line: 109, column: 3, scope: !3) +!28 = !DILocation(line: 1, column: 2, scope: !3) +!32 = !DILocation(line: 2, column: 4, scope: !33, inlinedAt: !36) +!33 = distinct !DISubprogram(name: "callee", scope: !13, file: !1, type: !34, spFlags: DISPFlagDefinition, unit: !0) +!34 = !DISubroutineType(types: !35) +!35 = !{!8, !8} +!36 = !DILocation(line: 28, column: 5, scope: !3) +!37 = !DILocation(line: 135, column: 3, scope: !3) +!38 = distinct !DISubprogram(name: "empty_types", scope: !39, file: !1, type: !40, spFlags: DISPFlagDefinition, unit: !0, annotations: !42) +!39 = !DIModule(scope: !1, name: "module", configMacros: "bar", includePath: "/", apinotes: "/", file: !1, line: 42, isDecl: true) +!40 = !DISubroutineType(cc: DW_CC_normal, types: !41) +!41 = !{} +!42 = !{!43} +!43 = !{!"foo", !"bar"} +!44 = !DILocation(line: 140, column: 3, scope: !38) From 05e94c95fa7c14b14a8da571ea65c53714e6e08f Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Fri, 14 Nov 2025 21:23:24 +0300 Subject: [PATCH 08/56] [Xtensa] TableGen-erate SDNode descriptions (#166253) Part of #119709. 
--- llvm/lib/Target/Xtensa/CMakeLists.txt | 2 + llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp | 1 + llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 53 +--------------- llvm/lib/Target/Xtensa/XtensaISelLowering.h | 63 ------------------- .../Target/Xtensa/XtensaSelectionDAGInfo.cpp | 19 ++++++ .../Target/Xtensa/XtensaSelectionDAGInfo.h | 28 +++++++++ llvm/lib/Target/Xtensa/XtensaSubtarget.cpp | 11 +++- llvm/lib/Target/Xtensa/XtensaSubtarget.h | 10 +-- 8 files changed, 66 insertions(+), 121 deletions(-) create mode 100644 llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.cpp create mode 100644 llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.h diff --git a/llvm/lib/Target/Xtensa/CMakeLists.txt b/llvm/lib/Target/Xtensa/CMakeLists.txt index c698b42b00d10..9d3a89380441a 100644 --- a/llvm/lib/Target/Xtensa/CMakeLists.txt +++ b/llvm/lib/Target/Xtensa/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(LLVM XtensaGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM XtensaGenInstrInfo.inc -gen-instr-info) tablegen(LLVM XtensaGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM XtensaGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM XtensaGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM XtensaGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(XtensaCommonTableGen) @@ -22,6 +23,7 @@ add_llvm_target(XtensaCodeGen XtensaISelDAGToDAG.cpp XtensaISelLowering.cpp XtensaRegisterInfo.cpp + XtensaSelectionDAGInfo.cpp XtensaSubtarget.cpp XtensaTargetMachine.cpp diff --git a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp index 959553de99392..655b40fc57b97 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp @@ -12,6 +12,7 @@ #include "MCTargetDesc/XtensaMCTargetDesc.h" #include "Xtensa.h" +#include "XtensaSelectionDAGInfo.h" #include "XtensaTargetMachine.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git 
a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index c211777e69894..71c98621c81ee 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -15,6 +15,7 @@ #include "XtensaConstantPoolValue.h" #include "XtensaInstrInfo.h" #include "XtensaMachineFunctionInfo.h" +#include "XtensaSelectionDAGInfo.h" #include "XtensaSubtarget.h" #include "XtensaTargetMachine.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -1510,58 +1511,6 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op, } } -const char *XtensaTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - case XtensaISD::BR_JT: - return "XtensaISD::BR_JT"; - case XtensaISD::CALL: - return "XtensaISD::CALL"; - case XtensaISD::CALLW8: - return "XtensaISD::CALLW8"; - case XtensaISD::EXTUI: - return "XtensaISD::EXTUI"; - case XtensaISD::MOVSP: - return "XtensaISD::MOVSP"; - case XtensaISD::PCREL_WRAPPER: - return "XtensaISD::PCREL_WRAPPER"; - case XtensaISD::RET: - return "XtensaISD::RET"; - case XtensaISD::RETW: - return "XtensaISD::RETW"; - case XtensaISD::RUR: - return "XtensaISD::RUR"; - case XtensaISD::SELECT_CC: - return "XtensaISD::SELECT_CC"; - case XtensaISD::SELECT_CC_FP: - return "XtensaISD::SELECT_CC_FP"; - case XtensaISD::SRCL: - return "XtensaISD::SRCL"; - case XtensaISD::SRCR: - return "XtensaISD::SRCR"; - case XtensaISD::CMPUO: - return "XtensaISD::CMPUO"; - case XtensaISD::CMPUEQ: - return "XtensaISD::CMPUEQ"; - case XtensaISD::CMPULE: - return "XtensaISD::CMPULE"; - case XtensaISD::CMPULT: - return "XtensaISD::CMPULT"; - case XtensaISD::CMPOEQ: - return "XtensaISD::CMPOEQ"; - case XtensaISD::CMPOLE: - return "XtensaISD::CMPOLE"; - case XtensaISD::CMPOLT: - return "XtensaISD::CMPOLT"; - case XtensaISD::MADD: - return "XtensaISD::MADD"; - case XtensaISD::MSUB: - return "XtensaISD::MSUB"; - case XtensaISD::MOVS: - return "XtensaISD::MOVS"; - } - return nullptr; -} - 
TargetLowering::AtomicExpansionKind XtensaTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { return AtomicExpansionKind::CmpXChg; diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index d84cbdb6afcef..829de0fe9c161 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -20,67 +20,6 @@ namespace llvm { -namespace XtensaISD { -enum { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - BR_JT, - - // Calls a function. Operand 0 is the chain operand and operand 1 - // is the target address. The arguments start at operand 2. - // There is an optional glue operand at the end. - CALL, - // Call with rotation window by 8 registers - CALLW8, - - // Extract unsigned immediate. Operand 0 is value, operand 1 - // is bit position of the field [0..31], operand 2 is bit size - // of the field [1..16] - EXTUI, - - MOVSP, - - // Wraps a TargetGlobalAddress that should be loaded using PC-relative - // accesses. Operand 0 is the address. 
- PCREL_WRAPPER, - RET, - RETW, - - RUR, - - // Select with condition operator - This selects between a true value and - // a false value (ops #2 and #3) based on the boolean result of comparing - // the lhs and rhs (ops #0 and #1) of a conditional expression with the - // condition code in op #4 - SELECT_CC, - // Select with condition operator - This selects between a true value and - // a false value (ops #2 and #3) based on the boolean result of comparing - // f32 operands lhs and rhs (ops #0 and #1) of a conditional expression - // with the condition code in op #4 and boolean branch kind in op #5 - SELECT_CC_FP, - - // SRCL(R) performs shift left(right) of the concatenation of 2 registers - // and returns high(low) 32-bit part of 64-bit result - SRCL, - // Shift Right Combined - SRCR, - - // Floating point unordered compare conditions - CMPUEQ, - CMPULE, - CMPULT, - CMPUO, - // Floating point compare conditions - CMPOEQ, - CMPOLE, - CMPOLT, - // FP multipy-add/sub - MADD, - MSUB, - // FP move - MOVS, -}; -} - class XtensaSubtarget; class XtensaTargetLowering : public TargetLowering { @@ -104,8 +43,6 @@ class XtensaTargetLowering : public TargetLowering { bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - const char *getTargetNodeName(unsigned Opcode) const override; - bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; diff --git a/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.cpp b/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..79fa81d424809 --- /dev/null +++ b/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.cpp @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "XtensaSelectionDAGInfo.h" + +#define GET_SDNODE_DESC +#include "XtensaGenSDNodeInfo.inc" + +using namespace llvm; + +XtensaSelectionDAGInfo::XtensaSelectionDAGInfo() + : SelectionDAGGenTargetInfo(XtensaGenSDNodeInfo) {} + +XtensaSelectionDAGInfo::~XtensaSelectionDAGInfo() = default; diff --git a/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.h b/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.h new file mode 100644 index 0000000000000..6fb22817db9f3 --- /dev/null +++ b/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.h @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_XTENSA_XTENSASELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_XTENSA_XTENSASELECTIONDAGINFO_H + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +#define GET_SDNODE_ENUM +#include "XtensaGenSDNodeInfo.inc" + +namespace llvm { + +class XtensaSelectionDAGInfo : public SelectionDAGGenTargetInfo { +public: + XtensaSelectionDAGInfo(); + + ~XtensaSelectionDAGInfo() override; +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_XTENSA_XTENSASELECTIONDAGINFO_H diff --git a/llvm/lib/Target/Xtensa/XtensaSubtarget.cpp b/llvm/lib/Target/Xtensa/XtensaSubtarget.cpp index 6b1d3255db247..c1a1efc85eb55 100644 --- a/llvm/lib/Target/Xtensa/XtensaSubtarget.cpp +++ b/llvm/lib/Target/Xtensa/XtensaSubtarget.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "XtensaSubtarget.h" +#include "XtensaSelectionDAGInfo.h" #include "llvm/IR/GlobalValue.h" 
#include "llvm/Support/Debug.h" @@ -39,4 +40,12 @@ XtensaSubtarget::XtensaSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) : XtensaGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - TSInfo(), FrameLowering(*this) {} + FrameLowering(*this) { + TSInfo = std::make_unique(); +} + +XtensaSubtarget::~XtensaSubtarget() = default; + +const SelectionDAGTargetInfo *XtensaSubtarget::getSelectionDAGInfo() const { + return TSInfo.get(); +} diff --git a/llvm/lib/Target/Xtensa/XtensaSubtarget.h b/llvm/lib/Target/Xtensa/XtensaSubtarget.h index b406534a0ec77..6a5201b6f6f32 100644 --- a/llvm/lib/Target/Xtensa/XtensaSubtarget.h +++ b/llvm/lib/Target/Xtensa/XtensaSubtarget.h @@ -17,7 +17,6 @@ #include "XtensaISelLowering.h" #include "XtensaInstrInfo.h" #include "XtensaRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" @@ -38,7 +37,7 @@ class XtensaSubtarget : public XtensaGenSubtargetInfo { const Triple &TargetTriple; XtensaInstrInfo InstrInfo; XtensaTargetLowering TLInfo; - SelectionDAGTargetInfo TSInfo; + std::unique_ptr TSInfo; XtensaFrameLowering FrameLowering; XtensaSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); @@ -47,6 +46,8 @@ class XtensaSubtarget : public XtensaGenSubtargetInfo { XtensaSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM); + ~XtensaSubtarget() override; + const Triple &getTargetTriple() const { return TargetTriple; } const TargetFrameLowering *getFrameLowering() const override { @@ -60,9 +61,8 @@ class XtensaSubtarget : public XtensaGenSubtargetInfo { const XtensaTargetLowering *getTargetLowering() const override { return &TLInfo; } - const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } + + const SelectionDAGTargetInfo 
*getSelectionDAGInfo() const override; bool hasDensity() const { return HasDensity; } bool hasMAC16() const { return HasMAC16; } From bbece4b78b073a2377a74158f6b33d164df4d179 Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Fri, 14 Nov 2025 23:56:11 +0530 Subject: [PATCH 09/56] [libc] replace for loops with a call to memcpy in File (#165219) Addresses `TODO`s in file.cpp by replacing data copies via for loops with calls to inline_memcpy. Signed-off-by: Shreeyash Pandey --- libc/src/__support/File/file.cpp | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp index 4217e73828388..15ec1a23e2b8d 100644 --- a/libc/src/__support/File/file.cpp +++ b/libc/src/__support/File/file.cpp @@ -15,6 +15,7 @@ #include "src/__support/CPP/span.h" #include "src/__support/libc_errno.h" // For error macros #include "src/__support/macros/config.h" +#include "src/string/memory_utils/inline_memcpy.h" namespace LIBC_NAMESPACE_DECL { @@ -85,9 +86,7 @@ FileIOResult File::write_unlocked_fbf(const uint8_t *data, size_t len) { cpp::span bufref(static_cast(buf), bufsize); // Copy the first piece into the buffer. - // TODO: Replace the for loop below with a call to internal memcpy. - for (size_t i = 0; i < primary.size(); ++i) - bufref[pos + i] = primary[i]; + inline_memcpy(bufref.data() + pos, primary.data(), primary.size()); pos += primary.size(); // If there is no remainder, we can return early, since the first piece has @@ -115,9 +114,7 @@ FileIOResult File::write_unlocked_fbf(const uint8_t *data, size_t len) { // know that if the second piece has data in it then the buffer has been // flushed, meaning that pos is always 0. if (remainder.size() < bufsize) { - // TODO: Replace the for loop below with a call to internal memcpy. 
- for (size_t i = 0; i < remainder.size(); ++i) - bufref[i] = remainder[i]; + inline_memcpy(bufref.data(), remainder.data(), remainder.size()); pos = remainder.size(); } else { @@ -209,9 +206,7 @@ size_t File::copy_data_from_buf(uint8_t *data, size_t len) { // available_data is never a wrapped around value. size_t available_data = read_limit - pos; if (len <= available_data) { - // TODO: Replace the for loop below with a call to internal memcpy. - for (size_t i = 0; i < len; ++i) - dataref[i] = bufref[i + pos]; + inline_memcpy(dataref.data(), bufref.data() + pos, len); pos += len; return len; } @@ -255,8 +250,7 @@ FileIOResult File::read_unlocked_fbf(uint8_t *data, size_t len) { size_t fetched_size = result.value; read_limit += fetched_size; size_t transfer_size = fetched_size >= to_fetch ? to_fetch : fetched_size; - for (size_t i = 0; i < transfer_size; ++i) - dataref[i] = buf[i]; + inline_memcpy(dataref.data(), buf, transfer_size); pos += transfer_size; if (result.has_error() || fetched_size < to_fetch) { if (!result.has_error()) From ac6daa8181894e34b8cf8e5c3e065f64035fcd36 Mon Sep 17 00:00:00 2001 From: YongKang Zhu Date: Fri, 14 Nov 2025 10:26:21 -0800 Subject: [PATCH 10/56] [BOLT][print] Add option '--print-only-file' (NFC) (#168023) With this option we can pass to BOLT names of functions to be printed through a file instead of specifying them all on command line. 
--- bolt/include/bolt/Rewrite/RewriteInstance.h | 3 +++ bolt/lib/Core/BinaryFunction.cpp | 10 ++------ bolt/lib/Rewrite/RewriteInstance.cpp | 27 ++++++++++++++------- bolt/lib/Utils/CommandLineOpts.cpp | 10 ++++++++ bolt/test/print-only.test | 25 +++++++++++++++++++ 5 files changed, 58 insertions(+), 17 deletions(-) create mode 100644 bolt/test/print-only.test diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h index 0fe2e32b61933..35abf6b4d4ddd 100644 --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -139,6 +139,9 @@ class RewriteInstance { void handleRelocation(const object::SectionRef &RelocatedSection, const RelocationRef &Rel); + /// Collect functions that are specified to be printed. + void selectFunctionsToPrint(); + /// Mark functions that are not meant for processing as ignored. void selectFunctionsToProcess(); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index a0d8385aa3824..a5fdf79a737f5 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -61,6 +61,8 @@ extern cl::OptionCategory BoltOptCategory; extern cl::opt EnableBAT; extern cl::opt Instrument; +extern cl::list PrintOnly; +extern cl::opt PrintOnlyFile; extern cl::opt StrictMode; extern cl::opt UpdateDebugSections; extern cl::opt Verbosity; @@ -133,14 +135,6 @@ PrintDynoStatsOnly("print-dyno-stats-only", cl::Hidden, cl::cat(BoltCategory)); -static cl::list -PrintOnly("print-only", - cl::CommaSeparated, - cl::desc("list of functions to print"), - cl::value_desc("func1,func2,func3,..."), - cl::Hidden, - cl::cat(BoltCategory)); - cl::opt TimeBuild("time-build", cl::desc("print time spent constructing binary functions"), diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 8d6731e7540a8..0e100bec01ca6 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -82,6 +82,8 @@ extern cl::opt Hugify; extern cl::opt Instrument; extern cl::opt KeepNops; extern cl::opt Lite; +extern cl::list PrintOnly; +extern cl::opt PrintOnlyFile; extern cl::list ReorderData; extern cl::opt ReorderFunctions; extern cl::opt TerminalHLT; @@ -730,6 +732,8 @@ Error RewriteInstance::run() { << "\n"; BC->outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n"; + selectFunctionsToPrint(); + if (Error E = discoverStorage()) return E; if (Error E = readSpecialSections()) @@ -3100,17 +3104,22 @@ static BinaryFunction *getInitFunctionIfStaticBinary(BinaryContext &BC) { return BC.getBinaryFunctionAtAddress(BD->getAddress()); } +static void populateFunctionNames(cl::opt &FunctionNamesFile, + cl::list &FunctionNames) { + if (FunctionNamesFile.empty()) + return; + std::ifstream FuncsFile(FunctionNamesFile, std::ios::in); + std::string FuncName; + while (std::getline(FuncsFile, FuncName)) + FunctionNames.push_back(FuncName); +} + +void RewriteInstance::selectFunctionsToPrint() { + populateFunctionNames(opts::PrintOnlyFile, opts::PrintOnly); +} + void RewriteInstance::selectFunctionsToProcess() { // Extend the list of functions to process or skip from a file. 
- auto populateFunctionNames = [](cl::opt &FunctionNamesFile, - cl::list &FunctionNames) { - if (FunctionNamesFile.empty()) - return; - std::ifstream FuncsFile(FunctionNamesFile, std::ios::in); - std::string FuncName; - while (std::getline(FuncsFile, FuncName)) - FunctionNames.push_back(FuncName); - }; populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames); populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames); populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR); diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 5be04d2ceea94..b7eb209af8aca 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -245,6 +245,16 @@ cl::opt PrintCacheMetrics( cl::desc("calculate and print various metrics for instruction cache"), cl::cat(BoltOptCategory)); +cl::list PrintOnly("print-only", cl::CommaSeparated, + cl::desc("list of functions to print"), + cl::value_desc("func1,func2,func3,..."), + cl::Hidden, cl::cat(BoltCategory)); + +cl::opt + PrintOnlyFile("print-only-file", + cl::desc("file with list of functions to print"), cl::Hidden, + cl::cat(BoltCategory)); + cl::opt PrintSections("print-sections", cl::desc("print all registered sections"), cl::Hidden, cl::cat(BoltCategory)); diff --git a/bolt/test/print-only.test b/bolt/test/print-only.test new file mode 100644 index 0000000000000..5e8bcd0e77378 --- /dev/null +++ b/bolt/test/print-only.test @@ -0,0 +1,25 @@ +# Verify if `--print-only` and `--print-only-file` work fine.
+ +# REQUIRES: system-linux + +# RUN: %clang %cflags -x c %p/Inputs/bolt_icf.cpp -o %t -Wl,-q +# RUN: llvm-bolt %t -o %t.bolt --icf=none --print-cfg \ +# RUN: --print-only=foo.*,bar.*,main.* 2>&1 | FileCheck %s + +# RUN: echo "bar.*" > %t.pof +# RUN: echo "main.*" >> %t.pof +# RUN: llvm-bolt %t -o %t.bolt --icf=none --print-cfg \ +# RUN: --print-only=foo.* --print-only-file=%t.pof \ +# RUN: 2>&1 | FileCheck %s + +# RUN: echo "foo.*" >> %t.pof +# RUN: llvm-bolt %t -o %t.bolt --icf=none --print-cfg \ +# RUN: --print-only-file=%t.pof 2>&1 | FileCheck %s + +# CHECK-NOT: Binary Function "fiz" after building cfg +# CHECK-NOT: Binary Function "faz" after building cfg +# CHECK-NOT: Binary Function "zip" after building cfg +# CHECK-NOT: Binary Function "zap" after building cfg +# CHECK: Binary Function "foo" after building cfg +# CHECK: Binary Function "bar" after building cfg +# CHECK: Binary Function "main" after building cfg From e7db040b796df5e7bda5226492038a3af34803ef Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Fri, 14 Nov 2025 23:57:32 +0530 Subject: [PATCH 11/56] [libc][test] split exit tests into two separate tests (#166355) _Exit(3) is a fairly simple syscall wrapper, whereas exit(3) calls atexit-registered functions plus a whole lot of other machinery that requires support for sync primitives.
Splitting the tests allows testing the former easily (especially for new port projects) --------- Signed-off-by: Shreeyash Pandey --- .../cmake/modules/LLVMLibCArchitectures.cmake | 31 +++++++++++++++++++ libc/include/sys/syscall.h.def | 3 +- libc/test/UnitTest/ExecuteFunctionUnix.cpp | 4 +-- libc/test/src/stdlib/CMakeLists.txt | 13 ++++++++ libc/test/src/stdlib/_Exit_test.cpp | 4 --- libc/test/src/stdlib/exit_test.cpp | 15 +++++++++ 6 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 libc/test/src/stdlib/exit_test.cpp diff --git a/libc/cmake/modules/LLVMLibCArchitectures.cmake b/libc/cmake/modules/LLVMLibCArchitectures.cmake index 6c730f807de6d..939fc1226a4e9 100644 --- a/libc/cmake/modules/LLVMLibCArchitectures.cmake +++ b/libc/cmake/modules/LLVMLibCArchitectures.cmake @@ -215,6 +215,37 @@ else() "Unsupported libc target operating system ${LIBC_TARGET_OS}") endif() +# If the compiler target triple is not the same as the triple specified by +# LIBC_TARGET_TRIPLE or LLVM_RUNTIMES_TARGET, we will add a --target option +# if the compiler is clang. If the compiler is GCC we just error out as there +# is no equivalent of an option like --target. +if(explicit_target_triple AND + (NOT (libc_compiler_triple STREQUAL explicit_target_triple))) + set(LIBC_CROSSBUILD TRUE) + if(CMAKE_COMPILER_IS_GNUCXX) + message(FATAL_ERROR + "GCC target triple (${libc_compiler_triple}) and the explicity " + "specified target triple (${explicit_target_triple}) do not match.") + else() + list(APPEND + LIBC_COMPILE_OPTIONS_DEFAULT "--target=${explicit_target_triple}") + endif() +endif() + +if(LIBC_TARGET_OS_IS_DARWIN) + execute_process( + COMMAND xcrun --sdk macosx --show-sdk-path + OUTPUT_VARIABLE MACOSX_SDK_PATH + RESULT_VARIABLE MACOSX_SDK_PATH_RESULT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(MACOSX_SDK_PATH_RESULT EQUAL 0) + list(APPEND LIBC_COMPILE_OPTIONS_DEFAULT "-I" "${MACOSX_SDK_PATH}/usr/include") + else() + message(WARNING "Could not find macOS SDK path. 
`xcrun --sdk macosx --show-sdk-path` failed.") + endif() +endif() + # Windows does not support full mode build. if (LIBC_TARGET_OS_IS_WINDOWS AND LLVM_LIBC_FULL_BUILD) message(FATAL_ERROR "Windows does not support full mode build.") diff --git a/libc/include/sys/syscall.h.def b/libc/include/sys/syscall.h.def index 60e5024e500e3..f7e53cc4942d5 100644 --- a/libc/include/sys/syscall.h.def +++ b/libc/include/sys/syscall.h.def @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SYS_SYSCALL_H #define LLVM_LIBC_SYS_SYSCALL_H -//TODO: Handle non-linux syscalls +#if defined(__linux__) #include @@ -2361,5 +2361,6 @@ #define SYS_writev __NR_writev #endif +#endif // __linux__ #endif // LLVM_LIBC_SYS_SYSCALL_H diff --git a/libc/test/UnitTest/ExecuteFunctionUnix.cpp b/libc/test/UnitTest/ExecuteFunctionUnix.cpp index c0e85c2144005..ab18f7a2ebf52 100644 --- a/libc/test/UnitTest/ExecuteFunctionUnix.cpp +++ b/libc/test/UnitTest/ExecuteFunctionUnix.cpp @@ -57,9 +57,7 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, int timeout_ms) { } ::close(pipe_fds[1]); - struct pollfd poll_fd { - pipe_fds[0], 0, 0 - }; + pollfd poll_fd{pipe_fds[0], POLLIN, 0}; // No events requested so this call will only return after the timeout or if // the pipes peer was closed, signaling the process exited. if (::poll(&poll_fd, 1, timeout_ms) == -1) { diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt index 42e8faa3fd69f..80aab080e36d4 100644 --- a/libc/test/src/stdlib/CMakeLists.txt +++ b/libc/test/src/stdlib/CMakeLists.txt @@ -398,6 +398,19 @@ if(LLVM_LIBC_FULL_BUILD) libc-stdlib-tests SRCS _Exit_test.cpp + DEPENDS + libc.src.__support.OSUtil.osutil + libc.src.stdlib._Exit + ) + + add_libc_test( + exit_test + # The EXPECT_EXITS test is only availible for unit tests. 
+ UNIT_TEST_ONLY + SUITE + libc-stdlib-tests + SRCS + exit_test.cpp DEPENDS libc.src.stdlib._Exit libc.src.stdlib.exit diff --git a/libc/test/src/stdlib/_Exit_test.cpp b/libc/test/src/stdlib/_Exit_test.cpp index 333277dc01dca..57c432828c2f3 100644 --- a/libc/test/src/stdlib/_Exit_test.cpp +++ b/libc/test/src/stdlib/_Exit_test.cpp @@ -7,13 +7,9 @@ //===----------------------------------------------------------------------===// #include "src/stdlib/_Exit.h" -#include "src/stdlib/exit.h" #include "test/UnitTest/Test.h" TEST(LlvmLibcStdlib, _Exit) { EXPECT_EXITS([] { LIBC_NAMESPACE::_Exit(1); }, 1); EXPECT_EXITS([] { LIBC_NAMESPACE::_Exit(65); }, 65); - - EXPECT_EXITS([] { LIBC_NAMESPACE::exit(1); }, 1); - EXPECT_EXITS([] { LIBC_NAMESPACE::exit(65); }, 65); } diff --git a/libc/test/src/stdlib/exit_test.cpp b/libc/test/src/stdlib/exit_test.cpp new file mode 100644 index 0000000000000..5c82d8303036a --- /dev/null +++ b/libc/test/src/stdlib/exit_test.cpp @@ -0,0 +1,15 @@ +//===-- Unittests for exit -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdlib/exit.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStdlib, exit) { + EXPECT_EXITS([] { LIBC_NAMESPACE::exit(1); }, 1); + EXPECT_EXITS([] { LIBC_NAMESPACE::exit(65); }, 65); +} From cfce4a6b9e0f9f20d946518a823a828b6f010cb4 Mon Sep 17 00:00:00 2001 From: lntue Date: Fri, 14 Nov 2025 13:28:09 -0500 Subject: [PATCH 12/56] [libc] Allow user-defined LIBC_ASSERT macro. (#168087) By only defining it if LIBC_ASSERT macro is not defined. 
Fixes https://github.com/llvm/llvm-project/issues/162392 --- libc/src/__support/libc_assert.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libc/src/__support/libc_assert.h b/libc/src/__support/libc_assert.h index ada1795ccb80a..6e0b5bd3d68b4 100644 --- a/libc/src/__support/libc_assert.h +++ b/libc/src/__support/libc_assert.h @@ -14,9 +14,11 @@ // The build is configured to just use the public API // for libc's internal assertions. +#ifndef LIBC_ASSERT #include #define LIBC_ASSERT(COND) assert(COND) +#endif // LIBC_ASSERT #else // Not LIBC_COPT_USE_C_ASSERT From 8b105cbaf009a05023eb7b72ec1c359c128eeffa Mon Sep 17 00:00:00 2001 From: SunilKuravinakop <98882378+SunilKuravinakop@users.noreply.github.com> Date: Sat, 15 Nov 2025 00:00:18 +0530 Subject: [PATCH 13/56] Revert "[Clang][OpenMP] Bug fix Default clause variable category" (#168083) Reverts llvm/llvm-project#165276 The newly added test failed on a number of buildbots. --- clang/lib/Sema/SemaOpenMP.cpp | 6 +- ...allel_default_variableCategory_codegen.cpp | 92 ------------------- 2 files changed, 3 insertions(+), 95 deletions(-) delete mode 100644 clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 31c8f0cd30c56..81c591a00cfc6 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1364,15 +1364,15 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(const_iterator &Iter, DefaultDataSharingAttributes IterDA = Iter->DefaultAttr; switch (Iter->DefaultVCAttr) { case DSA_VC_aggregate: - if (!D->getType()->isAggregateType()) + if (!VD->getType()->isAggregateType()) IterDA = DSA_none; break; case DSA_VC_pointer: - if (!D->getType()->isPointerType()) + if (!VD->getType()->isPointerType()) IterDA = DSA_none; break; case DSA_VC_scalar: - if (!D->getType()->isScalarType()) + if (!VD->getType()->isScalarType()) IterDA = DSA_none; break; case DSA_VC_all: diff --git 
a/clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp b/clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp deleted file mode 100644 index f7dc74c503537..0000000000000 --- a/clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp +++ /dev/null @@ -1,92 +0,0 @@ -// RUN: %clangxx -Xclang -verify -Wno-vla -fopenmp -fopenmp-version=60 -x c++ -S -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics -#ifndef HEADER -#define HEADER - -#include - -int global; -#define VECTOR_SIZE 4 -int main (int argc, char **argv) { - int i,n; - int x; - - n = VECTOR_SIZE; - - #pragma omp parallel masked firstprivate(x) num_threads(2) - { - int *xPtr = nullptr; - // scalar - #pragma omp task default(shared:scalar) - { - xPtr = &x; - } - #pragma omp taskwait - - // pointer - #pragma omp task default(shared:pointer) shared(x) - { - xPtr = &x; - } - #pragma omp taskwait - } - - int *aggregate[VECTOR_SIZE] = {0,0,0,0}; - - #pragma omp parallel masked num_threads(2) - { - // aggregate - #pragma omp task default(shared:aggregate) - for(i=0;i Date: Fri, 14 Nov 2025 10:30:25 -0800 Subject: [PATCH 14/56] [RDF] RegisterRef/RegisterId improvements. NFC (#168030) RegisterId can represent a physical register, a MCRegUnit, or an index into a side structure that stores register masks. These 3 types were encoded by using the physical reg, stack slot, and virtual register encoding partitions from the Register class. This aliasing of encoding schemes wasn't well contained, so Register::index2StackSlot and Register::stackSlotIndex appeared in multiple places. This patch gives RegisterRef its own encoding defines and separates it from Register. I've removed the generic idx() method in favor of asMCReg(), asMCRegUnit(), and asMaskIdx() for some degree of type safety. Some places used the RegisterId field of RegisterRef directly as a register. Those have been updated to use asMCReg().
Some special cases for RegisterId 0 have been removed as it can be treated like a MCRegister by existing code. I think I want to rename the Reg field of RegisterRef to Id, but I'll do that in another patch. Additionally, callers of the RegisterRef constructor need to be audited for implicit conversions from Register/MCRegister to unsigned. --- llvm/include/llvm/CodeGen/RDFRegisters.h | 51 ++++++++++++++---------- llvm/lib/CodeGen/RDFGraph.cpp | 2 +- llvm/lib/CodeGen/RDFRegisters.cpp | 43 ++++++++++---------- 3 files changed, 50 insertions(+), 46 deletions(-) diff --git a/llvm/include/llvm/CodeGen/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h index 4c15bf534d55f..bedc95a9da7f1 100644 --- a/llvm/include/llvm/CodeGen/RDFRegisters.h +++ b/llvm/include/llvm/CodeGen/RDFRegisters.h @@ -86,6 +86,11 @@ template struct IndexedSet { }; struct RegisterRef { +private: + static constexpr RegisterId MaskFlag = 1u << 30; + static constexpr RegisterId UnitFlag = 1u << 31; + +public: RegisterId Reg = 0; LaneBitmask Mask = LaneBitmask::getNone(); // Only for registers. 
@@ -99,7 +104,20 @@ struct RegisterRef { constexpr bool isUnit() const { return isUnitId(Reg); } constexpr bool isMask() const { return isMaskId(Reg); } - constexpr unsigned idx() const { return toIdx(Reg); } + constexpr MCRegister asMCReg() const { + assert(isReg()); + return Reg; + } + + constexpr MCRegUnit asMCRegUnit() const { + assert(isUnit()); + return Reg & ~UnitFlag; + } + + constexpr unsigned asMaskIdx() const { + assert(isMask()); + return Reg & ~MaskFlag; + } constexpr operator bool() const { return !isReg() || (Reg != 0 && Mask.any()); @@ -110,26 +128,15 @@ struct RegisterRef { std::hash{}(Mask.getAsInteger()); } - static constexpr bool isRegId(unsigned Id) { - return Register::isPhysicalRegister(Id); - } - static constexpr bool isUnitId(unsigned Id) { - return Register::isVirtualRegister(Id); + static constexpr bool isRegId(RegisterId Id) { + return !(Id & UnitFlag) && !(Id & MaskFlag); } - static constexpr bool isMaskId(unsigned Id) { return Register(Id).isStack(); } + static constexpr bool isUnitId(RegisterId Id) { return Id & UnitFlag; } + static constexpr bool isMaskId(RegisterId Id) { return Id & MaskFlag; } - static constexpr RegisterId toUnitId(unsigned Idx) { - return Idx | Register::VirtualRegFlag; - } + static constexpr RegisterId toUnitId(unsigned Idx) { return Idx | UnitFlag; } - static constexpr unsigned toIdx(RegisterId Id) { - // Not using virtReg2Index or stackSlot2Index, because they are - // not constexpr. - if (isUnitId(Id)) - return Id & ~Register::VirtualRegFlag; - // RegId and MaskId are unchanged. 
- return Id; - } + static constexpr RegisterId toMaskId(unsigned Idx) { return Idx | MaskFlag; } bool operator<(RegisterRef) const = delete; bool operator==(RegisterRef) const = delete; @@ -141,11 +148,11 @@ struct PhysicalRegisterInfo { const MachineFunction &mf); RegisterId getRegMaskId(const uint32_t *RM) const { - return Register::index2StackSlot(RegMasks.find(RM)); + return RegisterRef::toMaskId(RegMasks.find(RM)); } const uint32_t *getRegMaskBits(RegisterId R) const { - return RegMasks.get(Register(R).stackSlotIndex()); + return RegMasks.get(RegisterRef(R).asMaskIdx()); } bool alias(RegisterRef RA, RegisterRef RB) const; @@ -158,7 +165,7 @@ struct PhysicalRegisterInfo { } const BitVector &getMaskUnits(RegisterId MaskId) const { - return MaskInfos[Register(MaskId).stackSlotIndex()].Units; + return MaskInfos[RegisterRef(MaskId).asMaskIdx()].Units; } std::set getUnits(RegisterRef RR) const; @@ -167,7 +174,7 @@ struct PhysicalRegisterInfo { return AliasInfos[U].Regs; } - RegisterRef mapTo(RegisterRef RR, unsigned R) const; + RegisterRef mapTo(RegisterRef RR, RegisterId R) const; const TargetRegisterInfo &getTRI() const { return TRI; } bool equal_to(RegisterRef A, RegisterRef B) const; diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp index bbd3292fd46de..2fb3d4ed30f24 100644 --- a/llvm/lib/CodeGen/RDFGraph.cpp +++ b/llvm/lib/CodeGen/RDFGraph.cpp @@ -1827,7 +1827,7 @@ bool DataFlowGraph::hasUntrackedRef(Stmt S, bool IgnoreReserved) const { for (Ref R : S.Addr->members(*this)) { Ops.push_back(&R.Addr->getOp()); RegisterRef RR = R.Addr->getRegRef(*this); - if (IgnoreReserved && RR.isReg() && ReservedRegs[RR.idx()]) + if (IgnoreReserved && RR.isReg() && ReservedRegs[RR.asMCReg().id()]) continue; if (!isTracked(RR)) return true; diff --git a/llvm/lib/CodeGen/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp index e4b63a3a40805..3821f3b791bbd 100644 --- a/llvm/lib/CodeGen/RDFRegisters.cpp +++ b/llvm/lib/CodeGen/RDFRegisters.cpp @@ -126,13 
+126,10 @@ std::set PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const { std::set PhysicalRegisterInfo::getUnits(RegisterRef RR) const { std::set Units; - if (RR.Reg == 0) - return Units; // Empty - if (RR.isReg()) { if (RR.Mask.none()) return Units; // Empty - for (MCRegUnitMaskIterator UM(RR.idx(), &TRI); UM.isValid(); ++UM) { + for (MCRegUnitMaskIterator UM(RR.asMCReg(), &TRI); UM.isValid(); ++UM) { auto [U, M] = *UM; if ((M & RR.Mask).any()) Units.insert(U); @@ -142,7 +139,7 @@ std::set PhysicalRegisterInfo::getUnits(RegisterRef RR) const { assert(RR.isMask()); unsigned NumRegs = TRI.getNumRegs(); - const uint32_t *MB = getRegMaskBits(RR.idx()); + const uint32_t *MB = getRegMaskBits(RR.Reg); for (unsigned I = 0, E = (NumRegs + 31) / 32; I != E; ++I) { uint32_t C = ~MB[I]; // Clobbered regs if (I == 0) // Reg 0 should be ignored @@ -162,12 +159,13 @@ std::set PhysicalRegisterInfo::getUnits(RegisterRef RR) const { return Units; } -RegisterRef PhysicalRegisterInfo::mapTo(RegisterRef RR, unsigned R) const { +RegisterRef PhysicalRegisterInfo::mapTo(RegisterRef RR, RegisterId R) const { if (RR.Reg == R) return RR; - if (unsigned Idx = TRI.getSubRegIndex(R, RR.Reg)) + if (unsigned Idx = TRI.getSubRegIndex(RegisterRef(R).asMCReg(), RR.asMCReg())) return RegisterRef(R, TRI.composeSubRegIndexLaneMask(Idx, RR.Mask)); - if (unsigned Idx = TRI.getSubRegIndex(RR.Reg, R)) { + if (unsigned Idx = + TRI.getSubRegIndex(RR.asMCReg(), RegisterRef(R).asMCReg())) { const RegInfo &RI = RegInfos[R]; LaneBitmask RCM = RI.RegClass ? RI.RegClass->LaneMask : LaneBitmask::getAll(); @@ -187,8 +185,8 @@ bool PhysicalRegisterInfo::equal_to(RegisterRef A, RegisterRef B) const { return A.Mask == B.Mask; // Compare reg units lexicographically. 
- MCRegUnitMaskIterator AI(A.Reg, &getTRI()); - MCRegUnitMaskIterator BI(B.Reg, &getTRI()); + MCRegUnitMaskIterator AI(A.asMCReg(), &getTRI()); + MCRegUnitMaskIterator BI(B.asMCReg(), &getTRI()); while (AI.isValid() && BI.isValid()) { auto [AReg, AMask] = *AI; auto [BReg, BMask] = *BI; @@ -225,8 +223,8 @@ bool PhysicalRegisterInfo::less(RegisterRef A, RegisterRef B) const { return A.Reg < B.Reg; // Compare reg units lexicographically. - llvm::MCRegUnitMaskIterator AI(A.Reg, &getTRI()); - llvm::MCRegUnitMaskIterator BI(B.Reg, &getTRI()); + llvm::MCRegUnitMaskIterator AI(A.asMCReg(), &getTRI()); + llvm::MCRegUnitMaskIterator BI(B.asMCReg(), &getTRI()); while (AI.isValid() && BI.isValid()) { auto [AReg, AMask] = *AI; auto [BReg, BMask] = *BI; @@ -252,18 +250,17 @@ bool PhysicalRegisterInfo::less(RegisterRef A, RegisterRef B) const { } void PhysicalRegisterInfo::print(raw_ostream &OS, RegisterRef A) const { - if (A.Reg == 0 || A.isReg()) { - if (0 < A.idx() && A.idx() < TRI.getNumRegs()) - OS << TRI.getName(A.idx()); + if (A.isReg()) { + MCRegister Reg = A.asMCReg(); + if (Reg && Reg.id() < TRI.getNumRegs()) + OS << TRI.getName(Reg); else - OS << printReg(A.idx(), &TRI); + OS << printReg(Reg, &TRI); OS << PrintLaneMaskShort(A.Mask); } else if (A.isUnit()) { - OS << printRegUnit(A.idx(), &TRI); + OS << printRegUnit(A.asMCRegUnit(), &TRI); } else { - assert(A.isMask()); - // RegMask SS flag is preserved by idx(). - unsigned Idx = Register(A.idx()).stackSlotIndex(); + unsigned Idx = A.asMaskIdx(); const char *Fmt = Idx < 0x10000 ? 
"%04x" : "%08x"; OS << "M#" << format(Fmt, Idx); } @@ -280,7 +277,7 @@ bool RegisterAggr::hasAliasOf(RegisterRef RR) const { if (RR.isMask()) return Units.anyCommon(PRI.getMaskUnits(RR.Reg)); - for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { + for (MCRegUnitMaskIterator U(RR.asMCReg(), &PRI.getTRI()); U.isValid(); ++U) { auto [Unit, LaneMask] = *U; if ((LaneMask & RR.Mask).any()) if (Units.test(Unit)) @@ -295,7 +292,7 @@ bool RegisterAggr::hasCoverOf(RegisterRef RR) const { return T.reset(Units).none(); } - for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { + for (MCRegUnitMaskIterator U(RR.asMCReg(), &PRI.getTRI()); U.isValid(); ++U) { auto [Unit, LaneMask] = *U; if ((LaneMask & RR.Mask).any()) if (!Units.test(Unit)) @@ -310,7 +307,7 @@ RegisterAggr &RegisterAggr::insert(RegisterRef RR) { return *this; } - for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { + for (MCRegUnitMaskIterator U(RR.asMCReg(), &PRI.getTRI()); U.isValid(); ++U) { auto [Unit, LaneMask] = *U; if ((LaneMask & RR.Mask).any()) Units.set(Unit); From d06a7dd35e5a1d7b6309930cdf13033a3d08b542 Mon Sep 17 00:00:00 2001 From: Fabrice de Gans Date: Fri, 14 Nov 2025 13:35:34 -0500 Subject: [PATCH 15/56] lldb: Link delayimp on Windows (#168093) This is needed when building with `LLVM_LINK_LLVM_DYLIB` to build LLVM as a DLL on Windows. This effort is tracked in #109483. 
--- lldb/cmake/modules/AddLLDB.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake index 5d58abf237f58..6493df27f38db 100644 --- a/lldb/cmake/modules/AddLLDB.cmake +++ b/lldb/cmake/modules/AddLLDB.cmake @@ -172,6 +172,7 @@ function(add_lldb_executable name) if(NOT LIBLLDB_INDEX EQUAL -1) if (MSVC) target_link_options(${name} PRIVATE "/DELAYLOAD:$") + target_link_libraries(${name} PRIVATE delayimp) elseif (MINGW AND LINKER_IS_LLD) # LLD can delay load just by passing a --delayload flag, as long as the import # library is a short type import library (which LLD and MS link.exe produce). From 5b798df8c48aab6db287aaf3ae45ff32283c4f9c Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Fri, 14 Nov 2025 13:36:05 -0500 Subject: [PATCH 16/56] Revert "[libc][test] split exit tests into two separate tests" (#168102) Reverts llvm/llvm-project#166355 --- .../cmake/modules/LLVMLibCArchitectures.cmake | 31 ------------------- libc/include/sys/syscall.h.def | 3 +- libc/test/UnitTest/ExecuteFunctionUnix.cpp | 4 ++- libc/test/src/stdlib/CMakeLists.txt | 13 -------- libc/test/src/stdlib/_Exit_test.cpp | 4 +++ libc/test/src/stdlib/exit_test.cpp | 15 --------- 6 files changed, 8 insertions(+), 62 deletions(-) delete mode 100644 libc/test/src/stdlib/exit_test.cpp diff --git a/libc/cmake/modules/LLVMLibCArchitectures.cmake b/libc/cmake/modules/LLVMLibCArchitectures.cmake index 939fc1226a4e9..6c730f807de6d 100644 --- a/libc/cmake/modules/LLVMLibCArchitectures.cmake +++ b/libc/cmake/modules/LLVMLibCArchitectures.cmake @@ -215,37 +215,6 @@ else() "Unsupported libc target operating system ${LIBC_TARGET_OS}") endif() -# If the compiler target triple is not the same as the triple specified by -# LIBC_TARGET_TRIPLE or LLVM_RUNTIMES_TARGET, we will add a --target option -# if the compiler is clang. If the compiler is GCC we just error out as there -# is no equivalent of an option like --target. 
-if(explicit_target_triple AND - (NOT (libc_compiler_triple STREQUAL explicit_target_triple))) - set(LIBC_CROSSBUILD TRUE) - if(CMAKE_COMPILER_IS_GNUCXX) - message(FATAL_ERROR - "GCC target triple (${libc_compiler_triple}) and the explicity " - "specified target triple (${explicit_target_triple}) do not match.") - else() - list(APPEND - LIBC_COMPILE_OPTIONS_DEFAULT "--target=${explicit_target_triple}") - endif() -endif() - -if(LIBC_TARGET_OS_IS_DARWIN) - execute_process( - COMMAND xcrun --sdk macosx --show-sdk-path - OUTPUT_VARIABLE MACOSX_SDK_PATH - RESULT_VARIABLE MACOSX_SDK_PATH_RESULT - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - if(MACOSX_SDK_PATH_RESULT EQUAL 0) - list(APPEND LIBC_COMPILE_OPTIONS_DEFAULT "-I" "${MACOSX_SDK_PATH}/usr/include") - else() - message(WARNING "Could not find macOS SDK path. `xcrun --sdk macosx --show-sdk-path` failed.") - endif() -endif() - # Windows does not support full mode build. if (LIBC_TARGET_OS_IS_WINDOWS AND LLVM_LIBC_FULL_BUILD) message(FATAL_ERROR "Windows does not support full mode build.") diff --git a/libc/include/sys/syscall.h.def b/libc/include/sys/syscall.h.def index f7e53cc4942d5..60e5024e500e3 100644 --- a/libc/include/sys/syscall.h.def +++ b/libc/include/sys/syscall.h.def @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SYS_SYSCALL_H #define LLVM_LIBC_SYS_SYSCALL_H -#if defined(__linux__) +//TODO: Handle non-linux syscalls #include @@ -2361,6 +2361,5 @@ #define SYS_writev __NR_writev #endif -#endif // __linux__ #endif // LLVM_LIBC_SYS_SYSCALL_H diff --git a/libc/test/UnitTest/ExecuteFunctionUnix.cpp b/libc/test/UnitTest/ExecuteFunctionUnix.cpp index ab18f7a2ebf52..c0e85c2144005 100644 --- a/libc/test/UnitTest/ExecuteFunctionUnix.cpp +++ b/libc/test/UnitTest/ExecuteFunctionUnix.cpp @@ -57,7 +57,9 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, int timeout_ms) { } ::close(pipe_fds[1]); - pollfd poll_fd{pipe_fds[0], POLLIN, 0}; + struct pollfd poll_fd { + pipe_fds[0], 0, 0 + }; // No events requested so this call will only 
return after the timeout or if // the pipes peer was closed, signaling the process exited. if (::poll(&poll_fd, 1, timeout_ms) == -1) { diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt index 80aab080e36d4..42e8faa3fd69f 100644 --- a/libc/test/src/stdlib/CMakeLists.txt +++ b/libc/test/src/stdlib/CMakeLists.txt @@ -398,19 +398,6 @@ if(LLVM_LIBC_FULL_BUILD) libc-stdlib-tests SRCS _Exit_test.cpp - DEPENDS - libc.src.__support.OSUtil.osutil - libc.src.stdlib._Exit - ) - - add_libc_test( - exit_test - # The EXPECT_EXITS test is only availible for unit tests. - UNIT_TEST_ONLY - SUITE - libc-stdlib-tests - SRCS - exit_test.cpp DEPENDS libc.src.stdlib._Exit libc.src.stdlib.exit diff --git a/libc/test/src/stdlib/_Exit_test.cpp b/libc/test/src/stdlib/_Exit_test.cpp index 57c432828c2f3..333277dc01dca 100644 --- a/libc/test/src/stdlib/_Exit_test.cpp +++ b/libc/test/src/stdlib/_Exit_test.cpp @@ -7,9 +7,13 @@ //===----------------------------------------------------------------------===// #include "src/stdlib/_Exit.h" +#include "src/stdlib/exit.h" #include "test/UnitTest/Test.h" TEST(LlvmLibcStdlib, _Exit) { EXPECT_EXITS([] { LIBC_NAMESPACE::_Exit(1); }, 1); EXPECT_EXITS([] { LIBC_NAMESPACE::_Exit(65); }, 65); + + EXPECT_EXITS([] { LIBC_NAMESPACE::exit(1); }, 1); + EXPECT_EXITS([] { LIBC_NAMESPACE::exit(65); }, 65); } diff --git a/libc/test/src/stdlib/exit_test.cpp b/libc/test/src/stdlib/exit_test.cpp deleted file mode 100644 index 5c82d8303036a..0000000000000 --- a/libc/test/src/stdlib/exit_test.cpp +++ /dev/null @@ -1,15 +0,0 @@ -//===-- Unittests for exit -----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/stdlib/exit.h" -#include "test/UnitTest/Test.h" - -TEST(LlvmLibcStdlib, exit) { - EXPECT_EXITS([] { LIBC_NAMESPACE::exit(1); }, 1); - EXPECT_EXITS([] { LIBC_NAMESPACE::exit(65); }, 65); -} From 44b94a4f750cec5bf1fc819c35ada5a834511256 Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Fri, 14 Nov 2025 10:36:47 -0800 Subject: [PATCH 17/56] [AArch64][DebugInfo]Add Target hooks for InstrRef on AArch64 (#165953) This patch adds the target hooks required by Instruction Referencing for the AArch64 target (see https://llvm.org/docs/InstrRefDebugInfo.html#target-hooks). These hooks allow the Instruction Referencing LiveDebugValues pass to track spill and restore instructions. With this patch, we can use `llvm/utils/llvm-locstats/llvm-locstats.py` to compare the coverage statistics on a clang.dSYM built in RelWithDebInfo mode: coverage with dbg_value: ``` ================================================= Debug Location Statistics ================================================= cov% samples percentage(~) ------------------------------------------------- 0% 5828021 38% (0%,10%) 127739 0% [10%,20%) 143344 0% [20%,30%) 172100 1% [30%,40%) 193173 1% [40%,50%) 127366 0% [50%,60%) 308350 2% [60%,70%) 257055 1% [70%,80%) 212410 1% [80%,90%) 295316 1% [90%,100%) 349280 2% 100% 7313157 47% ================================================= -the number of debug variables processed: 15327311 -PC ranges covered: 67% ------------------------------------------------- -total availability: 62% ================================================= ``` coverage with InstrRef without target hooks fix: ``` ================================================= Debug Location Statistics ================================================= cov% samples percentage(~) ------------------------------------------------- 0%
6052807 39% (0%,10%) 127710 0% [10%,20%) 129999 0% [20%,30%) 155011 1% [30%,40%) 171206 1% [40%,50%) 102861 0% [50%,60%) 264734 1% [60%,70%) 212386 1% [70%,80%) 176872 1% [80%,90%) 242120 1% [90%,100%) 254465 1% 100% 7437215 48% ================================================= -the number of debug variables processed: 15327386 -PC ranges covered: 67% ------------------------------------------------- -total availability: 60% ================================================= ``` coverage with InstrRef with target hooks fix: ``` ================================================= Debug Location Statistics ================================================= cov% samples percentage(~) ------------------------------------------------- 0% 5972267 39% (0%,10%) 118873 0% [10%,20%) 127138 0% [20%,30%) 153181 1% [30%,40%) 170102 1% [40%,50%) 102180 0% [50%,60%) 263672 1% [60%,70%) 212865 1% [70%,80%) 176633 1% [80%,90%) 242403 1% [90%,100%) 264441 1% 100% 7494527 48% ================================================= -the number of debug variables processed: 15298282 -PC ranges covered: 71% ------------------------------------------------- -total availability: 61% ================================================= ``` I believe this should be a good indication that Instruction Referencing should be turned on for AArch64? 
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 97 +- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 9 + .../AArch64/GlobalISel/arm64-atomic-128.ll | 72 +- .../AArch64/GlobalISel/arm64-atomic.ll | 1352 ++++---- .../atomic-anyextending-load-crash.ll | 8 +- .../CodeGen/AArch64/GlobalISel/byval-call.ll | 8 +- .../call-lowering-tail-call-fallback.ll | 2 +- .../irtranslator-stack-protector-windows.ll | 4 +- .../GlobalISel/select-fp-anyext-crash.ll | 4 +- .../GlobalISel/split-wide-shifts-multiway.ll | 354 +- .../GlobalISel/stacksave-stackrestore.ll | 4 +- .../CodeGen/AArch64/GlobalISel/swifterror.ll | 2 +- .../CodeGen/AArch64/aarch64-fastcc-stackup.ll | 8 +- .../aarch64-fixup-statepoint-regs-crash.ll | 2 +- llvm/test/CodeGen/AArch64/aarch64-mops.ll | 16 +- .../AArch64/addsub-constant-folding.ll | 72 +- .../CodeGen/AArch64/alias_mask_scalable.ll | 144 +- .../AArch64/alias_mask_scalable_nosve2.ll | 16 +- .../argument-blocks-array-of-struct.ll | 14 +- llvm/test/CodeGen/AArch64/arm64-fp128.ll | 430 +-- .../CodeGen/AArch64/arm64-memset-inline.ll | 68 +- .../CodeGen/AArch64/arm64-neon-mul-div.ll | 32 +- .../CodeGen/AArch64/arm64-register-pairing.ll | 8 +- .../CodeGen/AArch64/arm64-windows-calls.ll | 16 +- .../CodeGen/AArch64/arm64ec-entry-thunks.ll | 12 +- .../CodeGen/AArch64/arm64ec-reservedregs.ll | 16 +- llvm/test/CodeGen/AArch64/arm64ec-varargs.ll | 8 +- llvm/test/CodeGen/AArch64/atomicrmw-O0.ll | 264 +- llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll | 16 +- llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll | 16 +- llvm/test/CodeGen/AArch64/cmp-select-sign.ll | 8 +- .../CodeGen/AArch64/combine-storetomstore.ll | 8 +- .../AArch64/exception-handling-windows-elf.ll | 4 +- llvm/test/CodeGen/AArch64/fadd-combines.ll | 4 +- llvm/test/CodeGen/AArch64/fcmp-fp128.ll | 88 +- llvm/test/CodeGen/AArch64/fcmp.ll | 122 +- llvm/test/CodeGen/AArch64/fexplog.ll | 3000 ++++++++--------- .../fold-int-pow2-with-fmul-or-fdiv.ll | 24 +- llvm/test/CodeGen/AArch64/fp8-sme2-cvtn.ll | 8 +- 
llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 288 +- llvm/test/CodeGen/AArch64/fpext.ll | 112 +- llvm/test/CodeGen/AArch64/fpow.ll | 620 ++-- llvm/test/CodeGen/AArch64/fpowi.ll | 520 +-- llvm/test/CodeGen/AArch64/fptoi.ll | 400 +-- .../test/CodeGen/AArch64/fptosi-sat-scalar.ll | 58 +- .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 540 +-- .../test/CodeGen/AArch64/fptoui-sat-scalar.ll | 54 +- .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 352 +- llvm/test/CodeGen/AArch64/fptrunc.ll | 78 +- .../framelayout-sve-calleesaves-fix.mir | 4 +- llvm/test/CodeGen/AArch64/frem-power2.ll | 44 +- llvm/test/CodeGen/AArch64/frem.ll | 620 ++-- llvm/test/CodeGen/AArch64/fsincos.ll | 1200 +++---- .../implicit-def-subreg-to-reg-regression.ll | 6 +- llvm/test/CodeGen/AArch64/insertextract.ll | 60 +- .../AArch64/intrinsic-vector-match-sve2.ll | 12 +- llvm/test/CodeGen/AArch64/itofp.ll | 496 +-- llvm/test/CodeGen/AArch64/ldexp.ll | 20 +- .../CodeGen/AArch64/ldst-paired-aliasing.ll | 6 +- llvm/test/CodeGen/AArch64/llvm.exp10.ll | 268 +- llvm/test/CodeGen/AArch64/llvm.frexp.ll | 334 +- llvm/test/CodeGen/AArch64/llvm.modf.ll | 98 +- llvm/test/CodeGen/AArch64/llvm.sincos.ll | 226 +- llvm/test/CodeGen/AArch64/llvm.sincospi.ll | 22 +- llvm/test/CodeGen/AArch64/luti-with-sme2.ll | 12 +- llvm/test/CodeGen/AArch64/machine-combiner.ll | 16 +- .../machine-outliner-retaddr-sign-non-leaf.ll | 26 +- llvm/test/CodeGen/AArch64/mingw-refptr.ll | 4 +- .../named-vector-shuffle-reverse-neon.ll | 12 +- .../named-vector-shuffle-reverse-sve.ll | 4 +- llvm/test/CodeGen/AArch64/neon-dotreduce.ll | 68 +- llvm/test/CodeGen/AArch64/nontemporal.ll | 16 +- .../outlining-with-streaming-mode-changes.ll | 12 +- .../test/CodeGen/AArch64/perm-tb-with-sme2.ll | 28 +- llvm/test/CodeGen/AArch64/pow.ll | 36 +- llvm/test/CodeGen/AArch64/pr135821.ll | 8 +- llvm/test/CodeGen/AArch64/pr142314.ll | 4 +- llvm/test/CodeGen/AArch64/pr164181.ll | 96 +- llvm/test/CodeGen/AArch64/pr48188.ll | 24 +- 
.../CodeGen/AArch64/pr53315-returned-i128.ll | 8 +- llvm/test/CodeGen/AArch64/pr58516.ll | 8 +- .../CodeGen/AArch64/preserve_nonecc_call.ll | 20 +- .../AArch64/preserve_nonecc_varargs_aapcs.ll | 4 +- .../AArch64/preserve_nonecc_varargs_win64.ll | 4 +- llvm/test/CodeGen/AArch64/ragreedy-csr.ll | 32 +- .../AArch64/ragreedy-local-interval-cost.ll | 14 +- llvm/test/CodeGen/AArch64/rem.ll | 304 +- llvm/test/CodeGen/AArch64/settag-merge.ll | 2 +- llvm/test/CodeGen/AArch64/settag.ll | 8 +- llvm/test/CodeGen/AArch64/sibling-call.ll | 16 +- .../CodeGen/AArch64/sincos-stack-slots.ll | 12 +- .../AArch64/sls-stackprotector-outliner.ll | 16 +- llvm/test/CodeGen/AArch64/sme-agnostic-za.ll | 24 +- ...compatible-to-normal-fn-wihout-sme-attr.ll | 4 +- .../AArch64/sme-callee-save-restore-pairs.ll | 288 +- .../test/CodeGen/AArch64/sme-darwin-sve-vg.ll | 2 +- .../AArch64/sme-disable-gisel-fisel.ll | 96 +- llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll | 12 +- .../CodeGen/AArch64/sme-lazy-save-call.ll | 36 +- .../CodeGen/AArch64/sme-lazy-save-windows.ll | 4 +- .../AArch64/sme-must-save-lr-for-vg.ll | 2 +- .../CodeGen/AArch64/sme-new-za-function.ll | 42 +- .../test/CodeGen/AArch64/sme-peephole-opts.ll | 106 +- ...ate-sm-changing-call-disable-coalescing.ll | 240 +- .../CodeGen/AArch64/sme-streaming-body.ll | 44 +- .../CodeGen/AArch64/sme-streaming-checkvl.ll | 76 +- .../sme-streaming-compatible-interface.ll | 120 +- .../AArch64/sme-streaming-interface.ll | 132 +- .../sme-streaming-mode-changes-unwindinfo.ll | 2 +- ...nging-call-disable-stackslot-scavenging.ll | 12 +- llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll | 142 +- .../CodeGen/AArch64/sme-za-control-flow.ll | 20 +- .../test/CodeGen/AArch64/sme-za-exceptions.ll | 36 +- llvm/test/CodeGen/AArch64/sme-zt0-state.ll | 12 +- .../AArch64/sme2-fp8-intrinsics-cvt.ll | 8 +- .../AArch64/sme2-intrinsics-int-dots.ll | 72 +- .../CodeGen/AArch64/sme2-intrinsics-ld1.ll | 128 +- .../CodeGen/AArch64/sme2-intrinsics-ldnt1.ll | 128 +- 
.../CodeGen/AArch64/sme2-intrinsics-qcvt.ll | 4 +- .../CodeGen/AArch64/sme2-intrinsics-qrshr.ll | 8 +- .../CodeGen/AArch64/sme2-intrinsics-vdot.ll | 24 +- .../AArch64/split-sve-stack-frame-layout.ll | 222 +- .../CodeGen/AArch64/stack-hazard-defaults.ll | 8 +- .../CodeGen/AArch64/stack-hazard-windows.ll | 40 +- llvm/test/CodeGen/AArch64/stack-hazard.ll | 1498 ++++---- .../CodeGen/AArch64/stack-probing-dynamic.ll | 16 +- .../test/CodeGen/AArch64/stack-probing-sve.ll | 28 +- llvm/test/CodeGen/AArch64/stack-probing.ll | 2 +- .../AArch64/statepoint-call-lowering.ll | 4 +- .../streaming-compatible-memory-ops.ll | 20 +- llvm/test/CodeGen/AArch64/sve-alloca.ll | 48 +- .../AArch64/sve-callee-save-restore-pairs.ll | 240 +- .../AArch64/sve-calling-convention-mixed.ll | 96 +- .../AArch64/sve-extract-scalable-vector.ll | 8 +- .../CodeGen/AArch64/sve-fixed-ld2-alloca.ll | 4 +- .../CodeGen/AArch64/sve-fixed-length-fp128.ll | 40 +- .../sve-fixed-length-frame-offests-crash.ll | 4 +- .../AArch64/sve-fixed-vector-llrint.ll | 140 +- .../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 252 +- llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll | 24 +- llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll | 8 +- .../test/CodeGen/AArch64/sve-insert-vector.ll | 64 +- llvm/test/CodeGen/AArch64/sve-llrint.ll | 228 +- llvm/test/CodeGen/AArch64/sve-lrint.ll | 228 +- llvm/test/CodeGen/AArch64/sve-pred-arith.ll | 40 +- .../CodeGen/AArch64/sve-stack-frame-layout.ll | 68 +- .../sve-streaming-mode-fixed-length-fp-fma.ll | 4 +- ...e-streaming-mode-fixed-length-fp-to-int.ll | 8 +- ...streaming-mode-fixed-length-int-extends.ll | 24 +- ...ve-streaming-mode-fixed-length-int-mulh.ll | 112 +- ...e-streaming-mode-fixed-length-int-to-fp.ll | 4 +- ...streaming-mode-fixed-length-int-vselect.ll | 8 +- ...-streaming-mode-fixed-length-ld2-alloca.ll | 4 +- ...streaming-mode-fixed-length-masked-load.ll | 12 +- .../sve-streaming-mode-fixed-length-trunc.ll | 516 +-- llvm/test/CodeGen/AArch64/sve-tailcall.ll | 96 +- 
llvm/test/CodeGen/AArch64/sve-trunc.ll | 16 +- .../AArch64/sve2p1-intrinsics-loads.ll | 288 +- .../sve2p1-intrinsics-predicate-as-counter.ll | 32 +- .../AArch64/sve2p1-intrinsics-selx2.ll | 32 +- .../AArch64/sve2p1-intrinsics-selx4.ll | 32 +- .../AArch64/sve2p1-intrinsics-stores.ll | 128 +- llvm/test/CodeGen/AArch64/swift-async-win.ll | 4 +- llvm/test/CodeGen/AArch64/swifterror.ll | 464 +-- llvm/test/CodeGen/AArch64/trampoline.ll | 8 +- llvm/test/CodeGen/AArch64/unwind-preserved.ll | 160 +- llvm/test/CodeGen/AArch64/vec-libcalls.ll | 312 +- llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll | 16 +- .../vecreduce-fadd-legalization-strict.ll | 24 +- .../AArch64/vecreduce-fmax-legalization.ll | 16 +- .../AArch64/vecreduce-fmin-legalization.ll | 16 +- llvm/test/CodeGen/AArch64/vector-llrint.ll | 168 +- llvm/test/CodeGen/AArch64/vector-lrint.ll | 344 +- llvm/test/CodeGen/AArch64/win-sve.ll | 420 +-- llvm/test/CodeGen/AArch64/win64-fpowi.ll | 32 +- llvm/test/CodeGen/AArch64/win64_vararg.ll | 12 +- llvm/test/CodeGen/AArch64/win64_vararg2.ll | 16 +- .../CodeGen/AArch64/win64_vararg_float.ll | 8 +- .../CodeGen/AArch64/win64_vararg_float_cc.ll | 8 +- .../CodeGen/AArch64/win64cc-backup-x18.ll | 4 +- .../AArch64/wincfi-missing-seh-directives.ll | 6 +- .../instr-ref-target-hooks-sp-clobber.mir | 190 ++ .../AArch64/instr-ref-target-hooks.ll | 58 + 183 files changed, 11330 insertions(+), 11018 deletions(-) create mode 100644 llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir create mode 100644 llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 67ee5b6636cec..2c7aca8fc91ce 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2452,11 +2452,10 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) { return false; } -Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, - int 
&FrameIndex) const { - switch (MI.getOpcode()) { +static bool isFrameLoadOpcode(int Opcode) { + switch (Opcode) { default: - break; + return false; case AArch64::LDRWui: case AArch64::LDRXui: case AArch64::LDRBui: @@ -2465,22 +2464,27 @@ Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, case AArch64::LDRDui: case AArch64::LDRQui: case AArch64::LDR_PXI: - if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && - MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { - FrameIndex = MI.getOperand(1).getIndex(); - return MI.getOperand(0).getReg(); - } - break; + return true; } +} - return 0; +Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + if (!isFrameLoadOpcode(MI.getOpcode())) + return Register(); + + if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && + MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); + } + return Register(); } -Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, - int &FrameIndex) const { - switch (MI.getOpcode()) { +static bool isFrameStoreOpcode(int Opcode) { + switch (Opcode) { default: - break; + return false; case AArch64::STRWui: case AArch64::STRXui: case AArch64::STRBui: @@ -2489,14 +2493,63 @@ Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, case AArch64::STRDui: case AArch64::STRQui: case AArch64::STR_PXI: - if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && - MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { - FrameIndex = MI.getOperand(1).getIndex(); - return MI.getOperand(0).getReg(); - } - break; + return true; + } +} + +Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + if (!isFrameStoreOpcode(MI.getOpcode())) + return Register(); + + if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() && + 
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); + } + return Register(); +} + +Register AArch64InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, + int &FrameIndex) const { + if (!isFrameStoreOpcode(MI.getOpcode())) + return Register(); + + if (Register Reg = isStoreToStackSlot(MI, FrameIndex)) + return Reg; + + SmallVector Accesses; + if (hasStoreToStackSlot(MI, Accesses)) { + if (Accesses.size() > 1) + return Register(); + + FrameIndex = + cast(Accesses.front()->getPseudoValue()) + ->getFrameIndex(); + return MI.getOperand(0).getReg(); } - return 0; + return Register(); +} + +Register AArch64InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, + int &FrameIndex) const { + if (!isFrameLoadOpcode(MI.getOpcode())) + return Register(); + + if (Register Reg = isLoadFromStackSlot(MI, FrameIndex)) + return Reg; + + SmallVector Accesses; + if (hasLoadFromStackSlot(MI, Accesses)) { + if (Accesses.size() > 1) + return Register(); + + FrameIndex = + cast(Accesses.front()->getPseudoValue()) + ->getFrameIndex(); + return MI.getOperand(0).getReg(); + } + return Register(); } /// Check all MachineMemOperands for a hint to suppress pairing. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 979c9acbd48e1..59b9c0da008f4 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -205,6 +205,15 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; + /// Check for post-frame ptr elimination stack locations as well. This uses a + /// heuristic so it isn't reliable for correctness. + Register isStoreToStackSlotPostFE(const MachineInstr &MI, + int &FrameIndex) const override; + /// Check for post-frame ptr elimination stack locations as well. 
This uses a + /// heuristic so it isn't reliable for correctness. + Register isLoadFromStackSlotPostFE(const MachineInstr &MI, + int &FrameIndex) const override; + /// Does this instruction set its full destination register to zero? static bool isGPRZero(const MachineInstr &MI); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll index 1fe63c9be8c62..be51210882eaa 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll @@ -89,23 +89,23 @@ define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) { ; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap: ; CHECK-OUTLINE-LLSC-O0: // %bb.0: ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill +; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4 -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0 -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1 ; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0] -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; 
CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32 ; CHECK-OUTLINE-LLSC-O0-NEXT: ret ; @@ -113,9 +113,9 @@ define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) { ; CHECK-CAS-O0: // %bb.0: ; CHECK-CAS-O0-NEXT: sub sp, sp, #16 ; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 16 -; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Folded Spill +; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Spill ; CHECK-CAS-O0-NEXT: mov x1, x5 -; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Folded Reload +; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Reload ; CHECK-CAS-O0-NEXT: // kill: def $x2 killed $x2 def $x2_x3 ; CHECK-CAS-O0-NEXT: mov x3, x5 ; CHECK-CAS-O0-NEXT: // kill: def $x4 killed $x4 def $x4_x5 @@ -216,23 +216,23 @@ define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %n ; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_monotonic_seqcst: ; CHECK-OUTLINE-LLSC-O0: // %bb.0: ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill +; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4 -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq_rel ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0 -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0 ; 
CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1 ; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0] -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32 ; CHECK-OUTLINE-LLSC-O0-NEXT: ret ; @@ -240,9 +240,9 @@ define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %n ; CHECK-CAS-O0: // %bb.0: ; CHECK-CAS-O0-NEXT: sub sp, sp, #16 ; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 16 -; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Folded Spill +; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Spill ; CHECK-CAS-O0-NEXT: mov x1, x5 -; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Folded Reload +; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Reload ; CHECK-CAS-O0-NEXT: // kill: def $x2 killed $x2 def $x2_x3 ; CHECK-CAS-O0-NEXT: mov x3, x5 ; CHECK-CAS-O0-NEXT: // kill: def $x4 killed $x4 def $x4_x5 @@ -343,23 +343,23 @@ define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %ne ; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_release_acquire: ; CHECK-OUTLINE-LLSC-O0: // %bb.0: ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill +; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4 -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq_rel ; 
CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0 -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1 ; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0] -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32 ; CHECK-OUTLINE-LLSC-O0-NEXT: ret ; @@ -367,9 +367,9 @@ define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %ne ; CHECK-CAS-O0: // %bb.0: ; CHECK-CAS-O0-NEXT: sub sp, sp, #16 ; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 16 -; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Folded Spill +; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Spill ; CHECK-CAS-O0-NEXT: mov x1, x5 -; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Folded Reload +; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Reload ; CHECK-CAS-O0-NEXT: // kill: def $x2 killed $x2 def $x2_x3 ; CHECK-CAS-O0-NEXT: mov x3, x5 ; CHECK-CAS-O0-NEXT: // kill: def $x4 killed $x4 def $x4_x5 @@ -470,23 +470,23 @@ define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) ; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_monotonic: ; CHECK-OUTLINE-LLSC-O0: // %bb.0: ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill +; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4 -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, 
[sp, #8] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq_rel ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x8, x0 -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x0, [sp, #8] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x8 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1 ; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x0] -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32 ; CHECK-OUTLINE-LLSC-O0-NEXT: ret ; @@ -494,9 +494,9 @@ define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) ; CHECK-CAS-O0: // %bb.0: ; CHECK-CAS-O0-NEXT: sub sp, sp, #16 ; CHECK-CAS-O0-NEXT: .cfi_def_cfa_offset 16 -; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Folded Spill +; CHECK-CAS-O0-NEXT: str x3, [sp, #8] // 8-byte Spill ; CHECK-CAS-O0-NEXT: mov x1, x5 -; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Folded Reload +; CHECK-CAS-O0-NEXT: ldr x5, [sp, #8] // 8-byte Reload ; CHECK-CAS-O0-NEXT: // kill: def $x2 killed $x2 def $x2_x3 ; CHECK-CAS-O0-NEXT: mov x3, x5 ; CHECK-CAS-O0-NEXT: // kill: def $x4 killed $x4 def $x4_x5 @@ -580,22 +580,22 @@ define void @atomic_load_relaxed(i64, i64, ptr %p, ptr %p2) { ; CHECK-OUTLINE-LLSC-O0-LABEL: atomic_load_relaxed: ; CHECK-OUTLINE-LLSC-O0: // %bb.0: ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x4, x2 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x3, [sp, #8] // 8-byte Folded Spill +; 
CHECK-OUTLINE-LLSC-O0-NEXT: str x3, [sp, #8] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, xzr ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x3 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x3 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_relax -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x3, [sp, #8] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x3, [sp, #8] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: // implicit-def: $q0 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[0], x0 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov v0.d[1], x1 ; CHECK-OUTLINE-LLSC-O0-NEXT: str q0, [x3] -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32 ; CHECK-OUTLINE-LLSC-O0-NEXT: ret ; @@ -690,17 +690,17 @@ define i128 @val_compare_and_swap_return(ptr %p, i128 %oldval, i128 %newval) { ; CHECK-OUTLINE-LLSC-O0-LABEL: val_compare_and_swap_return: ; CHECK-OUTLINE-LLSC-O0: // %bb.0: ; CHECK-OUTLINE-LLSC-O0-NEXT: sub sp, sp, #32 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-OUTLINE-LLSC-O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-LLSC-O0-NEXT: .cfi_offset w30, -16 -; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Folded Spill +; CHECK-OUTLINE-LLSC-O0-NEXT: str x0, [sp, #8] // 8-byte Spill ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x0, x2 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x1, x3 ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x2, x4 -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x4, [sp, #8] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: mov x3, x5 ; CHECK-OUTLINE-LLSC-O0-NEXT: bl __aarch64_cas16_acq -; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-OUTLINE-LLSC-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-OUTLINE-LLSC-O0-NEXT: add sp, sp, #32 ; 
CHECK-OUTLINE-LLSC-O0-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll index e6bf3ab674717..3f51ec747182a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -56,10 +56,10 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) #0 { ; CHECK-OUTLINE-O0: ; %bb.0: ; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32 ; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 ; CHECK-OUTLINE-O0-NEXT: mov w1, w2 -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_acq ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -133,10 +133,10 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) #0 { ; CHECK-OUTLINE-O0: ; %bb.0: ; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32 ; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 ; CHECK-OUTLINE-O0-NEXT: mov x8, x2 -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldr w1, [x8] ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_acq ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload @@ -211,10 +211,10 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) #0 { ; CHECK-OUTLINE-O0: ; %bb.0: ; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32 ; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; 
CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 ; CHECK-OUTLINE-O0-NEXT: mov w1, w2 -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_acq_rel ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -285,10 +285,10 @@ define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) #0 { ; CHECK-OUTLINE-O0: ; %bb.0: ; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32 ; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x0, x1 ; CHECK-OUTLINE-O0-NEXT: mov x1, x2 -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_relax ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -359,10 +359,10 @@ define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new) ; CHECK-OUTLINE-O0: ; %bb.0: ; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32 ; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x0, x1 ; CHECK-OUTLINE-O0-NEXT: mov x1, x2 -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_acq_rel ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -433,10 +433,10 @@ define i64 
@val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new) ; CHECK-OUTLINE-O0: ; %bb.0: ; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #32 ; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x0, x1 ; CHECK-OUTLINE-O0-NEXT: mov x1, x2 -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_acq_rel ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -489,15 +489,15 @@ define i32 @fetch_and_nand(ptr %p) #0 { ; CHECK-NOLSE-O0-LABEL: fetch_and_nand: ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB6_1 ; CHECK-NOLSE-O0-NEXT: LBB6_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB6_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: and w9, w8, #0x7 ; CHECK-NOLSE-O0-NEXT: mvn w12, w9 ; CHECK-NOLSE-O0-NEXT: LBB6_2: ; %atomicrmw.start @@ -514,12 +514,12 @@ define i32 @fetch_and_nand(ptr %p) #0 { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB6_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 
4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB6_1 ; CHECK-NOLSE-O0-NEXT: b LBB6_5 ; CHECK-NOLSE-O0-NEXT: LBB6_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -527,27 +527,27 @@ define i32 @fetch_and_nand(ptr %p) #0 { ; CHECK-OUTLINE-O0: ; %bb.0: ; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldr w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB6_1 ; CHECK-OUTLINE-O0-NEXT: LBB6_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0x7 ; CHECK-OUTLINE-O0-NEXT: mvn w1, w8 ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_rel -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w8 ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill +; 
CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB6_1 ; CHECK-OUTLINE-O0-NEXT: b LBB6_2 ; CHECK-OUTLINE-O0-NEXT: LBB6_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -570,26 +570,26 @@ define i32 @fetch_and_nand(ptr %p) #0 { ; CHECK-LSE-O0-LABEL: fetch_and_nand: ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: sub sp, sp, #32 -; CHECK-LSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-LSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-LSE-O0-NEXT: ldr w8, [x0] -; CHECK-LSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-LSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-LSE-O0-NEXT: b LBB6_1 ; CHECK-LSE-O0-NEXT: LBB6_1: ; %atomicrmw.start ; CHECK-LSE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-LSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-LSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; CHECK-LSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-LSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload ; CHECK-LSE-O0-NEXT: and w9, w8, #0x7 ; CHECK-LSE-O0-NEXT: mvn w10, w9 ; CHECK-LSE-O0-NEXT: mov x9, x8 ; CHECK-LSE-O0-NEXT: casl w9, w10, [x11] ; CHECK-LSE-O0-NEXT: subs w8, w9, w8 ; CHECK-LSE-O0-NEXT: cset w8, eq -; CHECK-LSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-LSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-LSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-LSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-LSE-O0-NEXT: tbz w8, #0, LBB6_1 ; CHECK-LSE-O0-NEXT: b LBB6_2 ; CHECK-LSE-O0-NEXT: LBB6_2: ; %atomicrmw.end -; CHECK-LSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-LSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-LSE-O0-NEXT: add sp, sp, #32 ; CHECK-LSE-O0-NEXT: 
ret %val = atomicrmw nand ptr %p, i32 7 release @@ -626,15 +626,15 @@ define i64 @fetch_and_nand_64(ptr %p) #0 { ; CHECK-NOLSE-O0-LABEL: fetch_and_nand_64: ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB7_1 ; CHECK-NOLSE-O0-NEXT: LBB7_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB7_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: and x9, x8, #0x7 ; CHECK-NOLSE-O0-NEXT: mvn x12, x9 ; CHECK-NOLSE-O0-NEXT: LBB7_2: ; %atomicrmw.start @@ -651,12 +651,12 @@ define i64 @fetch_and_nand_64(ptr %p) #0 { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB7_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB7_1 ; CHECK-NOLSE-O0-NEXT: b LBB7_5 ; CHECK-NOLSE-O0-NEXT: LBB7_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -664,27 +664,27 @@ define i64 @fetch_and_nand_64(ptr %p) #0 { ; CHECK-OUTLINE-O0: ; %bb.0: ; CHECK-OUTLINE-O0-NEXT: sub sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: stp x29, x30, [sp, #32] ; 16-byte 
Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldr x0, [x0] -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB7_1 ; CHECK-OUTLINE-O0-NEXT: LBB7_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #24] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #24] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: and x8, x0, #0x7 ; CHECK-OUTLINE-O0-NEXT: mvn x1, x8 ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_acq_rel -; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x8 ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB7_1 ; CHECK-OUTLINE-O0-NEXT: b LBB7_2 ; CHECK-OUTLINE-O0-NEXT: LBB7_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -707,26 +707,26 @@ define i64 @fetch_and_nand_64(ptr %p) #0 { ; CHECK-LSE-O0-LABEL: fetch_and_nand_64: ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: sub sp, sp, #32 -; CHECK-LSE-O0-NEXT: str x0, [sp, #16] ; 
8-byte Folded Spill +; CHECK-LSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-LSE-O0-NEXT: ldr x8, [x0] -; CHECK-LSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-LSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-LSE-O0-NEXT: b LBB7_1 ; CHECK-LSE-O0-NEXT: LBB7_1: ; %atomicrmw.start ; CHECK-LSE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-LSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-LSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; CHECK-LSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-LSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload ; CHECK-LSE-O0-NEXT: and x9, x8, #0x7 ; CHECK-LSE-O0-NEXT: mvn x10, x9 ; CHECK-LSE-O0-NEXT: mov x9, x8 ; CHECK-LSE-O0-NEXT: casal x9, x10, [x11] ; CHECK-LSE-O0-NEXT: subs x8, x9, x8 ; CHECK-LSE-O0-NEXT: cset w8, eq -; CHECK-LSE-O0-NEXT: str x9, [sp, #8] ; 8-byte Folded Spill -; CHECK-LSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-LSE-O0-NEXT: str x9, [sp, #8] ; 8-byte Spill +; CHECK-LSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-LSE-O0-NEXT: tbz w8, #0, LBB7_1 ; CHECK-LSE-O0-NEXT: b LBB7_2 ; CHECK-LSE-O0-NEXT: LBB7_2: ; %atomicrmw.end -; CHECK-LSE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload +; CHECK-LSE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Reload ; CHECK-LSE-O0-NEXT: add sp, sp, #32 ; CHECK-LSE-O0-NEXT: ret %val = atomicrmw nand ptr %p, i64 7 acq_rel @@ -759,15 +759,15 @@ define i32 @fetch_and_or(ptr %p) #0 { ; CHECK-NOLSE-O0-LABEL: fetch_and_or: ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB8_1 ; CHECK-NOLSE-O0-NEXT: LBB8_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop 
BB8_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: mov w9, #5 ; =0x5 ; CHECK-NOLSE-O0-NEXT: orr w12, w8, w9 ; CHECK-NOLSE-O0-NEXT: LBB8_2: ; %atomicrmw.start @@ -784,12 +784,12 @@ define i32 @fetch_and_or(ptr %p) #0 { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB8_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB8_1 ; CHECK-NOLSE-O0-NEXT: b LBB8_5 ; CHECK-NOLSE-O0-NEXT: LBB8_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -833,15 +833,15 @@ define i64 @fetch_and_or_64(ptr %p) #0 { ; CHECK-NOLSE-O0-LABEL: fetch_and_or_64: ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB9_1 ; CHECK-NOLSE-O0-NEXT: LBB9_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB9_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, 
#16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: orr x12, x8, #0x7 ; CHECK-NOLSE-O0-NEXT: LBB9_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB9_1 Depth=1 @@ -857,12 +857,12 @@ define i64 @fetch_and_or_64(ptr %p) #0 { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB9_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB9_1 ; CHECK-NOLSE-O0-NEXT: b LBB9_5 ; CHECK-NOLSE-O0-NEXT: LBB9_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -1871,17 +1871,17 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB27_1 ; CHECK-NOLSE-O0-NEXT: LBB27_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB27_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; 
CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add w12, w8, w10, uxth ; CHECK-NOLSE-O0-NEXT: LBB27_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB27_1 Depth=1 @@ -1895,15 +1895,15 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB27_2 ; CHECK-NOLSE-O0-NEXT: LBB27_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB27_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB27_1 ; CHECK-NOLSE-O0-NEXT: b LBB27_5 ; CHECK-NOLSE-O0-NEXT: LBB27_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -1914,9 +1914,9 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd1_acq_rel ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -1964,17 +1964,17 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 
8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB28_1 ; CHECK-NOLSE-O0-NEXT: LBB28_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB28_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w12, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w12, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: LBB28_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB28_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -1987,15 +1987,15 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB28_2 ; CHECK-NOLSE-O0-NEXT: LBB28_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB28_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB28_1 ; CHECK-NOLSE-O0-NEXT: b LBB28_5 ; CHECK-NOLSE-O0-NEXT: LBB28_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -2006,9 
+2006,9 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_swp1_relax ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -2057,17 +2057,17 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB29_1 ; CHECK-NOLSE-O0-NEXT: LBB29_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB29_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: subs w12, w10, w8 ; CHECK-NOLSE-O0-NEXT: LBB29_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB29_1 Depth=1 @@ -2081,15 +2081,15 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, 
i8 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB29_2 ; CHECK-NOLSE-O0-NEXT: LBB29_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB29_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB29_1 ; CHECK-NOLSE-O0-NEXT: b LBB29_5 ; CHECK-NOLSE-O0-NEXT: LBB29_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -2100,9 +2100,9 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w9, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: mov w8, wzr ; CHECK-OUTLINE-O0-NEXT: subs w0, w8, w9 ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd1_acq @@ -2155,17 +2155,17 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; 
CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB30_1 ; CHECK-NOLSE-O0-NEXT: LBB30_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB30_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: and w12, w10, w8 ; CHECK-NOLSE-O0-NEXT: LBB30_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB30_1 Depth=1 @@ -2179,15 +2179,15 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB30_2 ; CHECK-NOLSE-O0-NEXT: LBB30_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB30_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB30_1 ; CHECK-NOLSE-O0-NEXT: b LBB30_5 ; CHECK-NOLSE-O0-NEXT: LBB30_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -2198,9 +2198,9 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; 
CHECK-OUTLINE-O0-NEXT: mov w9, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: mov w8, #-1 ; =0xffffffff ; CHECK-OUTLINE-O0-NEXT: eor w0, w8, w9 ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldclr1_rel @@ -2253,17 +2253,17 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB31_1 ; CHECK-NOLSE-O0-NEXT: LBB31_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB31_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: orr w12, w10, w8 ; CHECK-NOLSE-O0-NEXT: LBB31_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB31_1 Depth=1 @@ -2277,15 +2277,15 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB31_2 ; CHECK-NOLSE-O0-NEXT: LBB31_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB31_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff ; CHECK-NOLSE-O0-NEXT: subs 
w8, w8, w10, uxtb ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB31_1 ; CHECK-NOLSE-O0-NEXT: b LBB31_5 ; CHECK-NOLSE-O0-NEXT: LBB31_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -2296,9 +2296,9 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldset1_acq_rel ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -2347,17 +2347,17 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB32_1 ; CHECK-NOLSE-O0-NEXT: LBB32_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB32_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Folded Reload -; 
CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: eor w12, w10, w8 ; CHECK-NOLSE-O0-NEXT: LBB32_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB32_1 Depth=1 @@ -2371,15 +2371,15 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB32_2 ; CHECK-NOLSE-O0-NEXT: LBB32_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB32_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB32_1 ; CHECK-NOLSE-O0-NEXT: b LBB32_5 ; CHECK-NOLSE-O0-NEXT: LBB32_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -2390,9 +2390,9 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldeor1_relax ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, 
sp, #32 @@ -2444,17 +2444,17 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB33_1 ; CHECK-NOLSE-O0-NEXT: LBB33_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB33_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: sxtb w9, w10 ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, sxtb ; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, le @@ -2470,15 +2470,15 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB33_2 ; CHECK-NOLSE-O0-NEXT: LBB33_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB33_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB33_1 ; CHECK-NOLSE-O0-NEXT: b LBB33_5 ; CHECK-NOLSE-O0-NEXT: LBB33_5: ; %atomicrmw.end -; 
CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -2489,31 +2489,31 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldrb w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB33_1 ; CHECK-OUTLINE-O0-NEXT: LBB33_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: sxtb w9, w0 ; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, sxtb ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, le ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas1_acq -; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xff ; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w9, uxtb ; 
CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB33_1 ; CHECK-OUTLINE-O0-NEXT: b LBB33_2 ; CHECK-OUTLINE-O0-NEXT: LBB33_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -2564,17 +2564,17 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB34_1 ; CHECK-NOLSE-O0-NEXT: LBB34_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB34_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: sxtb w9, w10 ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, sxtb ; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, gt @@ -2590,15 +2590,15 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB34_2 ; CHECK-NOLSE-O0-NEXT: LBB34_4: ; %atomicrmw.start ; 
CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB34_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB34_1 ; CHECK-NOLSE-O0-NEXT: b LBB34_5 ; CHECK-NOLSE-O0-NEXT: LBB34_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -2609,31 +2609,31 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldrb w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB34_1 ; CHECK-OUTLINE-O0-NEXT: LBB34_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 
4-byte Spill ; CHECK-OUTLINE-O0-NEXT: sxtb w9, w0 ; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, sxtb ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, gt ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas1_rel -; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xff ; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w9, uxtb ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB34_1 ; CHECK-OUTLINE-O0-NEXT: b LBB34_2 ; CHECK-OUTLINE-O0-NEXT: LBB34_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -2686,17 +2686,17 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB35_1 ; CHECK-NOLSE-O0-NEXT: LBB35_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB35_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload 
-; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: and w9, w10, #0xff ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, uxtb ; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ls @@ -2712,15 +2712,15 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB35_2 ; CHECK-NOLSE-O0-NEXT: LBB35_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB35_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB35_1 ; CHECK-NOLSE-O0-NEXT: b LBB35_5 ; CHECK-NOLSE-O0-NEXT: LBB35_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -2731,31 +2731,31 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldrb w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB35_1 ; CHECK-OUTLINE-O0-NEXT: LBB35_1: ; %atomicrmw.start ; 
CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: and w9, w0, #0xff ; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, uxtb ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, ls ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas1_acq_rel -; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xff ; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w9, uxtb ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB35_1 ; CHECK-OUTLINE-O0-NEXT: b LBB35_2 ; CHECK-OUTLINE-O0-NEXT: LBB35_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -2808,17 +2808,17 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, 
[sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrb w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB36_1 ; CHECK-NOLSE-O0-NEXT: LBB36_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB36_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w10, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: and w9, w10, #0xff ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, uxtb ; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, hi @@ -2834,15 +2834,15 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB36_2 ; CHECK-NOLSE-O0-NEXT: LBB36_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB36_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: and w8, w9, #0xff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w10, uxtb ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB36_1 ; CHECK-NOLSE-O0-NEXT: b LBB36_5 ; CHECK-NOLSE-O0-NEXT: LBB36_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -2853,31 +2853,31 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, 
-8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldrb w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB36_1 ; CHECK-OUTLINE-O0-NEXT: LBB36_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: and w9, w0, #0xff ; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, uxtb ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, hi ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas1_relax -; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w9, [sp, #8] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xff ; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w9, uxtb ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB36_1 ; CHECK-OUTLINE-O0-NEXT: b LBB36_2 ; CHECK-OUTLINE-O0-NEXT: LBB36_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded 
Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -2925,17 +2925,17 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB37_1 ; CHECK-NOLSE-O0-NEXT: LBB37_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB37_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add w12, w9, w8, uxth ; CHECK-NOLSE-O0-NEXT: LBB37_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB37_1 Depth=1 @@ -2949,15 +2949,15 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB37_2 ; CHECK-NOLSE-O0-NEXT: LBB37_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB37_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: uxth w8, w8 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, 
[sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB37_1 ; CHECK-NOLSE-O0-NEXT: b LBB37_5 ; CHECK-NOLSE-O0-NEXT: LBB37_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -2968,9 +2968,9 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd2_acq_rel ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -3018,17 +3018,17 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB38_1 ; CHECK-NOLSE-O0-NEXT: LBB38_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB38_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; 
CHECK-NOLSE-O0-NEXT: ldr w12, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w12, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: LBB38_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB38_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -3041,15 +3041,15 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB38_2 ; CHECK-NOLSE-O0-NEXT: LBB38_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB38_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: uxth w8, w8 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB38_1 ; CHECK-NOLSE-O0-NEXT: b LBB38_5 ; CHECK-NOLSE-O0-NEXT: LBB38_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -3060,9 +3060,9 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_swp2_relax ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -3111,17 +3111,17 @@ define i16 
@atomicrmw_sub_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB39_1 ; CHECK-NOLSE-O0-NEXT: LBB39_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB39_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: subs w12, w8, w9 ; CHECK-NOLSE-O0-NEXT: LBB39_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB39_1 Depth=1 @@ -3135,15 +3135,15 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB39_2 ; CHECK-NOLSE-O0-NEXT: LBB39_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB39_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: uxth w8, w8 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB39_1 ; CHECK-NOLSE-O0-NEXT: b LBB39_5 ; CHECK-NOLSE-O0-NEXT: LBB39_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, 
#12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -3154,9 +3154,9 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w9, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: mov w8, wzr ; CHECK-OUTLINE-O0-NEXT: subs w0, w8, w9 ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd2_acq @@ -3209,17 +3209,17 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB40_1 ; CHECK-NOLSE-O0-NEXT: LBB40_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB40_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: and w12, w8, w9 ; CHECK-NOLSE-O0-NEXT: LBB40_2: ; 
%atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB40_1 Depth=1 @@ -3233,15 +3233,15 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB40_2 ; CHECK-NOLSE-O0-NEXT: LBB40_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB40_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: uxth w8, w8 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB40_1 ; CHECK-NOLSE-O0-NEXT: b LBB40_5 ; CHECK-NOLSE-O0-NEXT: LBB40_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -3252,9 +3252,9 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w9, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: mov w8, #-1 ; =0xffffffff ; CHECK-OUTLINE-O0-NEXT: eor w0, w8, w9 ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldclr2_rel @@ -3307,17 +3307,17 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: 
str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB41_1 ; CHECK-NOLSE-O0-NEXT: LBB41_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB41_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: orr w12, w8, w9 ; CHECK-NOLSE-O0-NEXT: LBB41_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB41_1 Depth=1 @@ -3331,15 +3331,15 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB41_2 ; CHECK-NOLSE-O0-NEXT: LBB41_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB41_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: uxth w8, w8 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB41_1 ; CHECK-NOLSE-O0-NEXT: b LBB41_5 ; CHECK-NOLSE-O0-NEXT: LBB41_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -3350,9 +3350,9 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 
-; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldset2_acq_rel ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -3401,17 +3401,17 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB42_1 ; CHECK-NOLSE-O0-NEXT: LBB42_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB42_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: eor w12, w8, w9 ; CHECK-NOLSE-O0-NEXT: LBB42_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB42_1 Depth=1 @@ -3425,15 +3425,15 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB42_2 ; CHECK-NOLSE-O0-NEXT: LBB42_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB42_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, 
[sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: uxth w8, w8 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB42_1 ; CHECK-NOLSE-O0-NEXT: b LBB42_5 ; CHECK-NOLSE-O0-NEXT: LBB42_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -3444,9 +3444,9 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldeor2_relax ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -3498,17 +3498,17 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB43_1 ; CHECK-NOLSE-O0-NEXT: LBB43_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: 
; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB43_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: sxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, sxth ; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, le @@ -3524,15 +3524,15 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB43_2 ; CHECK-NOLSE-O0-NEXT: LBB43_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB43_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: uxth w8, w8 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB43_1 ; CHECK-NOLSE-O0-NEXT: b LBB43_5 ; CHECK-NOLSE-O0-NEXT: LBB43_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -3543,31 +3543,31 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldrh w0, [x0] -; 
CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB43_1 ; CHECK-OUTLINE-O0-NEXT: LBB43_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: sxth w9, w0 ; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, sxth ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, le ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas2_acq -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: uxth w8, w8 ; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w0, uxth ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB43_1 ; CHECK-OUTLINE-O0-NEXT: b LBB43_2 ; CHECK-OUTLINE-O0-NEXT: LBB43_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -3618,17 +3618,17 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; 
CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB44_1 ; CHECK-NOLSE-O0-NEXT: LBB44_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB44_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: sxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, sxth ; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, gt @@ -3644,15 +3644,15 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB44_2 ; CHECK-NOLSE-O0-NEXT: LBB44_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB44_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: uxth w8, w8 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB44_1 ; CHECK-NOLSE-O0-NEXT: b LBB44_5 ; CHECK-NOLSE-O0-NEXT: LBB44_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; 
CHECK-NOLSE-O0-NEXT: ret ; @@ -3663,31 +3663,31 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldrh w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB44_1 ; CHECK-OUTLINE-O0-NEXT: LBB44_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: sxth w9, w0 ; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, sxth ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, gt ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas2_rel -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: uxth w8, w8 ; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w0, uxth ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; 
CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB44_1 ; CHECK-OUTLINE-O0-NEXT: b LBB44_2 ; CHECK-OUTLINE-O0-NEXT: LBB44_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -3740,17 +3740,17 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB45_1 ; CHECK-NOLSE-O0-NEXT: LBB45_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB45_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: uxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, uxth ; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ls @@ -3766,15 +3766,15 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB45_2 ; CHECK-NOLSE-O0-NEXT: LBB45_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB45_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, 
#12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: uxth w8, w8 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB45_1 ; CHECK-NOLSE-O0-NEXT: b LBB45_5 ; CHECK-NOLSE-O0-NEXT: LBB45_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -3785,31 +3785,31 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldrh w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB45_1 ; CHECK-OUTLINE-O0-NEXT: LBB45_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: uxth w9, w0 ; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, uxth ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, ls ; 
CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas2_acq_rel -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: uxth w8, w8 ; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w0, uxth ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB45_1 ; CHECK-OUTLINE-O0-NEXT: b LBB45_2 ; CHECK-OUTLINE-O0-NEXT: LBB45_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -3862,17 +3862,17 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldrh w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB46_1 ; CHECK-NOLSE-O0-NEXT: LBB46_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB46_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; 
CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: uxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, uxth ; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, hi @@ -3888,15 +3888,15 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB46_2 ; CHECK-NOLSE-O0-NEXT: LBB46_4: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB46_1 Depth=1 -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: uxth w8, w8 ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w9, uxth ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB46_1 ; CHECK-NOLSE-O0-NEXT: b LBB46_5 ; CHECK-NOLSE-O0-NEXT: LBB46_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -3907,31 +3907,31 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldrh w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB46_1 ; CHECK-OUTLINE-O0-NEXT: LBB46_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; 
CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: uxth w9, w0 ; CHECK-OUTLINE-O0-NEXT: subs w9, w9, w8, uxth ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, hi ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas2_relax -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: uxth w8, w8 ; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w0, uxth ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB46_1 ; CHECK-OUTLINE-O0-NEXT: b LBB46_2 ; CHECK-OUTLINE-O0-NEXT: LBB46_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -3979,17 +3979,17 @@ define i32 @atomicrmw_add_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: 
str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB47_1 ; CHECK-NOLSE-O0-NEXT: LBB47_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB47_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add w12, w8, w9 ; CHECK-NOLSE-O0-NEXT: LBB47_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB47_1 Depth=1 @@ -4005,12 +4005,12 @@ define i32 @atomicrmw_add_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB47_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB47_1 ; CHECK-NOLSE-O0-NEXT: b LBB47_5 ; CHECK-NOLSE-O0-NEXT: LBB47_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -4021,9 +4021,9 @@ define i32 @atomicrmw_add_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload 
+; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd4_acq_rel ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -4072,17 +4072,17 @@ define i32 @atomicrmw_xchg_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB48_1 ; CHECK-NOLSE-O0-NEXT: LBB48_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB48_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w12, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w12, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: LBB48_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB48_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -4097,12 +4097,12 @@ define i32 @atomicrmw_xchg_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB48_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; 
CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB48_1 ; CHECK-NOLSE-O0-NEXT: b LBB48_5 ; CHECK-NOLSE-O0-NEXT: LBB48_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -4113,9 +4113,9 @@ define i32 @atomicrmw_xchg_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_swp4_relax ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -4164,17 +4164,17 @@ define i32 @atomicrmw_sub_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB49_1 ; CHECK-NOLSE-O0-NEXT: LBB49_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB49_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte 
Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: subs w12, w8, w9 ; CHECK-NOLSE-O0-NEXT: LBB49_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB49_1 Depth=1 @@ -4190,12 +4190,12 @@ define i32 @atomicrmw_sub_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB49_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB49_1 ; CHECK-NOLSE-O0-NEXT: b LBB49_5 ; CHECK-NOLSE-O0-NEXT: LBB49_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -4206,9 +4206,9 @@ define i32 @atomicrmw_sub_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w9, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: mov w8, wzr ; CHECK-OUTLINE-O0-NEXT: subs w0, w8, w9 ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd4_acq @@ -4261,17 +4261,17 @@ define i32 @atomicrmw_and_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str 
x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB50_1 ; CHECK-NOLSE-O0-NEXT: LBB50_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB50_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: and w12, w8, w9 ; CHECK-NOLSE-O0-NEXT: LBB50_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB50_1 Depth=1 @@ -4287,12 +4287,12 @@ define i32 @atomicrmw_and_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB50_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB50_1 ; CHECK-NOLSE-O0-NEXT: b LBB50_5 ; CHECK-NOLSE-O0-NEXT: LBB50_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -4303,9 +4303,9 @@ define i32 @atomicrmw_and_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; 
CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w9, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: mov w8, #-1 ; =0xffffffff ; CHECK-OUTLINE-O0-NEXT: eor w0, w8, w9 ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldclr4_rel @@ -4358,17 +4358,17 @@ define i32 @atomicrmw_or_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB51_1 ; CHECK-NOLSE-O0-NEXT: LBB51_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB51_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: orr w12, w8, w9 ; CHECK-NOLSE-O0-NEXT: LBB51_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB51_1 Depth=1 @@ -4384,12 +4384,12 @@ define i32 @atomicrmw_or_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB51_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; 
CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB51_1 ; CHECK-NOLSE-O0-NEXT: b LBB51_5 ; CHECK-NOLSE-O0-NEXT: LBB51_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -4400,9 +4400,9 @@ define i32 @atomicrmw_or_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldset4_acq_rel ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -4451,17 +4451,17 @@ define i32 @atomicrmw_xor_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB52_1 ; CHECK-NOLSE-O0-NEXT: LBB52_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB52_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; 
CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: eor w12, w8, w9 ; CHECK-NOLSE-O0-NEXT: LBB52_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB52_1 Depth=1 @@ -4477,12 +4477,12 @@ define i32 @atomicrmw_xor_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB52_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB52_1 ; CHECK-NOLSE-O0-NEXT: b LBB52_5 ; CHECK-NOLSE-O0-NEXT: LBB52_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -4493,9 +4493,9 @@ define i32 @atomicrmw_xor_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldeor4_relax ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -4545,17 +4545,17 @@ define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; 
CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB53_1 ; CHECK-NOLSE-O0-NEXT: LBB53_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB53_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: subs w10, w8, w9 ; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, le ; CHECK-NOLSE-O0-NEXT: LBB53_2: ; %atomicrmw.start @@ -4572,12 +4572,12 @@ define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB53_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB53_1 ; CHECK-NOLSE-O0-NEXT: b LBB53_5 ; CHECK-NOLSE-O0-NEXT: LBB53_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -4588,29 +4588,29 @@ define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; 
CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldr w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB53_1 ; CHECK-OUTLINE-O0-NEXT: LBB53_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: subs w9, w0, w8 ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, le ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_acq -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w8 ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB53_1 ; CHECK-OUTLINE-O0-NEXT: b LBB53_2 ; CHECK-OUTLINE-O0-NEXT: LBB53_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload 
; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -4659,17 +4659,17 @@ define i32 @atomicrmw_max_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB54_1 ; CHECK-NOLSE-O0-NEXT: LBB54_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB54_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: subs w10, w8, w9 ; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, gt ; CHECK-NOLSE-O0-NEXT: LBB54_2: ; %atomicrmw.start @@ -4686,12 +4686,12 @@ define i32 @atomicrmw_max_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB54_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB54_1 ; CHECK-NOLSE-O0-NEXT: b LBB54_5 ; CHECK-NOLSE-O0-NEXT: LBB54_5: ; %atomicrmw.end -; 
CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -4702,29 +4702,29 @@ define i32 @atomicrmw_max_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldr w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB54_1 ; CHECK-OUTLINE-O0-NEXT: LBB54_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: subs w9, w0, w8 ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, gt ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_rel -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w8 ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill +; 
CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB54_1 ; CHECK-OUTLINE-O0-NEXT: b LBB54_2 ; CHECK-OUTLINE-O0-NEXT: LBB54_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -4773,17 +4773,17 @@ define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB55_1 ; CHECK-NOLSE-O0-NEXT: LBB55_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB55_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: subs w10, w8, w9 ; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ls ; CHECK-NOLSE-O0-NEXT: LBB55_2: ; %atomicrmw.start @@ -4800,12 +4800,12 @@ define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB55_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte 
Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB55_1 ; CHECK-NOLSE-O0-NEXT: b LBB55_5 ; CHECK-NOLSE-O0-NEXT: LBB55_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -4816,29 +4816,29 @@ define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldr w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB55_1 ; CHECK-OUTLINE-O0-NEXT: LBB55_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: subs w9, w0, w8 ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, ls ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_acq_rel -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload +; 
CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w8 ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB55_1 ; CHECK-OUTLINE-O0-NEXT: b LBB55_2 ; CHECK-OUTLINE-O0-NEXT: LBB55_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -4887,17 +4887,17 @@ define i32 @atomicrmw_umax_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] -; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB56_1 ; CHECK-NOLSE-O0-NEXT: LBB56_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB56_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: subs w10, w8, w9 ; CHECK-NOLSE-O0-NEXT: csel 
w12, w8, w9, hi ; CHECK-NOLSE-O0-NEXT: LBB56_2: ; %atomicrmw.start @@ -4914,12 +4914,12 @@ define i32 @atomicrmw_umax_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB56_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Spill +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB56_1 ; CHECK-NOLSE-O0-NEXT: b LBB56_5 ; CHECK-NOLSE-O0-NEXT: LBB56_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -4930,29 +4930,29 @@ define i32 @atomicrmw_umax_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w1, [sp, #24] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldr w0, [x0] -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB56_1 ; CHECK-OUTLINE-O0-NEXT: LBB56_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #28] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #16] ; 8-byte Reload +; 
CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Reload +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #8] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: subs w9, w0, w8 ; CHECK-OUTLINE-O0-NEXT: csel w1, w0, w8, hi ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_relax -; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w8, [sp, #8] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w8 ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #28] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB56_1 ; CHECK-OUTLINE-O0-NEXT: b LBB56_2 ; CHECK-OUTLINE-O0-NEXT: LBB56_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #48 ; CHECK-OUTLINE-O0-NEXT: ret @@ -5000,17 +5000,17 @@ define i64 @atomicrmw_add_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB57_1 ; CHECK-NOLSE-O0-NEXT: LBB57_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB57_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte 
Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add x12, x8, x9 ; CHECK-NOLSE-O0-NEXT: LBB57_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB57_1 Depth=1 @@ -5026,12 +5026,12 @@ define i64 @atomicrmw_add_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB57_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB57_1 ; CHECK-NOLSE-O0-NEXT: b LBB57_5 ; CHECK-NOLSE-O0-NEXT: LBB57_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -5042,9 +5042,9 @@ define i64 @atomicrmw_add_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x0, x1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd8_acq_rel ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -5092,17 +5092,17 @@ define i64 @atomicrmw_xchg_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; 
CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB58_1 ; CHECK-NOLSE-O0-NEXT: LBB58_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB58_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x12, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x12, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: LBB58_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB58_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -5117,12 +5117,12 @@ define i64 @atomicrmw_xchg_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB58_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB58_1 ; CHECK-NOLSE-O0-NEXT: b LBB58_5 ; CHECK-NOLSE-O0-NEXT: LBB58_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -5133,9 +5133,9 @@ define i64 @atomicrmw_xchg_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; 
CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x0, x1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_swp8_relax ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -5184,17 +5184,17 @@ define i64 @atomicrmw_sub_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB59_1 ; CHECK-NOLSE-O0-NEXT: LBB59_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB59_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: subs x12, x8, x9 ; CHECK-NOLSE-O0-NEXT: LBB59_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB59_1 Depth=1 @@ -5210,12 +5210,12 @@ define i64 @atomicrmw_sub_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB59_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; 
CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB59_1 ; CHECK-NOLSE-O0-NEXT: b LBB59_5 ; CHECK-NOLSE-O0-NEXT: LBB59_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -5226,9 +5226,9 @@ define i64 @atomicrmw_sub_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x9, x1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: mov x8, xzr ; CHECK-OUTLINE-O0-NEXT: subs x0, x8, x9 ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldadd8_acq @@ -5281,17 +5281,17 @@ define i64 @atomicrmw_and_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB60_1 ; CHECK-NOLSE-O0-NEXT: LBB60_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB60_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr 
x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: and x12, x8, x9 ; CHECK-NOLSE-O0-NEXT: LBB60_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB60_1 Depth=1 @@ -5307,12 +5307,12 @@ define i64 @atomicrmw_and_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB60_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB60_1 ; CHECK-NOLSE-O0-NEXT: b LBB60_5 ; CHECK-NOLSE-O0-NEXT: LBB60_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -5323,9 +5323,9 @@ define i64 @atomicrmw_and_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x9, x1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: mov x8, #-1 ; =0xffffffffffffffff ; CHECK-OUTLINE-O0-NEXT: eor x0, x8, x9 ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldclr8_rel @@ -5378,17 +5378,17 @@ define i64 @atomicrmw_or_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: 
sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB61_1 ; CHECK-NOLSE-O0-NEXT: LBB61_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB61_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: orr x12, x8, x9 ; CHECK-NOLSE-O0-NEXT: LBB61_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB61_1 Depth=1 @@ -5404,12 +5404,12 @@ define i64 @atomicrmw_or_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB61_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB61_1 ; CHECK-NOLSE-O0-NEXT: b LBB61_5 ; CHECK-NOLSE-O0-NEXT: LBB61_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -5420,9 +5420,9 @@ define i64 @atomicrmw_or_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O0-NEXT: 
.cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x0, x1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldset8_acq_rel ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -5471,17 +5471,17 @@ define i64 @atomicrmw_xor_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB62_1 ; CHECK-NOLSE-O0-NEXT: LBB62_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB62_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: eor x12, x8, x9 ; CHECK-NOLSE-O0-NEXT: LBB62_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB62_1 Depth=1 @@ -5497,12 +5497,12 @@ define i64 @atomicrmw_xor_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB62_1 Depth=1 ; 
CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB62_1 ; CHECK-NOLSE-O0-NEXT: b LBB62_5 ; CHECK-NOLSE-O0-NEXT: LBB62_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -5513,9 +5513,9 @@ define i64 @atomicrmw_xor_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x0, x1 -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_ldeor8_relax ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #32 @@ -5565,17 +5565,17 @@ define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB63_1 ; CHECK-NOLSE-O0-NEXT: LBB63_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; 
CHECK-NOLSE-O0-NEXT: ; Child Loop BB63_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: subs x10, x8, x9 ; CHECK-NOLSE-O0-NEXT: csel x12, x8, x9, le ; CHECK-NOLSE-O0-NEXT: LBB63_2: ; %atomicrmw.start @@ -5592,12 +5592,12 @@ define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB63_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB63_1 ; CHECK-NOLSE-O0-NEXT: b LBB63_5 ; CHECK-NOLSE-O0-NEXT: LBB63_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -5608,29 +5608,29 @@ define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 64 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldr x0, [x0] -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB63_1 ; CHECK-OUTLINE-O0-NEXT: LBB63_1: ; 
%atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: subs x9, x0, x8 ; CHECK-OUTLINE-O0-NEXT: csel x1, x0, x8, le ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_acq -; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x8 ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB63_1 ; CHECK-OUTLINE-O0-NEXT: b LBB63_2 ; CHECK-OUTLINE-O0-NEXT: LBB63_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #16] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #64 ; CHECK-OUTLINE-O0-NEXT: ret @@ -5679,17 +5679,17 @@ define i64 @atomicrmw_max_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Spill ; 
CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB64_1 ; CHECK-NOLSE-O0-NEXT: LBB64_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB64_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: subs x10, x8, x9 ; CHECK-NOLSE-O0-NEXT: csel x12, x8, x9, gt ; CHECK-NOLSE-O0-NEXT: LBB64_2: ; %atomicrmw.start @@ -5706,12 +5706,12 @@ define i64 @atomicrmw_max_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB64_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB64_1 ; CHECK-NOLSE-O0-NEXT: b LBB64_5 ; CHECK-NOLSE-O0-NEXT: LBB64_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -5722,29 +5722,29 @@ define i64 @atomicrmw_max_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 64 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Spill +; 
CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldr x0, [x0] -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB64_1 ; CHECK-OUTLINE-O0-NEXT: LBB64_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: subs x9, x0, x8 ; CHECK-OUTLINE-O0-NEXT: csel x1, x0, x8, gt ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_rel -; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x8 ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB64_1 ; CHECK-OUTLINE-O0-NEXT: b LBB64_2 ; CHECK-OUTLINE-O0-NEXT: LBB64_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #16] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #64 ; CHECK-OUTLINE-O0-NEXT: ret @@ -5793,17 +5793,17 @@ define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, 
#32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB65_1 ; CHECK-NOLSE-O0-NEXT: LBB65_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB65_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: subs x10, x8, x9 ; CHECK-NOLSE-O0-NEXT: csel x12, x8, x9, ls ; CHECK-NOLSE-O0-NEXT: LBB65_2: ; %atomicrmw.start @@ -5820,12 +5820,12 @@ define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB65_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB65_1 ; CHECK-NOLSE-O0-NEXT: b LBB65_5 ; CHECK-NOLSE-O0-NEXT: LBB65_5: ; %atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -5836,29 +5836,29 @@ define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O0-NEXT: 
.cfi_def_cfa_offset 64 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldr x0, [x0] -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB65_1 ; CHECK-OUTLINE-O0-NEXT: LBB65_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: subs x9, x0, x8 ; CHECK-OUTLINE-O0-NEXT: csel x1, x0, x8, ls ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_acq_rel -; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x8 ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB65_1 ; CHECK-OUTLINE-O0-NEXT: b LBB65_2 ; CHECK-OUTLINE-O0-NEXT: LBB65_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr 
x0, [sp, #16] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #64 ; CHECK-OUTLINE-O0-NEXT: ret @@ -5907,17 +5907,17 @@ define i64 @atomicrmw_umax_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0: ; %bb.0: ; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x1, [sp, #16] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] -; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: b LBB66_1 ; CHECK-NOLSE-O0-NEXT: LBB66_1: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB66_2 Depth 2 -; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #8] ; 8-byte Reload +; CHECK-NOLSE-O0-NEXT: ldr x9, [sp, #16] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: subs x10, x8, x9 ; CHECK-NOLSE-O0-NEXT: csel x12, x8, x9, hi ; CHECK-NOLSE-O0-NEXT: LBB66_2: ; %atomicrmw.start @@ -5934,12 +5934,12 @@ define i64 @atomicrmw_umax_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB66_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 ; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Folded Spill -; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp] ; 8-byte Spill +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Spill ; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB66_1 ; CHECK-NOLSE-O0-NEXT: b LBB66_5 ; CHECK-NOLSE-O0-NEXT: LBB66_5: ; 
%atomicrmw.end -; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp] ; 8-byte Reload ; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; CHECK-NOLSE-O0-NEXT: ret ; @@ -5950,29 +5950,29 @@ define i64 @atomicrmw_umax_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 64 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #24] ; 8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str x1, [sp, #32] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: ldr x0, [x0] -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: b LBB66_1 ; CHECK-OUTLINE-O0-NEXT: LBB66_1: ; %atomicrmw.start ; CHECK-OUTLINE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Folded Reload -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #40] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp, #24] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #32] ; 8-byte Reload +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: subs x9, x0, x8 ; CHECK-OUTLINE-O0-NEXT: csel x1, x0, x8, hi ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_relax -; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x8, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x8 ; CHECK-OUTLINE-O0-NEXT: cset w8, eq -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #16] ; 
8-byte Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #40] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: tbz w8, #0, LBB66_1 ; CHECK-OUTLINE-O0-NEXT: b LBB66_2 ; CHECK-OUTLINE-O0-NEXT: LBB66_2: ; %atomicrmw.end -; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x0, [sp, #16] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload ; CHECK-OUTLINE-O0-NEXT: add sp, sp, #64 ; CHECK-OUTLINE-O0-NEXT: ret @@ -6061,13 +6061,13 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w1, w2 -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas1_relax -; CHECK-OUTLINE-O0-NEXT: ldr w1, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w1, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xff ; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w1, uxtb ; CHECK-OUTLINE-O0-NEXT: cset w1, eq @@ -6169,13 +6169,13 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w1, w2 -; 
CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas2_relax -; CHECK-OUTLINE-O0-NEXT: ldr w1, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w1, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: and w8, w0, #0xffff ; CHECK-OUTLINE-O0-NEXT: subs w8, w8, w1, uxth ; CHECK-OUTLINE-O0-NEXT: cset w1, eq @@ -6274,13 +6274,13 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w0, w1 -; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str w0, [sp, #12] ; 4-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov w1, w2 -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas4_relax -; CHECK-OUTLINE-O0-NEXT: ldr w1, [sp, #12] ; 4-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr w1, [sp, #12] ; 4-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs w8, w0, w1 ; CHECK-OUTLINE-O0-NEXT: cset w1, eq ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload @@ -6374,13 +6374,13 @@ define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x0, x1 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x1, x2 -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte 
Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_relax -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x1 ; CHECK-OUTLINE-O0-NEXT: cset w1, eq ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload @@ -6474,13 +6474,13 @@ define { ptr, i1 } @cmpxchg_ptr(ptr %ptr, ptr %desired, ptr %new) { ; CHECK-OUTLINE-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w30, -8 ; CHECK-OUTLINE-O0-NEXT: .cfi_offset w29, -16 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x0, x1 -; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-OUTLINE-O0-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-OUTLINE-O0-NEXT: mov x1, x2 -; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x2, [sp] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: bl ___aarch64_cas8_relax -; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Folded Reload +; CHECK-OUTLINE-O0-NEXT: ldr x1, [sp, #8] ; 8-byte Reload ; CHECK-OUTLINE-O0-NEXT: subs x8, x0, x1 ; CHECK-OUTLINE-O0-NEXT: cset w1, eq ; CHECK-OUTLINE-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/atomic-anyextending-load-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/atomic-anyextending-load-crash.ll index 4bb4e4882410d..a3d57f05d9c60 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/atomic-anyextending-load-crash.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/atomic-anyextending-load-crash.ll @@ -12,12 +12,12 @@ define void @test(ptr %0) { ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ldar w8, [x0] -; CHECK-NEXT: str w8, [sp, #116] ; 4-byte Folded Spill +; CHECK-NEXT: str w8, [sp, #116] ; 4-byte Spill ; CHECK-NEXT: mov 
x8, #0 ; =0x0 -; CHECK-NEXT: str x8, [sp, #120] ; 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp, #120] ; 8-byte Spill ; CHECK-NEXT: blr x8 -; CHECK-NEXT: ldr w11, [sp, #116] ; 4-byte Folded Reload -; CHECK-NEXT: ldr x8, [sp, #120] ; 8-byte Folded Reload +; CHECK-NEXT: ldr w11, [sp, #116] ; 4-byte Reload +; CHECK-NEXT: ldr x8, [sp, #120] ; 8-byte Reload ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: str xzr, [x9] ; CHECK-NEXT: str xzr, [x9, #8] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll index 9e09282767bdc..6d493d545406f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll @@ -8,12 +8,12 @@ define void @call_byval_i32(ptr %incoming) uwtable { ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: str w8, [sp] ; CHECK-NEXT: bl byval_i32 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w30 @@ -30,7 +30,7 @@ define void @call_byval_a64i32(ptr %incoming) uwtable { ; CHECK-NEXT: sub sp, sp, #288 ; CHECK-NEXT: .cfi_def_cfa_offset 288 ; CHECK-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill -; CHECK-NEXT: str x28, [sp, #272] // 8-byte Folded Spill +; CHECK-NEXT: str x28, [sp, #272] // 8-byte Spill ; CHECK-NEXT: add x29, sp, #256 ; CHECK-NEXT: .cfi_def_cfa w29, 32 ; CHECK-NEXT: .cfi_offset w28, -16 @@ -71,7 +71,7 @@ define void @call_byval_a64i32(ptr %incoming) uwtable { ; CHECK-NEXT: bl byval_a64i32 ; CHECK-NEXT: .cfi_def_cfa wsp, 288 ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload -; CHECK-NEXT: ldr x28, [sp, #272] // 8-byte Folded Reload +; CHECK-NEXT: ldr 
x28, [sp, #272] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #288 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w28 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-tail-call-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-tail-call-fallback.ll index ebd2beca67810..276cfbcd2a293 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-tail-call-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-tail-call-fallback.ll @@ -21,7 +21,7 @@ define void @pr70207(i128 %arg1, i128 %arg2) nounwind { ; CHECK-NEXT: str x8, [sp, #-32]! ; CHECK-NEXT: stp x9, x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl func -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret tail call void @func(i64 0, i64 0, i64 0, i64 0, i64 0, i128 %arg1, i128 %arg2) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-protector-windows.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-protector-windows.ll index e7f4785d01df6..447267cf57f76 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-protector-windows.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-protector-windows.ll @@ -9,7 +9,7 @@ define void @caller() sspreq { ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: .seh_stackalloc 32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 16 ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: adrp x8, __security_cookie @@ -24,7 +24,7 @@ define void @caller() sspreq { ; CHECK-NEXT: b.ne .LBB0_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: 
.seh_stackalloc 32 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-anyext-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-anyext-crash.ll index 7af5b3d801e0c..a63636e666e8f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-anyext-crash.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-anyext-crash.ll @@ -18,7 +18,7 @@ define i32 @test() { ; CHECK-NEXT: ; kill: def $d0 killed $s0 ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: mov w9, #0 ; =0x0 -; CHECK-NEXT: str w9, [sp, #60] ; 4-byte Folded Spill +; CHECK-NEXT: str w9, [sp, #60] ; 4-byte Spill ; CHECK-NEXT: str xzr, [x8] ; CHECK-NEXT: str xzr, [x8, #8] ; CHECK-NEXT: str xzr, [x8, #16] @@ -28,7 +28,7 @@ define i32 @test() { ; CHECK-NEXT: mov x8, #0 ; =0x0 ; CHECK-NEXT: mov x0, x8 ; CHECK-NEXT: blr x8 -; CHECK-NEXT: ldr w0, [sp, #60] ; 4-byte Folded Reload +; CHECK-NEXT: ldr w0, [sp, #60] ; 4-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/split-wide-shifts-multiway.ll b/llvm/test/CodeGen/AArch64/GlobalISel/split-wide-shifts-multiway.ll index 480fcbd6a9788..d669c49cb019b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/split-wide-shifts-multiway.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/split-wide-shifts-multiway.ll @@ -1243,7 +1243,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: mov w8, w2 ; GISEL-NEXT: lsr x9, x8, #6 ; GISEL-NEXT: and x12, x8, #0x3f -; GISEL-NEXT: str x0, [sp, #144] ; 8-byte Folded Spill +; GISEL-NEXT: str x0, [sp, #144] ; 8-byte Spill ; GISEL-NEXT: and x14, x8, #0x3f ; GISEL-NEXT: mov w13, #64 ; =0x40 ; GISEL-NEXT: and x16, x8, #0x3f @@ -1270,16 +1270,16 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: and x21, x8, #0x3f ; GISEL-NEXT: csel x12, xzr, x12, eq ; GISEL-NEXT: cmp x9, #6 -; GISEL-NEXT: str x6, [sp, #24] ; 8-byte Folded Spill +; GISEL-NEXT: str x6, [sp, #24] ; 
8-byte Spill ; GISEL-NEXT: csel x12, xzr, x12, eq ; GISEL-NEXT: cmp x9, #7 -; GISEL-NEXT: str x28, [sp, #304] ; 8-byte Folded Spill +; GISEL-NEXT: str x28, [sp, #304] ; 8-byte Spill ; GISEL-NEXT: csel x12, xzr, x12, eq ; GISEL-NEXT: cmp x9, #8 -; GISEL-NEXT: str x7, [sp, #272] ; 8-byte Folded Spill +; GISEL-NEXT: str x7, [sp, #272] ; 8-byte Spill ; GISEL-NEXT: csel x12, xzr, x12, eq ; GISEL-NEXT: cmp x9, #9 -; GISEL-NEXT: str x20, [sp, #112] ; 8-byte Folded Spill +; GISEL-NEXT: str x20, [sp, #112] ; 8-byte Spill ; GISEL-NEXT: csel x12, xzr, x12, eq ; GISEL-NEXT: cmp x9, #10 ; GISEL-NEXT: csel x12, xzr, x12, eq @@ -1296,7 +1296,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x10, x10, x12, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x10, [sp, #232] ; 8-byte Folded Spill +; GISEL-NEXT: str x10, [sp, #232] ; 8-byte Spill ; GISEL-NEXT: csel x10, xzr, x3, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x10, x6, x10 @@ -1336,7 +1336,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: lsl x26, x12, x14 ; GISEL-NEXT: csel x11, x11, x13, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x11, [sp, #224] ; 8-byte Folded Spill +; GISEL-NEXT: str x11, [sp, #224] ; 8-byte Spill ; GISEL-NEXT: csel x11, xzr, x20, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x11, x26, x11 @@ -1380,7 +1380,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x11, x12, x11, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x11, [sp, #216] ; 8-byte Folded Spill +; GISEL-NEXT: str x11, [sp, #216] ; 8-byte Spill ; GISEL-NEXT: csel x11, xzr, x15, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x11, x30, x11 @@ -1426,7 +1426,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: lsl x0, x12, x16 ; GISEL-NEXT: csel x10, x10, x13, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x10, [sp, #208] ; 8-byte Folded 
Spill +; GISEL-NEXT: str x10, [sp, #208] ; 8-byte Spill ; GISEL-NEXT: csel x10, xzr, x17, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x10, x0, x10 @@ -1437,9 +1437,9 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: mov x16, x15 ; GISEL-NEXT: csel x13, xzr, x15, eq ; GISEL-NEXT: cmp x9, #1 -; GISEL-NEXT: str x4, [sp, #248] ; 8-byte Folded Spill +; GISEL-NEXT: str x4, [sp, #248] ; 8-byte Spill ; GISEL-NEXT: orr x13, x30, x13 -; GISEL-NEXT: str x0, [sp, #48] ; 8-byte Folded Spill +; GISEL-NEXT: str x0, [sp, #48] ; 8-byte Spill ; GISEL-NEXT: csel x10, x13, x10, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x20, eq @@ -1478,7 +1478,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x10, x12, x10, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x10, [sp, #200] ; 8-byte Folded Spill +; GISEL-NEXT: str x10, [sp, #200] ; 8-byte Spill ; GISEL-NEXT: csel x10, xzr, x4, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x10, x19, x10 @@ -1532,7 +1532,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: lsl x22, x12, x15 ; GISEL-NEXT: csel x11, x11, x13, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x11, [sp, #192] ; 8-byte Folded Spill +; GISEL-NEXT: str x11, [sp, #192] ; 8-byte Spill ; GISEL-NEXT: csel x11, xzr, x3, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x11, x22, x11 @@ -1545,7 +1545,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: mov x25, x27 ; GISEL-NEXT: orr x13, x19, x13 ; GISEL-NEXT: mov x14, x5 -; GISEL-NEXT: str x27, [sp, #328] ; 8-byte Folded Spill +; GISEL-NEXT: str x27, [sp, #328] ; 8-byte Spill ; GISEL-NEXT: csel x11, x13, x11, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x17, eq @@ -1592,7 +1592,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x11, x12, x11, eq ; GISEL-NEXT: tst x8, #0x3f -; 
GISEL-NEXT: str x11, [sp, #184] ; 8-byte Folded Spill +; GISEL-NEXT: str x11, [sp, #184] ; 8-byte Spill ; GISEL-NEXT: csel x11, xzr, x13, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x11, x5, x11 @@ -1650,12 +1650,12 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x12, x10, x12, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: lsl x23, x11, x21 -; GISEL-NEXT: str x12, [sp, #176] ; 8-byte Folded Spill +; GISEL-NEXT: str x12, [sp, #176] ; 8-byte Spill ; GISEL-NEXT: csel x12, xzr, x27, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x12, x23, x12 ; GISEL-NEXT: lsr x21, x11, x2 -; GISEL-NEXT: str x23, [sp, #288] ; 8-byte Folded Spill +; GISEL-NEXT: str x23, [sp, #288] ; 8-byte Spill ; GISEL-NEXT: csel x12, x12, xzr, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x13, eq @@ -1714,7 +1714,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x11, x11, x12, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x11, [sp, #168] ; 8-byte Folded Spill +; GISEL-NEXT: str x11, [sp, #168] ; 8-byte Spill ; GISEL-NEXT: csel x11, xzr, x21, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x11, x10, x11 @@ -1745,7 +1745,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x12, xzr, x4, eq ; GISEL-NEXT: cmp x9, #4 -; GISEL-NEXT: str x22, [sp, #240] ; 8-byte Folded Spill +; GISEL-NEXT: str x22, [sp, #240] ; 8-byte Spill ; GISEL-NEXT: orr x12, x19, x12 ; GISEL-NEXT: csel x11, x12, x11, eq ; GISEL-NEXT: tst x8, #0x3f @@ -1776,7 +1776,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: ldr x24, [x6, #88] ; GISEL-NEXT: csel x11, xzr, x11, eq ; GISEL-NEXT: cmp x9, #11 -; GISEL-NEXT: ldr x6, [sp, #272] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x6, [sp, #272] ; 8-byte Reload ; GISEL-NEXT: csel x11, xzr, x11, eq ; GISEL-NEXT: cmp x9, #12 ; GISEL-NEXT: csel x11, xzr, x11, eq @@ -1792,13 
+1792,13 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: mov x28, x2 ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: lsl x2, x11, x13 -; GISEL-NEXT: str x12, [sp, #160] ; 8-byte Folded Spill +; GISEL-NEXT: str x12, [sp, #160] ; 8-byte Spill ; GISEL-NEXT: csel x12, xzr, x22, eq ; GISEL-NEXT: cmp x9, #0 -; GISEL-NEXT: ldr x1, [sp, #312] ; 8-byte Folded Reload -; GISEL-NEXT: str x28, [sp, #16] ; 8-byte Folded Spill +; GISEL-NEXT: ldr x1, [sp, #312] ; 8-byte Reload +; GISEL-NEXT: str x28, [sp, #16] ; 8-byte Spill ; GISEL-NEXT: orr x12, x2, x12 -; GISEL-NEXT: str x2, [sp, #280] ; 8-byte Folded Spill +; GISEL-NEXT: str x2, [sp, #280] ; 8-byte Spill ; GISEL-NEXT: csel x12, x12, xzr, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x21, eq @@ -1811,7 +1811,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: mov x25, x16 ; GISEL-NEXT: orr x13, x10, x13 ; GISEL-NEXT: mov x10, x30 -; GISEL-NEXT: str x25, [sp, #80] ; 8-byte Folded Spill +; GISEL-NEXT: str x25, [sp, #80] ; 8-byte Spill ; GISEL-NEXT: csel x12, x13, x12, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x23, eq @@ -1826,7 +1826,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x9, #4 ; GISEL-NEXT: mov x3, x21 ; GISEL-NEXT: orr x13, x15, x13 -; GISEL-NEXT: str x3, [sp, #32] ; 8-byte Folded Spill +; GISEL-NEXT: str x3, [sp, #32] ; 8-byte Spill ; GISEL-NEXT: csel x12, x13, x12, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x4, eq @@ -1839,7 +1839,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x9, #6 ; GISEL-NEXT: mov x17, x27 ; GISEL-NEXT: orr x13, x0, x13 -; GISEL-NEXT: ldr x0, [sp, #24] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x0, [sp, #24] ; 8-byte Reload ; GISEL-NEXT: csel x12, x13, x12, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x16, eq @@ -1863,7 +1863,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 
%shift) { ; GISEL-NEXT: cmp x9, #11 ; GISEL-NEXT: csel x12, xzr, x12, eq ; GISEL-NEXT: cmp x9, #12 -; GISEL-NEXT: str x13, [sp, #96] ; 8-byte Folded Spill +; GISEL-NEXT: str x13, [sp, #96] ; 8-byte Spill ; GISEL-NEXT: csel x12, xzr, x12, eq ; GISEL-NEXT: cmp x9, #13 ; GISEL-NEXT: csel x12, xzr, x12, eq @@ -1874,13 +1874,13 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x11, x11, x12, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x11, [sp, #152] ; 8-byte Folded Spill +; GISEL-NEXT: str x11, [sp, #152] ; 8-byte Spill ; GISEL-NEXT: and x11, x8, #0x3f ; GISEL-NEXT: lsl x27, x24, x11 ; GISEL-NEXT: csel x11, xzr, x13, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x11, x27, x11 -; GISEL-NEXT: str x27, [sp, #56] ; 8-byte Folded Spill +; GISEL-NEXT: str x27, [sp, #56] ; 8-byte Spill ; GISEL-NEXT: csel x11, x11, xzr, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x12, xzr, x22, eq @@ -1892,7 +1892,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x12, xzr, x21, eq ; GISEL-NEXT: cmp x9, #2 -; GISEL-NEXT: ldr x21, [sp, #288] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x21, [sp, #288] ; 8-byte Reload ; GISEL-NEXT: orr x12, x1, x12 ; GISEL-NEXT: mov x1, x27 ; GISEL-NEXT: csel x11, x12, x11, eq @@ -1908,7 +1908,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: mov x7, x15 ; GISEL-NEXT: csel x11, x12, x11, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x7, [sp, #40] ; 8-byte Folded Spill +; GISEL-NEXT: str x7, [sp, #40] ; 8-byte Spill ; GISEL-NEXT: csel x12, xzr, x23, eq ; GISEL-NEXT: cmp x9, #5 ; GISEL-NEXT: orr x12, x15, x12 @@ -1931,7 +1931,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x12, xzr, x25, eq ; GISEL-NEXT: cmp x9, #8 ; GISEL-NEXT: orr x12, x10, x12 -; GISEL-NEXT: ldr x10, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x10, [sp, #304] ; 
8-byte Reload ; GISEL-NEXT: csel x11, x12, x11, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x12, xzr, x20, eq @@ -1958,27 +1958,27 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: and x10, x8, #0x3f ; GISEL-NEXT: csel x12, x24, x12, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x24, [sp, #248] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x24, [sp, #248] ; 8-byte Reload ; GISEL-NEXT: lsl x15, x11, x10 ; GISEL-NEXT: csel x10, xzr, x14, eq ; GISEL-NEXT: cmp x9, #0 -; GISEL-NEXT: str x12, [sp, #136] ; 8-byte Folded Spill -; GISEL-NEXT: ldr x12, [sp, #312] ; 8-byte Folded Reload +; GISEL-NEXT: str x12, [sp, #136] ; 8-byte Spill +; GISEL-NEXT: ldr x12, [sp, #312] ; 8-byte Reload ; GISEL-NEXT: orr x10, x15, x10 -; GISEL-NEXT: str x15, [sp, #296] ; 8-byte Folded Spill +; GISEL-NEXT: str x15, [sp, #296] ; 8-byte Spill ; GISEL-NEXT: mov x15, x13 ; GISEL-NEXT: csel x10, x10, xzr, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x13, eq ; GISEL-NEXT: cmp x9, #1 ; GISEL-NEXT: orr x13, x27, x13 -; GISEL-NEXT: ldr x27, [sp, #240] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x27, [sp, #240] ; 8-byte Reload ; GISEL-NEXT: csel x10, x13, x10, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x27, eq ; GISEL-NEXT: cmp x9, #2 ; GISEL-NEXT: orr x13, x22, x13 -; GISEL-NEXT: ldr x22, [sp, #272] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x22, [sp, #272] ; 8-byte Reload ; GISEL-NEXT: csel x10, x13, x10, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x3, eq @@ -2008,12 +2008,12 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x13, xzr, x24, eq ; GISEL-NEXT: cmp x9, #7 ; GISEL-NEXT: orr x13, x5, x13 -; GISEL-NEXT: ldr x5, [sp, #48] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x5, [sp, #48] ; 8-byte Reload ; GISEL-NEXT: csel x10, x13, x10, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x2, eq ; GISEL-NEXT: cmp x9, #8 -; GISEL-NEXT: ldr x2, [sp, #296] ; 8-byte Folded Reload 
+; GISEL-NEXT: ldr x2, [sp, #296] ; 8-byte Reload ; GISEL-NEXT: orr x13, x5, x13 ; GISEL-NEXT: csel x10, x13, x10, eq ; GISEL-NEXT: tst x8, #0x3f @@ -2036,17 +2036,17 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: lsr x13, x11, x28 ; GISEL-NEXT: csel x10, x17, x10, eq ; GISEL-NEXT: cmp x9, #13 -; GISEL-NEXT: ldr x17, [sp, #80] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x17, [sp, #80] ; 8-byte Reload ; GISEL-NEXT: csel x10, xzr, x10, eq ; GISEL-NEXT: cmp x9, #14 -; GISEL-NEXT: str x13, [sp, #104] ; 8-byte Folded Spill +; GISEL-NEXT: str x13, [sp, #104] ; 8-byte Spill ; GISEL-NEXT: csel x10, xzr, x10, eq ; GISEL-NEXT: cmp x9, #15 ; GISEL-NEXT: csel x10, xzr, x10, eq ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x10, x11, x10, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x10, [sp, #128] ; 8-byte Folded Spill +; GISEL-NEXT: str x10, [sp, #128] ; 8-byte Spill ; GISEL-NEXT: and x10, x8, #0x3f ; GISEL-NEXT: lsl x11, x6, x10 ; GISEL-NEXT: csel x10, xzr, x13, eq @@ -2054,7 +2054,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: ldp x0, x13, [sp, #280] ; 16-byte Folded Reload ; GISEL-NEXT: mov x6, x16 ; GISEL-NEXT: orr x10, x11, x10 -; GISEL-NEXT: str x11, [sp, #88] ; 8-byte Folded Spill +; GISEL-NEXT: str x11, [sp, #88] ; 8-byte Spill ; GISEL-NEXT: csel x10, x10, xzr, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x11, xzr, x14, eq @@ -2125,18 +2125,18 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x9, #13 ; GISEL-NEXT: csel x10, x16, x10, eq ; GISEL-NEXT: cmp x9, #14 -; GISEL-NEXT: ldr x16, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x16, [sp, #304] ; 8-byte Reload ; GISEL-NEXT: csel x10, xzr, x10, eq ; GISEL-NEXT: cmp x9, #15 ; GISEL-NEXT: csel x11, xzr, x10, eq ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: ldp x10, x4, [x16, #112] ; GISEL-NEXT: csel x11, x25, x11, eq -; GISEL-NEXT: str x11, [sp, #120] ; 8-byte Folded Spill +; GISEL-NEXT: str x11, [sp, 
#120] ; 8-byte Spill ; GISEL-NEXT: lsr x11, x25, x28 ; GISEL-NEXT: and x16, x8, #0x3f ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x25, [sp, #88] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x25, [sp, #88] ; 8-byte Reload ; GISEL-NEXT: lsl x24, x10, x16 ; GISEL-NEXT: csel x1, xzr, x11, eq ; GISEL-NEXT: cmp x9, #0 @@ -2152,7 +2152,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x30, xzr, x7, eq ; GISEL-NEXT: cmp x9, #2 ; GISEL-NEXT: orr x30, x2, x30 -; GISEL-NEXT: ldr x2, [sp, #56] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x2, [sp, #56] ; 8-byte Reload ; GISEL-NEXT: csel x1, x30, x1, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x30, xzr, x16, eq @@ -2164,18 +2164,18 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x9, #4 ; GISEL-NEXT: mov x27, x13 ; GISEL-NEXT: orr x30, x0, x30 -; GISEL-NEXT: ldr x0, [sp, #248] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x0, [sp, #248] ; 8-byte Reload ; GISEL-NEXT: csel x1, x30, x1, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x30, xzr, x15, eq -; GISEL-NEXT: ldr x15, [sp, #312] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x15, [sp, #312] ; 8-byte Reload ; GISEL-NEXT: cmp x9, #5 ; GISEL-NEXT: orr x30, x15, x30 ; GISEL-NEXT: csel x1, x30, x1, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x30, xzr, x3, eq ; GISEL-NEXT: cmp x9, #6 -; GISEL-NEXT: ldr x3, [sp, #40] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x3, [sp, #40] ; 8-byte Reload ; GISEL-NEXT: orr x30, x13, x30 ; GISEL-NEXT: csel x1, x30, x1, eq ; GISEL-NEXT: tst x8, #0x3f @@ -2215,7 +2215,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x30, xzr, x22, eq ; GISEL-NEXT: cmp x9, #13 ; GISEL-NEXT: orr x30, x5, x30 -; GISEL-NEXT: ldr x5, [sp, #16] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x5, [sp, #16] ; 8-byte Reload ; GISEL-NEXT: csel x1, x30, x1, eq ; GISEL-NEXT: cmp x9, #14 ; GISEL-NEXT: csel x1, x6, x1, eq @@ -2231,7 +2231,7 @@ define void 
@test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: ldp x29, x30, [sp, #416] ; 16-byte Folded Reload ; GISEL-NEXT: orr x10, x10, x1 -; GISEL-NEXT: ldr x1, [sp, #296] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x1, [sp, #296] ; 8-byte Reload ; GISEL-NEXT: csel x10, x10, xzr, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x11, xzr, x11, eq @@ -2251,34 +2251,34 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x11, xzr, x16, eq ; GISEL-NEXT: cmp x9, #4 -; GISEL-NEXT: ldr x16, [sp, #280] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x16, [sp, #280] ; 8-byte Reload ; GISEL-NEXT: orr x11, x2, x11 ; GISEL-NEXT: csel x10, x11, x10, eq -; GISEL-NEXT: ldr x11, [sp, #240] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #240] ; 8-byte Reload ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x11, xzr, x11, eq ; GISEL-NEXT: cmp x9, #5 ; GISEL-NEXT: orr x11, x16, x11 ; GISEL-NEXT: csel x10, x11, x10, eq -; GISEL-NEXT: ldr x11, [sp, #32] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #32] ; 8-byte Reload ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x11, xzr, x11, eq ; GISEL-NEXT: cmp x9, #6 ; GISEL-NEXT: orr x11, x15, x11 ; GISEL-NEXT: csel x10, x11, x10, eq -; GISEL-NEXT: ldr x11, [sp, #328] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #328] ; 8-byte Reload ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x11, xzr, x11, eq ; GISEL-NEXT: cmp x9, #7 ; GISEL-NEXT: orr x11, x27, x11 ; GISEL-NEXT: ldp x28, x27, [sp, #336] ; 16-byte Folded Reload ; GISEL-NEXT: csel x10, x11, x10, eq -; GISEL-NEXT: ldr x11, [sp, #320] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #320] ; 8-byte Reload ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x11, xzr, x11, eq ; GISEL-NEXT: cmp x9, #8 ; GISEL-NEXT: orr x11, x13, x11 -; GISEL-NEXT: ldr x13, [sp, #144] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x13, [sp, #144] ; 8-byte Reload ; GISEL-NEXT: csel x10, x11, x10, eq ; GISEL-NEXT: 
tst x8, #0x3f ; GISEL-NEXT: csel x11, xzr, x14, eq @@ -2290,7 +2290,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x9, #10 ; GISEL-NEXT: orr x11, x12, x11 ; GISEL-NEXT: csel x10, x11, x10, eq -; GISEL-NEXT: ldr x11, [sp, #232] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #232] ; 8-byte Reload ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: str x11, [x13] ; GISEL-NEXT: ldp x12, x11, [sp, #216] ; 16-byte Folded Reload @@ -2300,7 +2300,7 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: orr x11, x23, x11 ; GISEL-NEXT: ldp x24, x23, [sp, #368] ; 16-byte Folded Reload ; GISEL-NEXT: csel x10, x11, x10, eq -; GISEL-NEXT: ldr x11, [sp, #208] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #208] ; 8-byte Reload ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: str x11, [x13, #24] ; GISEL-NEXT: ldp x12, x11, [sp, #192] ; 16-byte Folded Reload @@ -2309,22 +2309,22 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x9, #12 ; GISEL-NEXT: orr x11, x21, x11 ; GISEL-NEXT: csel x10, x11, x10, eq -; GISEL-NEXT: ldr x11, [sp, #184] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #184] ; 8-byte Reload ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: str x11, [x13, #48] ; GISEL-NEXT: ldp x12, x11, [sp, #168] ; 16-byte Folded Reload ; GISEL-NEXT: stp x11, x12, [x13, #56] -; GISEL-NEXT: ldr x11, [sp, #112] ; 8-byte Folded Reload -; GISEL-NEXT: ldr x12, [sp, #136] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #112] ; 8-byte Reload +; GISEL-NEXT: ldr x12, [sp, #136] ; 8-byte Reload ; GISEL-NEXT: csel x11, xzr, x11, eq ; GISEL-NEXT: cmp x9, #13 ; GISEL-NEXT: orr x11, x20, x11 ; GISEL-NEXT: ldp x20, x19, [sp, #400] ; 16-byte Folded Reload ; GISEL-NEXT: csel x10, x11, x10, eq -; GISEL-NEXT: ldr x11, [sp, #160] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #160] ; 8-byte Reload ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: str x11, [x13, #72] -; GISEL-NEXT: ldr x11, [sp, #152] ; 
8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #152] ; 8-byte Reload ; GISEL-NEXT: str x11, [x13, #80] ; GISEL-NEXT: csel x11, xzr, x22, eq ; GISEL-NEXT: cmp x9, #14 @@ -2332,12 +2332,12 @@ define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: ldp x22, x21, [sp, #384] ; 16-byte Folded Reload ; GISEL-NEXT: csel x10, x11, x10, eq ; GISEL-NEXT: cmp x9, #15 -; GISEL-NEXT: ldr x9, [sp, #128] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x9, [sp, #128] ; 8-byte Reload ; GISEL-NEXT: ldp x26, x25, [sp, #352] ; 16-byte Folded Reload ; GISEL-NEXT: stp x12, x9, [x13, #88] ; GISEL-NEXT: csel x9, x6, x10, eq ; GISEL-NEXT: cmp x8, #0 -; GISEL-NEXT: ldr x8, [sp, #120] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x8, [sp, #120] ; 8-byte Reload ; GISEL-NEXT: stp x8, x5, [x13, #104] ; GISEL-NEXT: csel x8, x4, x9, eq ; GISEL-NEXT: str x8, [x13, #120] @@ -2513,12 +2513,12 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: lsr x9, x8, #6 ; GISEL-NEXT: lsr x11, x20, x14 ; GISEL-NEXT: lsr x19, x16, x14 -; GISEL-NEXT: str x16, [sp, #264] ; 8-byte Folded Spill +; GISEL-NEXT: str x16, [sp, #264] ; 8-byte Spill ; GISEL-NEXT: csel x10, xzr, x10, eq ; GISEL-NEXT: lsl x22, x12, x15 ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x10, x11, x10 -; GISEL-NEXT: str x12, [sp, #240] ; 8-byte Folded Spill +; GISEL-NEXT: str x12, [sp, #240] ; 8-byte Spill ; GISEL-NEXT: lsr x26, x12, x14 ; GISEL-NEXT: csel x10, x10, xzr, eq ; GISEL-NEXT: tst x8, #0x3f @@ -2530,10 +2530,10 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: ldp x12, x16, [x1, #32] ; GISEL-NEXT: csel x10, x11, x10, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x0, [sp, #296] ; 8-byte Folded Spill +; GISEL-NEXT: str x0, [sp, #296] ; 8-byte Spill ; GISEL-NEXT: csel x11, xzr, x24, eq ; GISEL-NEXT: cmp x9, #2 -; GISEL-NEXT: str x13, [sp, #216] ; 8-byte Folded Spill +; GISEL-NEXT: str x13, [sp, #216] ; 8-byte Spill ; GISEL-NEXT: lsl x23, x12, x15 ; 
GISEL-NEXT: orr x11, x26, x11 ; GISEL-NEXT: stp x12, x16, [sp, #176] ; 16-byte Folded Spill @@ -2566,13 +2566,13 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: ldp x4, x2, [x1, #64] ; GISEL-NEXT: csel x10, x11, x10, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x17, [sp, #144] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x17, [sp, #144] ; 8-byte Reload ; GISEL-NEXT: stp x5, x23, [sp, #24] ; 16-byte Folded Spill ; GISEL-NEXT: csel x11, xzr, x0, eq ; GISEL-NEXT: cmp x9, #6 ; GISEL-NEXT: lsl x3, x4, x15 ; GISEL-NEXT: orr x11, x13, x11 -; GISEL-NEXT: str x4, [sp, #208] ; 8-byte Folded Spill +; GISEL-NEXT: str x4, [sp, #208] ; 8-byte Spill ; GISEL-NEXT: csel x10, x11, x10, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: lsl x6, x2, x15 @@ -2580,7 +2580,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x9, #7 ; GISEL-NEXT: lsr x13, x4, x14 ; GISEL-NEXT: orr x11, x12, x11 -; GISEL-NEXT: str x2, [sp, #224] ; 8-byte Folded Spill +; GISEL-NEXT: str x2, [sp, #224] ; 8-byte Spill ; GISEL-NEXT: csel x16, x11, x10, eq ; GISEL-NEXT: ldp x10, x4, [x1, #80] ; GISEL-NEXT: tst x8, #0x3f @@ -2591,16 +2591,16 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: lsr x11, x11, x14 ; GISEL-NEXT: orr x2, x13, x2 ; GISEL-NEXT: lsl x12, x10, x15 -; GISEL-NEXT: str x10, [sp, #232] ; 8-byte Folded Spill +; GISEL-NEXT: str x10, [sp, #232] ; 8-byte Spill ; GISEL-NEXT: csel x16, x2, x16, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: lsr x10, x10, x14 ; GISEL-NEXT: csel x2, xzr, x12, eq -; GISEL-NEXT: str x12, [sp, #312] ; 8-byte Folded Spill +; GISEL-NEXT: str x12, [sp, #312] ; 8-byte Spill ; GISEL-NEXT: cmp x9, #9 ; GISEL-NEXT: orr x2, x11, x2 ; GISEL-NEXT: lsl x12, x4, x15 -; GISEL-NEXT: str x10, [sp, #304] ; 8-byte Folded Spill +; GISEL-NEXT: str x10, [sp, #304] ; 8-byte Spill ; GISEL-NEXT: csel x16, x2, x16, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: lsr x13, x4, x14 @@ -2614,7 
+2614,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: stp x4, x11, [sp, #248] ; 16-byte Folded Spill ; GISEL-NEXT: csel x16, x2, x16, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x3, [sp, #16] ; 8-byte Folded Spill +; GISEL-NEXT: str x3, [sp, #16] ; 8-byte Spill ; GISEL-NEXT: csel x2, xzr, x28, eq ; GISEL-NEXT: lsl x12, x10, x15 ; GISEL-NEXT: cmp x9, #11 @@ -2632,7 +2632,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: lsr x27, x30, x14 ; GISEL-NEXT: orr x4, x13, x2 ; GISEL-NEXT: mov x12, x23 -; GISEL-NEXT: str x28, [sp, #48] ; 8-byte Folded Spill +; GISEL-NEXT: str x28, [sp, #48] ; 8-byte Spill ; GISEL-NEXT: csel x16, x4, x16, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: lsl x25, x11, x15 @@ -2641,10 +2641,10 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: stp x10, x13, [sp, #72] ; 16-byte Folded Spill ; GISEL-NEXT: orr x1, x10, x1 ; GISEL-NEXT: lsr x10, x11, x14 -; GISEL-NEXT: str x11, [sp, #288] ; 8-byte Folded Spill +; GISEL-NEXT: str x11, [sp, #288] ; 8-byte Spill ; GISEL-NEXT: csel x1, x1, x16, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x21, [sp, #40] ; 8-byte Folded Spill +; GISEL-NEXT: str x21, [sp, #40] ; 8-byte Spill ; GISEL-NEXT: csel x30, xzr, x25, eq ; GISEL-NEXT: cmp x9, #14 ; GISEL-NEXT: stp x27, x10, [sp, #56] ; 16-byte Folded Spill @@ -2659,7 +2659,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x1, xzr, x22, eq ; GISEL-NEXT: cmp x9, #0 -; GISEL-NEXT: str x10, [sp, #168] ; 8-byte Folded Spill +; GISEL-NEXT: str x10, [sp, #168] ; 8-byte Spill ; GISEL-NEXT: orr x1, x19, x1 ; GISEL-NEXT: ldp x20, x14, [sp, #112] ; 16-byte Folded Reload ; GISEL-NEXT: csel x1, x1, xzr, eq @@ -2712,7 +2712,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x19, xzr, x22, eq ; GISEL-NEXT: cmp x9, #9 ; GISEL-NEXT: orr x19, x10, x19 -; 
GISEL-NEXT: ldr x10, [sp, #264] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x10, [sp, #264] ; 8-byte Reload ; GISEL-NEXT: csel x1, x19, x1, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x19, xzr, x28, eq @@ -2746,8 +2746,8 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: mov x24, x11 ; GISEL-NEXT: orr x1, x26, x1 -; GISEL-NEXT: str x10, [sp, #264] ; 8-byte Folded Spill -; GISEL-NEXT: ldr x10, [sp, #240] ; 8-byte Folded Reload +; GISEL-NEXT: str x10, [sp, #264] ; 8-byte Spill +; GISEL-NEXT: ldr x10, [sp, #240] ; 8-byte Reload ; GISEL-NEXT: csel x1, x1, xzr, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: mov x26, x13 @@ -2783,7 +2783,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x9, #5 ; GISEL-NEXT: mov x3, x22 ; GISEL-NEXT: orr x19, x23, x19 -; GISEL-NEXT: ldr x23, [sp, #16] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x23, [sp, #16] ; 8-byte Reload ; GISEL-NEXT: csel x1, x19, x1, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x19, xzr, x6, eq @@ -2793,14 +2793,14 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x19, xzr, x15, eq ; GISEL-NEXT: cmp x9, #7 -; GISEL-NEXT: ldr x15, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x15, [sp, #304] ; 8-byte Reload ; GISEL-NEXT: orr x19, x20, x19 ; GISEL-NEXT: ldp x14, x20, [sp, #40] ; 16-byte Folded Reload ; GISEL-NEXT: csel x1, x19, x1, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x19, xzr, x22, eq ; GISEL-NEXT: cmp x9, #8 -; GISEL-NEXT: ldr x22, [sp, #56] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x22, [sp, #56] ; 8-byte Reload ; GISEL-NEXT: orr x19, x15, x19 ; GISEL-NEXT: csel x1, x19, x1, eq ; GISEL-NEXT: tst x8, #0x3f @@ -2832,13 +2832,13 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x10, x10, x1, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x10, [sp, #240] ; 8-byte 
Folded Spill -; GISEL-NEXT: ldr x10, [sp, #32] ; 8-byte Folded Reload +; GISEL-NEXT: str x10, [sp, #240] ; 8-byte Spill +; GISEL-NEXT: ldr x10, [sp, #32] ; 8-byte Reload ; GISEL-NEXT: csel x1, xzr, x10, eq -; GISEL-NEXT: ldr x10, [sp, #24] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x10, [sp, #24] ; 8-byte Reload ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x1, x10, x1 -; GISEL-NEXT: ldr x10, [sp, #216] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x10, [sp, #216] ; 8-byte Reload ; GISEL-NEXT: csel x1, x1, xzr, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x19, xzr, x26, eq @@ -2917,9 +2917,9 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x1, xzr, x26, eq ; GISEL-NEXT: cmp x9, #0 -; GISEL-NEXT: str x10, [sp, #216] ; 8-byte Folded Spill +; GISEL-NEXT: str x10, [sp, #216] ; 8-byte Spill ; GISEL-NEXT: orr x1, x24, x1 -; GISEL-NEXT: ldr x10, [sp, #176] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x10, [sp, #176] ; 8-byte Reload ; GISEL-NEXT: mov x24, x3 ; GISEL-NEXT: csel x1, x1, xzr, eq ; GISEL-NEXT: tst x8, #0x3f @@ -2932,7 +2932,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x19, xzr, x27, eq ; GISEL-NEXT: cmp x9, #2 ; GISEL-NEXT: orr x19, x30, x19 -; GISEL-NEXT: ldr x30, [sp, #312] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x30, [sp, #312] ; 8-byte Reload ; GISEL-NEXT: csel x1, x19, x1, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x19, xzr, x23, eq @@ -2987,7 +2987,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x19, x10, x1, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x10, [sp, #184] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x10, [sp, #184] ; 8-byte Reload ; GISEL-NEXT: csel x1, xzr, x11, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: mov x11, x23 @@ -3058,9 +3058,9 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel 
x17, xzr, x27, eq ; GISEL-NEXT: cmp x9, #0 -; GISEL-NEXT: str x10, [sp, #184] ; 8-byte Folded Spill +; GISEL-NEXT: str x10, [sp, #184] ; 8-byte Spill ; GISEL-NEXT: orr x17, x22, x17 -; GISEL-NEXT: ldr x10, [sp, #192] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x10, [sp, #192] ; 8-byte Reload ; GISEL-NEXT: csel x17, x17, xzr, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x0, xzr, x11, eq @@ -3119,7 +3119,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x17, x10, x17, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x10, [sp, #200] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x10, [sp, #200] ; 8-byte Reload ; GISEL-NEXT: csel x13, xzr, x11, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x12, x12, x13 @@ -3201,7 +3201,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x11, xzr, x2, eq ; GISEL-NEXT: cmp x9, #4 ; GISEL-NEXT: orr x11, x4, x11 -; GISEL-NEXT: ldr x4, [sp, #168] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x4, [sp, #168] ; 8-byte Reload ; GISEL-NEXT: csel x10, x11, x10, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x11, xzr, x20, eq @@ -3214,7 +3214,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: orr x11, x23, x11 ; GISEL-NEXT: csel x10, x11, x10, eq ; GISEL-NEXT: cmp x9, #7 -; GISEL-NEXT: ldr x11, [sp, #208] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #208] ; 8-byte Reload ; GISEL-NEXT: csel x10, x28, x10, eq ; GISEL-NEXT: cmp x9, #8 ; GISEL-NEXT: csel x10, xzr, x10, eq @@ -3266,7 +3266,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: orr x13, x23, x13 ; GISEL-NEXT: csel x11, x13, x11, eq ; GISEL-NEXT: cmp x9, #6 -; GISEL-NEXT: ldr x13, [sp, #224] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x13, [sp, #224] ; 8-byte Reload ; GISEL-NEXT: csel x11, x28, x11, eq ; GISEL-NEXT: cmp x9, #7 ; GISEL-NEXT: csel x11, xzr, x11, eq @@ -3314,7 +3314,7 @@ define void 
@test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: orr x0, x23, x0 ; GISEL-NEXT: csel x13, x0, x13, eq ; GISEL-NEXT: cmp x9, #5 -; GISEL-NEXT: ldr x0, [sp, #232] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x0, [sp, #232] ; 8-byte Reload ; GISEL-NEXT: csel x13, x28, x13, eq ; GISEL-NEXT: cmp x9, #6 ; GISEL-NEXT: csel x13, xzr, x13, eq @@ -3342,7 +3342,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x0, xzr, x24, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x0, x16, x0 -; GISEL-NEXT: ldr x16, [sp, #280] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x16, [sp, #280] ; 8-byte Reload ; GISEL-NEXT: csel x0, x0, xzr, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x1, xzr, x2, eq @@ -3360,7 +3360,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: orr x1, x23, x1 ; GISEL-NEXT: csel x0, x1, x0, eq ; GISEL-NEXT: cmp x9, #4 -; GISEL-NEXT: ldr x1, [sp, #248] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x1, [sp, #248] ; 8-byte Reload ; GISEL-NEXT: csel x0, x28, x0, eq ; GISEL-NEXT: cmp x9, #5 ; GISEL-NEXT: csel x0, xzr, x0, eq @@ -3402,7 +3402,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: orr x3, x23, x3 ; GISEL-NEXT: csel x1, x3, x1, eq ; GISEL-NEXT: cmp x9, #3 -; GISEL-NEXT: ldr x3, [sp, #256] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x3, [sp, #256] ; 8-byte Reload ; GISEL-NEXT: csel x1, x28, x1, eq ; GISEL-NEXT: cmp x9, #4 ; GISEL-NEXT: csel x1, xzr, x1, eq @@ -3442,7 +3442,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: orr x2, x23, x2 ; GISEL-NEXT: csel x1, x2, x1, eq ; GISEL-NEXT: cmp x9, #2 -; GISEL-NEXT: ldr x2, [sp, #272] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x2, [sp, #272] ; 8-byte Reload ; GISEL-NEXT: csel x1, x28, x1, eq ; GISEL-NEXT: cmp x9, #3 ; GISEL-NEXT: csel x1, xzr, x1, eq @@ -3473,7 +3473,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 
; GISEL-NEXT: csel x2, x2, x1, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x1, [sp, #264] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x1, [sp, #264] ; 8-byte Reload ; GISEL-NEXT: csel x15, xzr, x25, eq ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: orr x15, x23, x15 @@ -3512,7 +3512,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x15, x16, x15, eq ; GISEL-NEXT: cmp x9, #0 -; GISEL-NEXT: ldr x16, [sp, #296] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x16, [sp, #296] ; 8-byte Reload ; GISEL-NEXT: csel x14, x28, xzr, eq ; GISEL-NEXT: cmp x9, #1 ; GISEL-NEXT: csel x14, xzr, x14, eq @@ -3526,10 +3526,10 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: stp x4, x1, [x16] ; GISEL-NEXT: csel x14, xzr, x14, eq ; GISEL-NEXT: cmp x9, #5 -; GISEL-NEXT: ldr x4, [sp, #240] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x4, [sp, #240] ; 8-byte Reload ; GISEL-NEXT: csel x14, xzr, x14, eq ; GISEL-NEXT: cmp x9, #6 -; GISEL-NEXT: ldr x1, [sp, #216] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x1, [sp, #216] ; 8-byte Reload ; GISEL-NEXT: csel x14, xzr, x14, eq ; GISEL-NEXT: cmp x9, #7 ; GISEL-NEXT: stp x13, x0, [x16, #80] @@ -3538,7 +3538,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: stp x4, x1, [x16, #16] ; GISEL-NEXT: csel x14, xzr, x14, eq ; GISEL-NEXT: cmp x9, #9 -; GISEL-NEXT: ldr x1, [sp, #184] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x1, [sp, #184] ; 8-byte Reload ; GISEL-NEXT: csel x12, xzr, x14, eq ; GISEL-NEXT: cmp x9, #10 ; GISEL-NEXT: stp x3, x2, [x16, #96] @@ -3555,7 +3555,7 @@ define void @test_lshr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x9, #15 ; GISEL-NEXT: csel x9, xzr, x10, eq ; GISEL-NEXT: cmp x8, #0 -; GISEL-NEXT: ldr x8, [sp, #288] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x8, [sp, #288] ; 8-byte Reload ; GISEL-NEXT: ldp x20, x19, [sp, #384] ; 16-byte Folded Reload ; GISEL-NEXT: ldp x26, x25, [sp, #336] ; 
16-byte Folded Reload ; GISEL-NEXT: csel x8, x8, x9, eq @@ -3726,7 +3726,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: .cfi_offset w26, -80 ; GISEL-NEXT: .cfi_offset w27, -88 ; GISEL-NEXT: .cfi_offset w28, -96 -; GISEL-NEXT: str x0, [sp, #264] ; 8-byte Folded Spill +; GISEL-NEXT: str x0, [sp, #264] ; 8-byte Spill ; GISEL-NEXT: mov w8, w2 ; GISEL-NEXT: mov w9, #64 ; =0x40 ; GISEL-NEXT: ldp x7, x0, [x1] @@ -3740,13 +3740,13 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: lsr x12, x7, x15 ; GISEL-NEXT: asr x11, x28, #63 ; GISEL-NEXT: lsr x20, x0, x15 -; GISEL-NEXT: str x0, [sp, #232] ; 8-byte Folded Spill +; GISEL-NEXT: str x0, [sp, #232] ; 8-byte Spill ; GISEL-NEXT: lsl x27, x28, x14 ; GISEL-NEXT: csel x9, xzr, x9, eq ; GISEL-NEXT: lsl x19, x17, x14 ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: orr x9, x12, x9 -; GISEL-NEXT: str x17, [sp, #208] ; 8-byte Folded Spill +; GISEL-NEXT: str x17, [sp, #208] ; 8-byte Spill ; GISEL-NEXT: lsr x2, x17, x15 ; GISEL-NEXT: csel x9, x9, x11, eq ; GISEL-NEXT: tst x8, #0x3f @@ -3758,13 +3758,13 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: ldp x13, x17, [x1, #32] ; GISEL-NEXT: csel x9, x12, x9, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x16, [sp, #184] ; 8-byte Folded Spill +; GISEL-NEXT: str x16, [sp, #184] ; 8-byte Spill ; GISEL-NEXT: csel x12, xzr, x30, eq ; GISEL-NEXT: cmp x10, #2 -; GISEL-NEXT: str x2, [sp, #88] ; 8-byte Folded Spill +; GISEL-NEXT: str x2, [sp, #88] ; 8-byte Spill ; GISEL-NEXT: lsl x24, x13, x14 ; GISEL-NEXT: orr x12, x2, x12 -; GISEL-NEXT: str x13, [sp, #168] ; 8-byte Folded Spill +; GISEL-NEXT: str x13, [sp, #168] ; 8-byte Spill ; GISEL-NEXT: csel x9, x12, x9, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: lsr x21, x13, x15 @@ -3778,15 +3778,15 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: lsr x3, x17, x15 ; GISEL-NEXT: csel x12, xzr, x0, eq ; GISEL-NEXT: 
cmp x10, #4 -; GISEL-NEXT: str x0, [sp, #128] ; 8-byte Folded Spill +; GISEL-NEXT: str x0, [sp, #128] ; 8-byte Spill ; GISEL-NEXT: lsl x2, x16, x14 ; GISEL-NEXT: orr x12, x21, x12 ; GISEL-NEXT: mov x0, x16 ; GISEL-NEXT: csel x9, x12, x9, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x17, [sp, #144] ; 8-byte Folded Spill +; GISEL-NEXT: str x17, [sp, #144] ; 8-byte Spill ; GISEL-NEXT: csel x12, xzr, x2, eq -; GISEL-NEXT: str x2, [sp, #304] ; 8-byte Folded Spill +; GISEL-NEXT: str x2, [sp, #304] ; 8-byte Spill ; GISEL-NEXT: lsl x2, x13, x14 ; GISEL-NEXT: cmp x10, #5 ; GISEL-NEXT: orr x12, x3, x12 @@ -3812,21 +3812,21 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x9, x12, x9, eq ; GISEL-NEXT: lsl x12, x13, x14 ; GISEL-NEXT: mov x16, x13 -; GISEL-NEXT: str x13, [sp, #192] ; 8-byte Folded Spill +; GISEL-NEXT: str x13, [sp, #192] ; 8-byte Spill ; GISEL-NEXT: ldp x13, x5, [x1, #80] ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x17, [sp, #176] ; 8-byte Folded Spill +; GISEL-NEXT: str x17, [sp, #176] ; 8-byte Spill ; GISEL-NEXT: csel x17, xzr, x12, eq -; GISEL-NEXT: str x0, [sp, #112] ; 8-byte Folded Spill +; GISEL-NEXT: str x0, [sp, #112] ; 8-byte Spill ; GISEL-NEXT: cmp x10, #8 ; GISEL-NEXT: orr x17, x0, x17 ; GISEL-NEXT: lsl x0, x13, x14 -; GISEL-NEXT: str x12, [sp, #280] ; 8-byte Folded Spill +; GISEL-NEXT: str x12, [sp, #280] ; 8-byte Spill ; GISEL-NEXT: csel x17, x17, x9, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: lsr x9, x16, x15 ; GISEL-NEXT: csel x4, xzr, x0, eq -; GISEL-NEXT: str x13, [sp, #200] ; 8-byte Folded Spill +; GISEL-NEXT: str x13, [sp, #200] ; 8-byte Spill ; GISEL-NEXT: cmp x10, #9 ; GISEL-NEXT: stp x9, x0, [sp, #96] ; 16-byte Folded Spill ; GISEL-NEXT: orr x4, x9, x4 @@ -3835,10 +3835,10 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: ldp x9, x13, [x1, #96] ; GISEL-NEXT: csel x17, x4, x17, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x28, [sp, #256] ; 8-byte 
Folded Spill +; GISEL-NEXT: str x28, [sp, #256] ; 8-byte Spill ; GISEL-NEXT: csel x4, xzr, x23, eq ; GISEL-NEXT: cmp x10, #10 -; GISEL-NEXT: str x3, [sp, #120] ; 8-byte Folded Spill +; GISEL-NEXT: str x3, [sp, #120] ; 8-byte Spill ; GISEL-NEXT: orr x4, x12, x4 ; GISEL-NEXT: lsl x16, x9, x14 ; GISEL-NEXT: stp x5, x9, [sp, #216] ; 16-byte Folded Spill @@ -3853,13 +3853,13 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x10, #11 ; GISEL-NEXT: orr x4, x9, x4 ; GISEL-NEXT: lsr x12, x12, x15 -; GISEL-NEXT: str x30, [sp, #48] ; 8-byte Folded Spill +; GISEL-NEXT: str x30, [sp, #48] ; 8-byte Spill ; GISEL-NEXT: stp x16, x9, [sp, #56] ; 16-byte Folded Spill ; GISEL-NEXT: ldr x9, [x1, #112] ; GISEL-NEXT: csel x17, x4, x17, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: stp x25, x24, [sp, #16] ; 16-byte Folded Spill -; GISEL-NEXT: ldr x5, [sp, #96] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x5, [sp, #96] ; 8-byte Reload ; GISEL-NEXT: csel x1, xzr, x16, eq ; GISEL-NEXT: lsl x16, x9, x14 ; GISEL-NEXT: cmp x10, #12 @@ -3873,21 +3873,21 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: lsr x26, x12, x15 ; GISEL-NEXT: csel x17, xzr, x16, eq ; GISEL-NEXT: cmp x10, #13 -; GISEL-NEXT: str x23, [sp, #272] ; 8-byte Folded Spill +; GISEL-NEXT: str x23, [sp, #272] ; 8-byte Spill ; GISEL-NEXT: orr x13, x9, x17 -; GISEL-NEXT: str x9, [sp, #312] ; 8-byte Folded Spill +; GISEL-NEXT: str x9, [sp, #312] ; 8-byte Spill ; GISEL-NEXT: mov x9, x28 ; GISEL-NEXT: csel x13, x13, x1, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: lsr x22, x9, x15 ; GISEL-NEXT: csel x28, xzr, x27, eq ; GISEL-NEXT: cmp x10, #14 -; GISEL-NEXT: str x2, [sp, #8] ; 8-byte Folded Spill +; GISEL-NEXT: str x2, [sp, #8] ; 8-byte Spill ; GISEL-NEXT: orr x28, x26, x28 ; GISEL-NEXT: ldp x0, x16, [sp, #120] ; 16-byte Folded Reload ; GISEL-NEXT: csel x12, x28, x13, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x13, [sp, #304] ; 8-byte Folded Reload +; 
GISEL-NEXT: ldr x13, [sp, #304] ; 8-byte Reload ; GISEL-NEXT: csel x28, xzr, x3, eq ; GISEL-NEXT: cmp x10, #15 ; GISEL-NEXT: stp x22, x3, [sp, #32] ; 16-byte Folded Spill @@ -3898,12 +3898,12 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: mov x28, x24 ; GISEL-NEXT: csel x9, x7, x9, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x7, [sp, #88] ; 8-byte Folded Reload -; GISEL-NEXT: str x9, [sp, #136] ; 8-byte Folded Spill +; GISEL-NEXT: ldr x7, [sp, #88] ; 8-byte Reload +; GISEL-NEXT: str x9, [sp, #136] ; 8-byte Spill ; GISEL-NEXT: csel x9, xzr, x19, eq ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: orr x9, x20, x9 -; GISEL-NEXT: ldr x12, [sp, #280] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x12, [sp, #280] ; 8-byte Reload ; GISEL-NEXT: csel x9, x9, x11, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x20, xzr, x30, eq @@ -3954,7 +3954,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x20, xzr, x23, eq ; GISEL-NEXT: cmp x10, #9 -; GISEL-NEXT: ldr x23, [sp, #328] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x23, [sp, #328] ; 8-byte Reload ; GISEL-NEXT: orr x20, x14, x20 ; GISEL-NEXT: csel x9, x20, x9, eq ; GISEL-NEXT: tst x8, #0x3f @@ -3978,12 +3978,12 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x20, xzr, x27, eq ; GISEL-NEXT: cmp x10, #13 ; GISEL-NEXT: orr x20, x26, x20 -; GISEL-NEXT: ldr x26, [sp, #272] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x26, [sp, #272] ; 8-byte Reload ; GISEL-NEXT: csel x9, x20, x9, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x20, xzr, x3, eq ; GISEL-NEXT: cmp x10, #14 -; GISEL-NEXT: ldr x3, [sp, #232] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x3, [sp, #232] ; 8-byte Reload ; GISEL-NEXT: orr x20, x22, x20 ; GISEL-NEXT: mov x22, x23 ; GISEL-NEXT: csel x9, x20, x9, eq @@ -3993,12 +3993,12 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x9, x3, x9, eq ; 
GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: mov x3, x16 -; GISEL-NEXT: str x9, [sp, #232] ; 8-byte Folded Spill -; GISEL-NEXT: ldr x9, [sp, #48] ; 8-byte Folded Reload +; GISEL-NEXT: str x9, [sp, #232] ; 8-byte Spill +; GISEL-NEXT: ldr x9, [sp, #48] ; 8-byte Reload ; GISEL-NEXT: csel x9, xzr, x9, eq ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: orr x9, x7, x9 -; GISEL-NEXT: ldr x7, [sp, #312] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x7, [sp, #312] ; 8-byte Reload ; GISEL-NEXT: csel x9, x9, x11, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x20, xzr, x28, eq @@ -4034,7 +4034,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x20, xzr, x12, eq ; GISEL-NEXT: cmp x10, #6 -; GISEL-NEXT: ldr x12, [sp, #208] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x12, [sp, #208] ; 8-byte Reload ; GISEL-NEXT: orr x20, x19, x20 ; GISEL-NEXT: mov x19, x27 ; GISEL-NEXT: csel x9, x20, x9, eq @@ -4083,24 +4083,24 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x9, x12, x9, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: str x9, [sp, #208] ; 8-byte Folded Spill +; GISEL-NEXT: str x9, [sp, #208] ; 8-byte Spill ; GISEL-NEXT: ldp x12, x9, [sp, #16] ; 16-byte Folded Reload ; GISEL-NEXT: csel x9, xzr, x9, eq ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: orr x9, x12, x9 -; GISEL-NEXT: ldr x12, [sp, #184] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x12, [sp, #184] ; 8-byte Reload ; GISEL-NEXT: csel x9, x9, x11, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x20, xzr, x3, eq ; GISEL-NEXT: cmp x10, #1 -; GISEL-NEXT: ldr x3, [sp, #296] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x3, [sp, #296] ; 8-byte Reload ; GISEL-NEXT: orr x20, x25, x20 -; GISEL-NEXT: ldr x25, [sp, #280] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x25, [sp, #280] ; 8-byte Reload ; GISEL-NEXT: csel x9, x20, x9, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x20, xzr, x13, eq ; GISEL-NEXT: cmp x10, #2 -; 
GISEL-NEXT: ldr x13, [sp, #8] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x13, [sp, #8] ; 8-byte Reload ; GISEL-NEXT: orr x20, x16, x20 ; GISEL-NEXT: csel x9, x20, x9, eq ; GISEL-NEXT: tst x8, #0x3f @@ -4148,7 +4148,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x20, xzr, x30, eq ; GISEL-NEXT: cmp x10, #10 -; GISEL-NEXT: ldr x30, [sp, #272] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x30, [sp, #272] ; 8-byte Reload ; GISEL-NEXT: orr x20, x7, x20 ; GISEL-NEXT: csel x9, x20, x9, eq ; GISEL-NEXT: tst x8, #0x3f @@ -4170,12 +4170,12 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x9, x12, x9, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x12, [sp, #168] ; 8-byte Folded Reload -; GISEL-NEXT: str x9, [sp, #184] ; 8-byte Folded Spill +; GISEL-NEXT: ldr x12, [sp, #168] ; 8-byte Reload +; GISEL-NEXT: str x9, [sp, #184] ; 8-byte Spill ; GISEL-NEXT: csel x9, xzr, x5, eq ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: orr x9, x21, x9 -; GISEL-NEXT: ldr x5, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x5, [sp, #304] ; 8-byte Reload ; GISEL-NEXT: mov x21, x0 ; GISEL-NEXT: csel x9, x9, x11, eq ; GISEL-NEXT: tst x8, #0x3f @@ -4188,7 +4188,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: csel x20, xzr, x13, eq ; GISEL-NEXT: cmp x10, #2 ; GISEL-NEXT: orr x20, x24, x20 -; GISEL-NEXT: ldr x24, [sp, #288] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x24, [sp, #288] ; 8-byte Reload ; GISEL-NEXT: csel x9, x20, x9, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x20, xzr, x3, eq @@ -4246,8 +4246,8 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x9, x12, x9, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x12, [sp, #144] ; 8-byte Folded Reload -; GISEL-NEXT: str x9, [sp, #168] ; 8-byte Folded Spill +; GISEL-NEXT: ldr x12, [sp, #144] ; 8-byte Reload 
+; GISEL-NEXT: str x9, [sp, #168] ; 8-byte Spill ; GISEL-NEXT: csel x9, xzr, x5, eq ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: orr x9, x28, x9 @@ -4320,8 +4320,8 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x9, x12, x9, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x12, [sp, #152] ; 8-byte Folded Reload -; GISEL-NEXT: str x9, [sp, #304] ; 8-byte Folded Spill +; GISEL-NEXT: ldr x12, [sp, #152] ; 8-byte Reload +; GISEL-NEXT: str x9, [sp, #304] ; 8-byte Spill ; GISEL-NEXT: csel x9, xzr, x13, eq ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: orr x9, x16, x9 @@ -4388,7 +4388,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x20, x12, x9, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x12, [sp, #160] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x12, [sp, #160] ; 8-byte Reload ; GISEL-NEXT: csel x9, xzr, x28, eq ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: orr x9, x24, x9 @@ -4451,11 +4451,11 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x2, x12, x9, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x12, [sp, #176] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x12, [sp, #176] ; 8-byte Reload ; GISEL-NEXT: csel x9, xzr, x25, eq ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: orr x9, x16, x9 -; GISEL-NEXT: ldr x16, [sp, #216] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x16, [sp, #216] ; 8-byte Reload ; GISEL-NEXT: csel x9, x9, x11, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x13, xzr, x3, eq @@ -4546,7 +4546,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: orr x12, x27, x12 ; GISEL-NEXT: csel x9, x12, x9, eq ; GISEL-NEXT: cmp x10, #7 -; GISEL-NEXT: ldr x12, [sp, #192] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x12, [sp, #192] ; 8-byte Reload ; GISEL-NEXT: csel x9, x11, x9, eq ; GISEL-NEXT: cmp x10, #8 ; GISEL-NEXT: csel x9, x11, x9, eq @@ -4599,7 +4599,7 
@@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: orr x13, x27, x13 ; GISEL-NEXT: csel x12, x13, x12, eq ; GISEL-NEXT: cmp x10, #6 -; GISEL-NEXT: ldr x13, [sp, #200] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x13, [sp, #200] ; 8-byte Reload ; GISEL-NEXT: csel x12, x11, x12, eq ; GISEL-NEXT: cmp x10, #7 ; GISEL-NEXT: csel x12, x11, x12, eq @@ -4624,9 +4624,9 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x12, xzr, x14, eq ; GISEL-NEXT: cmp x10, #0 -; GISEL-NEXT: ldr x14, [sp, #264] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x14, [sp, #264] ; 8-byte Reload ; GISEL-NEXT: orr x12, x15, x12 -; GISEL-NEXT: ldr x15, [sp, #136] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x15, [sp, #136] ; 8-byte Reload ; GISEL-NEXT: csel x12, x12, x11, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: stp x9, x13, [x14, #72] @@ -4634,7 +4634,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x10, #1 ; GISEL-NEXT: str x15, [x14] ; GISEL-NEXT: orr x0, x1, x0 -; GISEL-NEXT: ldr x15, [sp, #232] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x15, [sp, #232] ; 8-byte Reload ; GISEL-NEXT: stp x2, x6, [x14, #56] ; GISEL-NEXT: csel x12, x0, x12, eq ; GISEL-NEXT: tst x8, #0x3f @@ -4642,21 +4642,21 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x10, #2 ; GISEL-NEXT: str x15, [x14, #8] ; GISEL-NEXT: orr x0, x5, x0 -; GISEL-NEXT: ldr x15, [sp, #208] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x15, [sp, #208] ; 8-byte Reload ; GISEL-NEXT: csel x12, x0, x12, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x0, xzr, x7, eq ; GISEL-NEXT: cmp x10, #3 ; GISEL-NEXT: str x15, [x14, #16] ; GISEL-NEXT: orr x0, x19, x0 -; GISEL-NEXT: ldr x15, [sp, #184] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x15, [sp, #184] ; 8-byte Reload ; GISEL-NEXT: csel x12, x0, x12, eq ; GISEL-NEXT: tst x8, #0x3f ; GISEL-NEXT: csel x0, xzr, x23, eq ; GISEL-NEXT: cmp 
x10, #4 ; GISEL-NEXT: str x15, [x14, #24] ; GISEL-NEXT: orr x0, x27, x0 -; GISEL-NEXT: ldr x15, [sp, #168] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x15, [sp, #168] ; 8-byte Reload ; GISEL-NEXT: csel x12, x0, x12, eq ; GISEL-NEXT: cmp x10, #5 ; GISEL-NEXT: csel x12, x11, x12, eq @@ -4664,7 +4664,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: str x15, [x14, #32] ; GISEL-NEXT: csel x12, x11, x12, eq ; GISEL-NEXT: cmp x10, #7 -; GISEL-NEXT: ldr x15, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x15, [sp, #304] ; 8-byte Reload ; GISEL-NEXT: csel x12, x11, x12, eq ; GISEL-NEXT: cmp x10, #8 ; GISEL-NEXT: csel x12, x11, x12, eq @@ -4686,7 +4686,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x0, x16, x12, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x16, [sp, #224] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x16, [sp, #224] ; 8-byte Reload ; GISEL-NEXT: csel x12, xzr, x17, eq ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: orr x12, x1, x12 @@ -4734,7 +4734,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x3, x16, x12, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x16, [sp, #240] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x16, [sp, #240] ; 8-byte Reload ; GISEL-NEXT: csel x12, xzr, x4, eq ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: stp x0, x3, [x14, #88] @@ -4779,7 +4779,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x8, #0 ; GISEL-NEXT: csel x4, x16, x12, eq ; GISEL-NEXT: tst x8, #0x3f -; GISEL-NEXT: ldr x16, [sp, #248] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x16, [sp, #248] ; 8-byte Reload ; GISEL-NEXT: csel x12, xzr, x7, eq ; GISEL-NEXT: cmp x10, #0 ; GISEL-NEXT: orr x12, x19, x12 @@ -4858,7 +4858,7 @@ define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { ; GISEL-NEXT: cmp x10, #15 ; GISEL-NEXT: csel x9, x11, x9, eq ; GISEL-NEXT: cmp 
x8, #0 -; GISEL-NEXT: ldr x8, [sp, #256] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x8, [sp, #256] ; 8-byte Reload ; GISEL-NEXT: ldp x28, x27, [sp, #336] ; 16-byte Folded Reload ; GISEL-NEXT: csel x8, x8, x9, eq ; GISEL-NEXT: str x8, [x14, #120] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll b/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll index ae26c363ef56b..e8b91d2fa6871 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/stacksave-stackrestore.ll @@ -9,7 +9,7 @@ define void @test_scoped_alloca(i64 %n) { ; CHECK-LABEL: test_scoped_alloca: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: .cfi_def_cfa w29, 32 ; CHECK-NEXT: .cfi_offset w19, -16 @@ -24,7 +24,7 @@ define void @test_scoped_alloca(i64 %n) { ; CHECK-NEXT: bl use_addr ; CHECK-NEXT: mov sp, x19 ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret %sp = call ptr @llvm.stacksave.p0() diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll index 6d27e4f4d603b..78fc6ccc50663 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll @@ -443,7 +443,7 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, ptr ; CHECK: mov x21, x19 ; CHECK: mov x7, x28 ; CHECK: ldp x29, x30, [sp, #96] ; 16-byte Folded Reload -; CHECK: ldr x28, [sp, #16] ; 8-byte Folded Reload +; CHECK: ldr x28, [sp, #16] ; 8-byte Reload ; CHECK: ldp x20, x19, [sp, #80] ; 16-byte Folded Reload ; CHECK: ldp x23, x22, [sp, #64] ; 16-byte Folded Reload ; 
CHECK: ldp x25, x24, [sp, #48] ; 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/aarch64-fastcc-stackup.ll b/llvm/test/CodeGen/AArch64/aarch64-fastcc-stackup.ll index db5289ac4bdca..3cfe10ed8a83d 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-fastcc-stackup.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-fastcc-stackup.ll @@ -21,7 +21,7 @@ define fastcc i64 @baz() { ; CHECK-LABEL: baz: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x8, sp @@ -36,10 +36,10 @@ define fastcc i64 @baz() { ; CHECK-NEXT: mov x6, x7 ; CHECK-NEXT: bl foo ; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: str x0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x0, [sp, #8] // 8-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/aarch64-fixup-statepoint-regs-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-fixup-statepoint-regs-crash.ll index 95ccb983dfde0..f66245b8a1d37 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-fixup-statepoint-regs-crash.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-fixup-statepoint-regs-crash.ll @@ -15,7 +15,7 @@ define dso_local ptr addrspace(1) @foo(ptr addrspace(1) %arg) gc "statepoint-exa ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [sp, #16] -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: bl baz // 8-byte Folded Reload ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: ldp x19, x0, [sp, #8] // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll 
b/llvm/test/CodeGen/AArch64/aarch64-mops.ll index 83530049a50d6..1710fad9f2539 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll @@ -664,14 +664,14 @@ define void @memset_size(ptr %dst, i64 %size, i32 %value) { ; GISel-WITHOUT-MOPS-O0-LABEL: memset_size: ; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry ; GISel-WITHOUT-MOPS-O0-NEXT: sub sp, sp, #32 -; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 32 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: str x1, [sp, #8] // 8-byte Folded Spill +; GISel-WITHOUT-MOPS-O0-NEXT: str x1, [sp, #8] // 8-byte Spill ; GISel-WITHOUT-MOPS-O0-NEXT: mov w1, w2 -; GISel-WITHOUT-MOPS-O0-NEXT: ldr x2, [sp, #8] // 8-byte Folded Reload +; GISel-WITHOUT-MOPS-O0-NEXT: ldr x2, [sp, #8] // 8-byte Reload ; GISel-WITHOUT-MOPS-O0-NEXT: bl memset -; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; GISel-WITHOUT-MOPS-O0-NEXT: add sp, sp, #32 ; GISel-WITHOUT-MOPS-O0-NEXT: ret ; @@ -733,14 +733,14 @@ define void @memset_size_volatile(ptr %dst, i64 %size, i32 %value) { ; GISel-WITHOUT-MOPS-O0-LABEL: memset_size_volatile: ; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry ; GISel-WITHOUT-MOPS-O0-NEXT: sub sp, sp, #32 -; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 32 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: str x1, [sp, #8] // 8-byte Folded Spill +; GISel-WITHOUT-MOPS-O0-NEXT: str x1, [sp, #8] // 8-byte Spill ; GISel-WITHOUT-MOPS-O0-NEXT: mov w1, w2 -; GISel-WITHOUT-MOPS-O0-NEXT: ldr x2, [sp, #8] // 8-byte Folded Reload +; GISel-WITHOUT-MOPS-O0-NEXT: ldr x2, [sp, #8] // 
8-byte Reload ; GISel-WITHOUT-MOPS-O0-NEXT: bl memset -; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; GISel-WITHOUT-MOPS-O0-NEXT: add sp, sp, #32 ; GISel-WITHOUT-MOPS-O0-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll index 10b30b0265de8..47e30e38bc51c 100644 --- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll +++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll @@ -50,16 +50,16 @@ define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: vec_add_const_add_const_extrause: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.4s, #8 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use ; CHECK-NEXT: movi v0.4s, #10 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -127,16 +127,16 @@ define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: vec_add_const_sub_const_extrause: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.4s, #8 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use ; 
CHECK-NEXT: movi v0.4s, #6 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -206,16 +206,16 @@ define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) { ; CHECK-LABEL: vec_add_const_const_sub_extrause: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.4s, #8 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use ; CHECK-NEXT: mvni v0.4s, #5 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -283,16 +283,16 @@ define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: vec_sub_const_add_const_extrause: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.4s, #8 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use ; CHECK-NEXT: mvni v0.4s, #5 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; 
CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -360,16 +360,16 @@ define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: vec_sub_const_sub_const_extrause: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.4s, #8 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use ; CHECK-NEXT: movi v0.4s, #10 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -439,16 +439,16 @@ define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) { ; CHECK-LABEL: vec_sub_const_const_sub_extrause: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.4s, #8 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: bl vec_use ; CHECK-NEXT: movi v0.4s, #2 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -519,16 +519,16 @@ define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: vec_const_sub_add_const_extrause: ; CHECK: // 
%bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.4s, #8 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: bl vec_use ; CHECK-NEXT: movi v0.4s, #10 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -599,16 +599,16 @@ define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: vec_const_sub_sub_const_extrause: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.4s, #8 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: bl vec_use ; CHECK-NEXT: movi v0.4s, #6 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -678,16 +678,16 @@ define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) { ; CHECK-LABEL: vec_const_sub_const_sub_extrause: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi 
v1.4s, #8 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: bl vec_use ; CHECK-NEXT: movi v0.4s, #2 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/alias_mask_scalable.ll b/llvm/test/CodeGen/AArch64/alias_mask_scalable.ll index 179dcfa11c108..3435ceca28e17 100644 --- a/llvm/test/CodeGen/AArch64/alias_mask_scalable.ll +++ b/llvm/test/CodeGen/AArch64/alias_mask_scalable.ll @@ -115,10 +115,10 @@ define @whilewr_16_expand(ptr %a, ptr %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: index z0.d, #0, #1 @@ -147,18 +147,18 @@ define @whilewr_16_expand(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.d, z3.d ; CHECK-NEXT: cmphi p6.d, p0/z, z2.d, z3.d ; CHECK-NEXT: uzp1 p2.s, p4.s, p5.s -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 
2-byte Reload ; CHECK-NEXT: incd z0.d, all, mul #4 ; CHECK-NEXT: uzp1 p3.s, p3.s, p6.s -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z0.d ; CHECK-NEXT: uzp1 p1.h, p1.h, p3.h ; CHECK-NEXT: cmp x8, #1 ; CHECK-NEXT: cset w8, lt ; CHECK-NEXT: sbfx x8, x8, #0, #1 ; CHECK-NEXT: uzp1 p0.s, p7.s, p0.s -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.h, p2.h, p0.h ; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b ; CHECK-NEXT: whilelo p1.b, xzr, x8 @@ -176,12 +176,12 @@ define @whilewr_16_expand2(ptr %a, ptr %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: index z0.d, #0, #1 @@ -236,24 +236,24 @@ define @whilewr_16_expand2(ptr %a, ptr %b) { ; CHECK-NEXT: cmphi p0.d, p0/z, z5.d, z0.d ; CHECK-NEXT: uzp1 p4.s, p5.s, p4.s ; CHECK-NEXT: uzp1 p5.s, p9.s, p6.s -; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 
2-byte Reload ; CHECK-NEXT: whilelo p6.b, xzr, x8 ; CHECK-NEXT: uzp1 p3.s, p8.s, p3.s ; CHECK-NEXT: cmp x9, #1 -; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.s, p0.s, p7.s ; CHECK-NEXT: cset w8, lt -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p4.h, p5.h, p4.h ; CHECK-NEXT: sbfx x8, x8, #0, #1 -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h ; CHECK-NEXT: uzp1 p1.b, p1.b, p2.b ; CHECK-NEXT: uzp1 p2.b, p0.b, p4.b -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: whilelo p3.b, xzr, x8 ; CHECK-NEXT: sel p0.b, p1, p1.b, p6.b -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: sel p1.b, p2, p2.b, p3.b ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -302,10 +302,10 @@ define @whilewr_32_expand2(ptr %a, ptr %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: index z0.d, #0, #1 @@ -335,18 +335,18 @@ define @whilewr_32_expand2(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.d, z3.d ; CHECK-NEXT: cmphi p6.d, p0/z, z2.d, z3.d ; CHECK-NEXT: uzp1 p2.s, p4.s, p5.s -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: incd z0.d, all, mul #4 ; CHECK-NEXT: uzp1 p3.s, p3.s, p6.s -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z0.d ; CHECK-NEXT: uzp1 p1.h, p1.h, p3.h ; CHECK-NEXT: cmp x8, #1 ; CHECK-NEXT: cset w8, lt ; CHECK-NEXT: sbfx x8, x8, #0, #1 ; CHECK-NEXT: uzp1 p0.s, p7.s, p0.s -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.h, p2.h, p0.h ; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b ; CHECK-NEXT: whilelo p1.b, xzr, x8 @@ -364,13 +364,13 @@ define @whilewr_32_expand3(ptr %a, ptr %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: index z0.d, #0, #1 @@ -432,21 +432,21 @@ define @whilewr_32_expand3(ptr %a, ptr %b) { ; CHECK-NEXT: cset w8, lt ; CHECK-NEXT: uzp1 p7.s, p9.s, p8.s ; CHECK-NEXT: sbfx x8, x8, #0, #1 -; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p4.s, p10.s, p4.s -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.s, p0.s, p6.s -; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p5.h, p7.h, p5.h -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.h, p0.h, p4.h -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: whilelo 
p4.b, xzr, x8 ; CHECK-NEXT: uzp1 p3.b, p0.b, p5.b -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: sel p0.b, p2, p2.b, p1.b ; CHECK-NEXT: sel p1.b, p3, p3.b, p4.b -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -520,10 +520,10 @@ define @whilewr_64_expand3(ptr %a, ptr %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: index z0.d, #0, #1 @@ -553,18 +553,18 @@ define @whilewr_64_expand3(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.d, z3.d ; CHECK-NEXT: cmphi p6.d, p0/z, z2.d, z3.d ; CHECK-NEXT: uzp1 p2.s, p4.s, p5.s -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: incd z0.d, all, mul #4 ; CHECK-NEXT: uzp1 p3.s, p3.s, p6.s -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z0.d ; CHECK-NEXT: uzp1 p1.h, p1.h, 
p3.h ; CHECK-NEXT: cmp x8, #1 ; CHECK-NEXT: cset w8, lt ; CHECK-NEXT: sbfx x8, x8, #0, #1 ; CHECK-NEXT: uzp1 p0.s, p7.s, p0.s -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.h, p2.h, p0.h ; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b ; CHECK-NEXT: whilelo p1.b, xzr, x8 @@ -582,13 +582,13 @@ define @whilewr_64_expand4(ptr %a, ptr %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: index z0.d, #0, #1 @@ -650,21 +650,21 @@ define @whilewr_64_expand4(ptr %a, ptr %b) { ; CHECK-NEXT: cset w8, lt ; CHECK-NEXT: uzp1 p7.s, p9.s, p8.s ; CHECK-NEXT: sbfx x8, x8, #0, #1 -; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p4.s, p10.s, p4.s -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload ; 
CHECK-NEXT: uzp1 p0.s, p0.s, p6.s -; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p5.h, p7.h, p5.h -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.h, p0.h, p4.h -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: whilelo p4.b, xzr, x8 ; CHECK-NEXT: uzp1 p3.b, p0.b, p5.b -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: sel p0.b, p2, p2.b, p1.b ; CHECK-NEXT: sel p1.b, p3, p3.b, p4.b -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -708,10 +708,10 @@ define @whilewr_badimm(ptr %a, ptr %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: index z0.d, #0, #1 @@ -743,17 +743,17 @@ define @whilewr_badimm(ptr %a, ptr %b) { ; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s ; CHECK-NEXT: incd z0.d, all, mul #4 ; CHECK-NEXT: uzp1 p2.s, p4.s, p5.s -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p3.s, p3.s, p6.s -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z0.d ; CHECK-NEXT: uzp1 p1.h, p1.h, p3.h ; CHECK-NEXT: cmp x8, #1 ; CHECK-NEXT: cset w8, lt ; CHECK-NEXT: sbfx x8, x8, #0, #1 ; CHECK-NEXT: uzp1 p0.s, p7.s, p0.s -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.h, p2.h, p0.h ; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b ; CHECK-NEXT: whilelo p1.b, xzr, x8 diff --git a/llvm/test/CodeGen/AArch64/alias_mask_scalable_nosve2.ll b/llvm/test/CodeGen/AArch64/alias_mask_scalable_nosve2.ll index 8b5ea0bc3b3ce..d62d0665dd332 100644 --- a/llvm/test/CodeGen/AArch64/alias_mask_scalable_nosve2.ll +++ b/llvm/test/CodeGen/AArch64/alias_mask_scalable_nosve2.ll @@ 
-6,10 +6,10 @@ define @whilewr_8(ptr %a, ptr %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: index z0.d, #0, #1 @@ -35,17 +35,17 @@ define @whilewr_8(ptr %a, ptr %b) { ; CHECK-NEXT: cmphi p4.d, p0/z, z2.d, z4.d ; CHECK-NEXT: incd z4.d, all, mul #4 ; CHECK-NEXT: uzp1 p2.s, p5.s, p6.s -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z4.d ; CHECK-NEXT: uzp1 p3.s, p3.s, p4.s ; CHECK-NEXT: cmp x8, #1 -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: cset w8, lt ; CHECK-NEXT: uzp1 p1.h, p1.h, p3.h ; CHECK-NEXT: sbfx x8, x8, #0, #1 ; CHECK-NEXT: uzp1 p0.s, p7.s, p0.s -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.h, p2.h, p0.h ; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b ; CHECK-NEXT: whilelo p1.b, xzr, x8 diff --git a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll index 6019a62f4925e..fb1cb410c5c57 100644 
--- a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll +++ b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll @@ -526,7 +526,7 @@ define void @caller_in_memory() { ; CHECK-SD-LABEL: caller_in_memory: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #96 -; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: add x8, sp, #8 @@ -538,7 +538,7 @@ define void @caller_in_memory() { ; CHECK-SD-NEXT: ldr d2, [sp, #72] ; CHECK-SD-NEXT: ldur q3, [sp, #56] ; CHECK-SD-NEXT: ldur q4, [sp, #40] -; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-SD-NEXT: stp q1, q0, [x8] ; CHECK-SD-NEXT: str d2, [x8, #64] ; CHECK-SD-NEXT: stp q4, q3, [x8, #32] @@ -548,7 +548,7 @@ define void @caller_in_memory() { ; CHECK-GI-LABEL: caller_in_memory: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #96 -; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 96 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: add x8, sp, #8 @@ -612,7 +612,7 @@ define void @argument_in_memory() { ; CHECK-SD-LABEL: argument_in_memory: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #96 -; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: adrp x8, in_memory_store @@ -624,14 +624,14 @@ define void @argument_in_memory() { ; CHECK-SD-NEXT: stp q0, q1, [sp] ; CHECK-SD-NEXT: stp q2, q3, [sp, #32] ; CHECK-SD-NEXT: bl callee_in_memory -; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #96 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: 
argument_in_memory: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #96 -; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 96 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: adrp x9, in_memory_store @@ -648,7 +648,7 @@ define void @argument_in_memory() { ; CHECK-GI-NEXT: stp x15, x16, [sp, #48] ; CHECK-GI-NEXT: str x9, [sp, #64] ; CHECK-GI-NEXT: bl callee_in_memory -; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #96 ; CHECK-GI-NEXT: ret %1 = load %T_IN_MEMORY, ptr @in_memory_store diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll index 3e4b887fed55d..8dd5c3ac05109 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll @@ -430,36 +430,36 @@ define <2 x fp128> @vec_add(<2 x fp128> %lhs, <2 x fp128> %rhs) { ; CHECK-SD-LABEL: vec_add: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: bl __addtf3 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: bl __addtf3 ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_add: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, 
#64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b ; CHECK-GI-NEXT: bl __addtf3 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: bl __addtf3 ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #64 ; CHECK-GI-NEXT: ret %val = fadd <2 x fp128> %lhs, %rhs @@ -470,36 +470,36 @@ define <2 x fp128> @vec_sub(<2 x fp128> %lhs, <2 x fp128> %rhs) { ; CHECK-SD-LABEL: vec_sub: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: bl __subtf3 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: bl __subtf3 ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_sub: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded 
Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b ; CHECK-GI-NEXT: bl __subtf3 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: bl __subtf3 ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #64 ; CHECK-GI-NEXT: ret %val = fsub <2 x fp128> %lhs, %rhs @@ -510,36 +510,36 @@ define <2 x fp128> @vec_mul(<2 x fp128> %lhs, <2 x fp128> %rhs) { ; CHECK-SD-LABEL: vec_mul: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: bl __multf3 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: bl __multf3 ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_mul: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; 
CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b ; CHECK-GI-NEXT: bl __multf3 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: bl __multf3 ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #64 ; CHECK-GI-NEXT: ret %val = fmul <2 x fp128> %lhs, %rhs @@ -550,36 +550,36 @@ define <2 x fp128> @vec_div(<2 x fp128> %lhs, <2 x fp128> %rhs) { ; CHECK-SD-LABEL: vec_div: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: bl __divtf3 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: bl __divtf3 ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_div: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: 
.cfi_offset w30, -16 ; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b ; CHECK-GI-NEXT: bl __divtf3 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: bl __divtf3 ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #64 ; CHECK-GI-NEXT: ret %val = fdiv <2 x fp128> %lhs, %rhs @@ -590,17 +590,17 @@ define <2 x i32> @vec_fptosi_32(<2 x fp128> %val) { ; CHECK-SD-LABEL: vec_fptosi_32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl __fixtfsi ; CHECK-SD-NEXT: fmov s0, w0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: bl __fixtfsi -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], w0 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -613,9 +613,9 @@ define <2 x i32> @vec_fptosi_32(<2 x fp128> %val) { ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w30, -16 -; 
CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: fmov s0, w19 @@ -632,19 +632,19 @@ define <2 x i64> @vec_fptosi_64(<2 x fp128> %val) { ; CHECK-SD-LABEL: vec_fptosi_64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: bl __fixtfdi ; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: bl __fixtfdi ; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -656,9 +656,9 @@ define <2 x i64> @vec_fptosi_64(<2 x fp128> %val) { ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __fixtfdi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov x19, x0 ; CHECK-GI-NEXT: bl __fixtfdi ; CHECK-GI-NEXT: fmov d0, x19 @@ -674,17 
+674,17 @@ define <2 x i32> @vec_fptoui_32(<2 x fp128> %val) { ; CHECK-SD-LABEL: vec_fptoui_32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl __fixunstfsi ; CHECK-SD-NEXT: fmov s0, w0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: bl __fixunstfsi -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], w0 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -697,9 +697,9 @@ define <2 x i32> @vec_fptoui_32(<2 x fp128> %val) { ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __fixunstfsi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: fmov s0, w19 @@ -716,19 +716,19 @@ define <2 x i64> @vec_fptoui_64(<2 x fp128> %val) { ; CHECK-SD-LABEL: vec_fptoui_64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; 
CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: bl __fixunstfdi ; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: bl __fixunstfdi ; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -740,9 +740,9 @@ define <2 x i64> @vec_fptoui_64(<2 x fp128> %val) { ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __fixunstfdi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov x19, x0 ; CHECK-GI-NEXT: bl __fixunstfdi ; CHECK-GI-NEXT: fmov d0, x19 @@ -758,28 +758,28 @@ define <2 x fp128> @vec_sitofp_32(<2 x i32> %src32) { ; CHECK-SD-LABEL: vec_sitofp_32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #32 -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __floatsitf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded 
Spill +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov w0, v1.s[1] ; CHECK-SD-NEXT: bl __floatsitf ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_sitofp_32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -788,12 +788,12 @@ define <2 x fp128> @vec_sitofp_32(<2 x i32> %src32) { ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: bl __floatsitf ; CHECK-GI-NEXT: fmov w0, s8 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __floatsitf ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #32 ; CHECK-GI-NEXT: ret %val32 = sitofp <2 x i32> %src32 to <2 x fp128> @@ -804,27 +804,27 @@ define <2 x fp128> @vec_sitofp_64(<2 x i64> %src64) { ; CHECK-SD-LABEL: vec_sitofp_64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 
; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: fmov x0, d0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __floatditf -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov x0, v0.d[1] ; CHECK-SD-NEXT: bl __floatditf ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_sitofp_64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -832,12 +832,12 @@ define <2 x fp128> @vec_sitofp_64(<2 x i64> %src64) { ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: bl __floatditf ; CHECK-GI-NEXT: fmov x0, d8 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __floatditf ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #32 ; CHECK-GI-NEXT: ret %val64 = sitofp <2 x i64> %src64 to <2 
x fp128> @@ -848,28 +848,28 @@ define <2 x fp128> @vec_uitofp_32(<2 x i32> %src32) { ; CHECK-SD-LABEL: vec_uitofp_32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #32 -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __floatunsitf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov w0, v1.s[1] ; CHECK-SD-NEXT: bl __floatunsitf ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_uitofp_32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -878,12 +878,12 @@ define <2 x fp128> @vec_uitofp_32(<2 x i32> %src32) { ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: bl __floatunsitf ; CHECK-GI-NEXT: fmov w0, s8 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __floatunsitf ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; 
CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #32 ; CHECK-GI-NEXT: ret %val32 = uitofp <2 x i32> %src32 to <2 x fp128> @@ -894,27 +894,27 @@ define <2 x fp128> @vec_uitofp_64(<2 x i64> %src64) { ; CHECK-SD-LABEL: vec_uitofp_64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: fmov x0, d0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __floatunditf -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov x0, v0.d[1] ; CHECK-SD-NEXT: bl __floatunditf ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_uitofp_64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -922,12 +922,12 @@ define <2 x fp128> @vec_uitofp_64(<2 x i64> 
%src64) { ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: bl __floatunditf ; CHECK-GI-NEXT: fmov x0, d8 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __floatunditf ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #32 ; CHECK-GI-NEXT: ret %val64 = uitofp <2 x i64> %src64 to <2 x fp128> @@ -938,8 +938,8 @@ define <2 x i1> @vec_setcc1(<2 x fp128> %lhs, <2 x fp128> %rhs) { ; CHECK-SD-LABEL: vec_setcc1: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -8 ; CHECK-SD-NEXT: .cfi_offset b8, -16 @@ -954,12 +954,12 @@ define <2 x i1> @vec_setcc1(<2 x fp128> %lhs, <2 x fp128> %rhs) { ; CHECK-SD-NEXT: fmov d8, x8 ; CHECK-SD-NEXT: bl __letf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: cset w8, le ; CHECK-SD-NEXT: sbfx x8, x8, #0, #1 ; CHECK-SD-NEXT: fmov d0, x8 ; CHECK-SD-NEXT: zip1 v0.2s, v0.2s, v8.2s -; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; @@ -993,8 +993,8 @@ define <2 x i1> @vec_setcc2(<2 x fp128> %lhs, <2 x fp128> %rhs) { ; CHECK-SD-LABEL: vec_setcc2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; 
CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -8 ; CHECK-SD-NEXT: .cfi_offset b8, -16 @@ -1009,12 +1009,12 @@ define <2 x i1> @vec_setcc2(<2 x fp128> %lhs, <2 x fp128> %rhs) { ; CHECK-SD-NEXT: fmov d8, x8 ; CHECK-SD-NEXT: bl __letf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: cset w8, gt ; CHECK-SD-NEXT: sbfx x8, x8, #0, #1 ; CHECK-SD-NEXT: fmov d0, x8 ; CHECK-SD-NEXT: zip1 v0.2s, v0.2s, v8.2s -; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; @@ -1048,7 +1048,7 @@ define <2 x i1> @vec_setcc3(<2 x fp128> %lhs, <2 x fp128> %rhs) { ; CHECK-SD-LABEL: vec_setcc3: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #96 -; CHECK-SD-NEXT: str d8, [sp, #64] // 8-byte Folded Spill +; CHECK-SD-NEXT: str d8, [sp, #64] // 8-byte Spill ; CHECK-SD-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w19, -8 @@ -1079,14 +1079,14 @@ define <2 x i1> @vec_setcc3(<2 x fp128> %lhs, <2 x fp128> %rhs) { ; CHECK-SD-NEXT: sbfx x8, x8, #0, #1 ; CHECK-SD-NEXT: fmov d0, x8 ; CHECK-SD-NEXT: zip1 v0.2s, v0.2s, v8.2s -; CHECK-SD-NEXT: ldr d8, [sp, #64] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d8, [sp, #64] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #96 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_setcc3: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #96 -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; 
CHECK-GI-NEXT: .cfi_def_cfa_offset 96 ; CHECK-GI-NEXT: .cfi_offset w19, -8 @@ -1111,7 +1111,7 @@ define <2 x i1> @vec_setcc3(<2 x fp128> %lhs, <2 x fp128> %rhs) { ; CHECK-GI-NEXT: bl __unordtf2 ; CHECK-GI-NEXT: fmov s0, w19 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: cset w8, ne ; CHECK-GI-NEXT: orr w8, w20, w8 ; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload @@ -1177,19 +1177,19 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) { ; CHECK-SD-LABEL: vec_round_f16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: bl __trunctfhf2 ; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: bl __trunctfhf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -1198,26 +1198,26 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) { ; CHECK-GI-LABEL: vec_round_f16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] 
// 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: mov v2.d[0], x8 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov v2.d[1], x8 -; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: bl __trunctfhf2 ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: bl __trunctfhf2 ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: bl __trunctfhf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: bl __trunctfhf2 ; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: add sp, sp, #64 @@ -1230,19 +1230,19 @@ define <2 x float> @vec_round_f32(<2 x fp128> %val) { ; CHECK-SD-LABEL: vec_round_f32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: bl __trunctfsf2 ; 
CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: bl __trunctfsf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -1251,18 +1251,18 @@ define <2 x float> @vec_round_f32(<2 x fp128> %val) { ; CHECK-GI-LABEL: vec_round_f32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __trunctfsf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: bl __trunctfsf2 -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #48 @@ -1275,19 +1275,19 @@ define <2 x double> @vec_round_f64(<2 x fp128> %val) { ; CHECK-SD-LABEL: 
vec_round_f64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: bl __trunctfdf2 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: bl __trunctfdf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -1295,18 +1295,18 @@ define <2 x double> @vec_round_f64(<2 x fp128> %val) { ; CHECK-GI-LABEL: vec_round_f64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __trunctfdf2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: bl __trunctfdf2 -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, 
[sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #48 @@ -1319,29 +1319,29 @@ define <2 x fp128> @vec_extend_f16(<2 x half> %val) { ; CHECK-SD-LABEL: vec_extend_f16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #32 -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-SD-NEXT: bl __extendhftf2 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov h1, v1.h[1] ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl __extendhftf2 ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_extend_f16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -1349,13 +1349,13 @@ define <2 x fp128> 
@vec_extend_f16(<2 x half> %val) { ; CHECK-GI-NEXT: mov h8, v0.h[1] ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-GI-NEXT: bl __extendhftf2 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl __extendhftf2 ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #32 ; CHECK-GI-NEXT: ret %dst = fpext <2 x half> %val to <2 x fp128> @@ -1366,29 +1366,29 @@ define <2 x fp128> @vec_extend_f32(<2 x float> %val) { ; CHECK-SD-LABEL: vec_extend_f32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #32 -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __extendsftf2 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov s1, v1.s[1] ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl __extendsftf2 ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #32 ; 
CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_extend_f32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -1396,13 +1396,13 @@ define <2 x fp128> @vec_extend_f32(<2 x float> %val) { ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __extendsftf2 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl __extendsftf2 ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #32 ; CHECK-GI-NEXT: ret %dst = fpext <2 x float> %val to <2 x fp128> @@ -1413,40 +1413,40 @@ define <2 x fp128> @vec_extend_f64(<2 x double> %val) { ; CHECK-SD-LABEL: vec_extend_f64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __extenddftf2 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte 
Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl __extenddftf2 ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_extend_f64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl __extenddftf2 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl __extenddftf2 ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #32 ; CHECK-GI-NEXT: ret %dst = fpext <2 x double> %val to <2 x fp128> @@ -1457,45 +1457,45 @@ define <2 x fp128> @vec_neg_sub(<2 x fp128> %in) { ; CHECK-SD-LABEL: vec_neg_sub: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: 
str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: adrp x8, .LCPI47_0 ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI47_0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __subtf3 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: bl __subtf3 ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vec_neg_sub: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: mov v2.16b, v0.16b ; CHECK-GI-NEXT: adrp x8, .LCPI47_0 -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI47_0] ; CHECK-GI-NEXT: mov v1.16b, v2.16b -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __subtf3 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; 
CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: bl __subtf3 ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #64 ; CHECK-GI-NEXT: ret %ret = fsub <2 x fp128> zeroinitializer, %in diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll index 52b09885ebb1c..c420267837d43 100644 --- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -96,14 +96,14 @@ define void @bzero_12_stack() { ; CHECK-LABEL: bzero_12_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str wzr, [sp, #8] ; CHECK-NEXT: str xzr, [sp] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %buf = alloca [12 x i8], align 1 @@ -122,7 +122,7 @@ define void @bzero_16_stack() { ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %buf = alloca [16 x i8], align 1 @@ -135,14 +135,14 @@ define void @bzero_20_stack() { ; CHECK-LABEL: bzero_20_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: add x0, sp, #8 ; 
CHECK-NEXT: stp xzr, xzr, [sp, #8] ; CHECK-NEXT: str wzr, [sp, #24] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret %buf = alloca [20 x i8], align 1 @@ -155,7 +155,7 @@ define void @bzero_26_stack() { ; CHECK-LABEL: bzero_26_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x0, sp @@ -163,7 +163,7 @@ define void @bzero_26_stack() { ; CHECK-NEXT: strh wzr, [sp, #24] ; CHECK-NEXT: str xzr, [sp, #16] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret %buf = alloca [26 x i8], align 1 @@ -176,14 +176,14 @@ define void @bzero_32_stack() { ; CHECK-LABEL: bzero_32_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret %buf = alloca [32 x i8], align 1 @@ -196,7 +196,7 @@ define void @bzero_40_stack() { ; CHECK-LABEL: bzero_40_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 @@ -204,7 +204,7 @@ define void @bzero_40_stack() { ; 
CHECK-NEXT: str xzr, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %buf = alloca [40 x i8], align 1 @@ -217,7 +217,7 @@ define void @bzero_64_stack() { ; CHECK-LABEL: bzero_64_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 @@ -225,7 +225,7 @@ define void @bzero_64_stack() { ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %buf = alloca [64 x i8], align 1 @@ -238,7 +238,7 @@ define void @bzero_72_stack() { ; CHECK-LABEL: bzero_72_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 @@ -247,7 +247,7 @@ define void @bzero_72_stack() { ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %buf = alloca [72 x i8], align 1 @@ -260,7 +260,7 @@ define void @bzero_128_stack() { ; CHECK-LABEL: bzero_128_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #144 -; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #128] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 144 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi 
v0.2d, #0000000000000000 @@ -270,7 +270,7 @@ define void @bzero_128_stack() { ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #144 ; CHECK-NEXT: ret %buf = alloca [128 x i8], align 1 @@ -346,7 +346,7 @@ define void @memset_12_stack() { ; CHECK-LABEL: memset_12_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x8, #-6148914691236517206 @@ -354,7 +354,7 @@ define void @memset_12_stack() { ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: str w8, [sp, #8] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %buf = alloca [12 x i8], align 1 @@ -374,7 +374,7 @@ define void @memset_16_stack() { ; CHECK-NEXT: stp x8, x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %buf = alloca [16 x i8], align 1 @@ -387,7 +387,7 @@ define void @memset_20_stack() { ; CHECK-LABEL: memset_20_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x8, #-6148914691236517206 @@ -395,7 +395,7 @@ define void @memset_20_stack() { ; CHECK-NEXT: stp x8, x8, [sp, #8] ; CHECK-NEXT: str w8, [sp, #24] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, 
[sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret %buf = alloca [20 x i8], align 1 @@ -408,7 +408,7 @@ define void @memset_26_stack() { ; CHECK-LABEL: memset_26_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x8, #-6148914691236517206 @@ -417,7 +417,7 @@ define void @memset_26_stack() { ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: strh w8, [sp, #24] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret %buf = alloca [26 x i8], align 1 @@ -430,14 +430,14 @@ define void @memset_32_stack() { ; CHECK-LABEL: memset_32_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret %buf = alloca [32 x i8], align 1 @@ -450,7 +450,7 @@ define void @memset_40_stack() { ; CHECK-LABEL: memset_40_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 @@ -459,7 +459,7 @@ define void @memset_40_stack() { ; CHECK-NEXT: str x8, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 
8-byte Reload ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %buf = alloca [40 x i8], align 1 @@ -472,7 +472,7 @@ define void @memset_64_stack() { ; CHECK-LABEL: memset_64_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 @@ -480,7 +480,7 @@ define void @memset_64_stack() { ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %buf = alloca [64 x i8], align 1 @@ -493,7 +493,7 @@ define void @memset_72_stack() { ; CHECK-LABEL: memset_72_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 @@ -503,7 +503,7 @@ define void @memset_72_stack() { ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %buf = alloca [72 x i8], align 1 @@ -516,7 +516,7 @@ define void @memset_128_stack() { ; CHECK-LABEL: memset_128_stack: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #144 -; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #128] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 144 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 @@ -526,7 +526,7 @@ define void @memset_128_stack() { ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: bl something -; CHECK-NEXT: ldr x30, [sp, #128] 
// 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #144 ; CHECK-NEXT: ret %buf = alloca [128 x i8], align 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll index 0d427c05e3b77..215590c91b714 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll @@ -1477,7 +1477,7 @@ define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) { ; CHECK-LABEL: frem2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 @@ -1487,14 +1487,14 @@ define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) { ; CHECK-NEXT: mov s1, v1.s[1] ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-NEXT: bl fmodf -; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #64 @@ -1507,7 +1507,7 @@ define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) { ; CHECK-LABEL: frem4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: 
.cfi_offset w30, -16 ; CHECK-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill @@ -1515,30 +1515,30 @@ define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) { ; CHECK-NEXT: mov s1, v1.s[1] ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-NEXT: bl fmodf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: mov s1, v1.s[2] ; CHECK-NEXT: bl fmodf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[2], v0.s[0] -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: mov s1, v1.s[3] ; CHECK-NEXT: bl fmodf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: mov v1.s[3], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #64 @@ -1564,7 +1564,7 @@ define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) { ; CHECK-LABEL: frem2d64: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: 
str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill @@ -1572,14 +1572,14 @@ define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) { ; CHECK-NEXT: mov d1, v1.d[1] ; CHECK-NEXT: bl fmod ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-NEXT: bl fmod -; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll b/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll index a96cad7e32dcd..1353fd281ff09 100644 --- a/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll +++ b/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll @@ -34,14 +34,14 @@ define void @odd() nounwind { ; CHECK-NOTMACHO: // %bb.0: ; CHECK-NOTMACHO-NEXT: stp d14, d12, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NOTMACHO-NEXT: stp d10, d8, [sp, #16] // 16-byte Folded Spill -; CHECK-NOTMACHO-NEXT: str x27, [sp, #32] // 8-byte Folded Spill +; CHECK-NOTMACHO-NEXT: str x27, [sp, #32] // 8-byte Spill ; CHECK-NOTMACHO-NEXT: stp x25, x23, [sp, #48] // 16-byte Folded Spill ; CHECK-NOTMACHO-NEXT: stp x21, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NOTMACHO-NEXT: //APP ; CHECK-NOTMACHO-NEXT: mov x0, #42 // =0x2a ; CHECK-NOTMACHO-NEXT: //NO_APP ; CHECK-NOTMACHO-NEXT: ldp x21, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-NOTMACHO-NEXT: ldr x27, [sp, #32] // 8-byte Folded Reload +; CHECK-NOTMACHO-NEXT: ldr x27, [sp, #32] // 8-byte Reload ; CHECK-NOTMACHO-NEXT: ldp x25, x23, [sp, #48] // 16-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ldp d10, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ldp d14, d12, [sp], #80 // 16-byte Folded Reload @@ -81,14 +81,14 @@ define void @even() nounwind { ; CHECK-NOTMACHO: // %bb.0: ; CHECK-NOTMACHO-NEXT: stp d15, d13, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NOTMACHO-NEXT: stp d11, d9, [sp, #16] // 16-byte Folded Spill -; CHECK-NOTMACHO-NEXT: str x28, [sp, #32] // 8-byte Folded Spill +; CHECK-NOTMACHO-NEXT: str x28, [sp, #32] // 8-byte Spill ; CHECK-NOTMACHO-NEXT: stp x26, x24, [sp, #48] // 16-byte Folded Spill ; CHECK-NOTMACHO-NEXT: stp x22, x20, [sp, #64] // 16-byte Folded Spill ; CHECK-NOTMACHO-NEXT: //APP ; CHECK-NOTMACHO-NEXT: mov x0, #42 // =0x2a ; CHECK-NOTMACHO-NEXT: //NO_APP ; CHECK-NOTMACHO-NEXT: ldp x22, x20, [sp, #64] // 16-byte Folded Reload -; CHECK-NOTMACHO-NEXT: ldr x28, [sp, #32] // 8-byte Folded Reload +; CHECK-NOTMACHO-NEXT: ldr x28, [sp, #32] // 8-byte Reload ; CHECK-NOTMACHO-NEXT: ldp x26, x24, [sp, #48] // 16-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ldp d11, d9, [sp, #16] // 16-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ldp d15, d13, [sp], #80 // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll b/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll index bf559da91901c..666f1cb7bcf6f 100644 --- a/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll +++ b/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll @@ -145,7 +145,7 @@ define void @call_copy_pod() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: str x19, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .seh_save_reg_x x19, 16 -; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: adrp x19, Pod @@ -154,7 +154,7 @@ define void @call_copy_pod() { ; CHECK-NEXT: bl copy_pod ; CHECK-NEXT: stp d0, d1, [x19] ; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg_x x19, 16 @@ -175,9 +175,9 @@ define void @call_copy_notcxx14aggregate() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: .seh_stackalloc 32 -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x19, 16 -; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 24 ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: adrp x19, NotCXX14Aggregate @@ -188,9 +188,9 @@ define void @call_copy_notcxx14aggregate() { ; CHECK-NEXT: ldp d0, d1, [sp] ; CHECK-NEXT: stp d0, d1, [x19] ; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 24 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x19, 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: .seh_stackalloc 32 @@ -213,7 +213,7 @@ define void @call_copy_notpod() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: str x19, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .seh_save_reg_x x19, 16 -; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: adrp x19, NotPod @@ -222,7 +222,7 @@ define void @call_copy_notpod() { ; CHECK-NEXT: bl copy_notpod ; CHECK-NEXT: stp x0, x1, [x19] ; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg_x x19, 16 diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll index 709a17e32f58e..35ffc99f7a405 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll @@ -236,7 +236,7 @@ define void @has_aligned_sret(ptr align 32 sret(%TSRet), i32) nounwind { ; CHECK-NEXT: .seh_save_any_reg_p q12, 96 ; CHECK-NEXT: stp q14, q15, [sp, #128] // 32-byte Folded Spill ; CHECK-NEXT: .seh_save_any_reg_p q14, 128 -; CHECK-NEXT: str x19, [sp, #160] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #160] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x19, 160 ; CHECK-NEXT: stp x29, x30, [sp, #168] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_fplr 168 @@ -253,7 +253,7 @@ define void @has_aligned_sret(ptr align 32 sret(%TSRet), i32) nounwind { ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldp x29, x30, [sp, #168] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_fplr 168 -; CHECK-NEXT: ldr x19, [sp, #160] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #160] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x19, 160 ; CHECK-NEXT: ldp q14, q15, [sp, #128] // 32-byte Folded Reload ; CHECK-NEXT: .seh_save_any_reg_p q14, 128 @@ -339,7 +339,7 @@ define [3 x i64] @large_array([3 x i64] %0, [2 x double], [2 x [2 x i64]]) nounw ; CHECK-NEXT: 
.seh_save_any_reg_p q12, 96 ; CHECK-NEXT: stp q14, q15, [sp, #128] // 32-byte Folded Spill ; CHECK-NEXT: .seh_save_any_reg_p q14, 128 -; CHECK-NEXT: str x19, [sp, #160] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #160] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x19, 160 ; CHECK-NEXT: stp x29, x30, [sp, #168] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_fplr 168 @@ -362,7 +362,7 @@ define [3 x i64] @large_array([3 x i64] %0, [2 x double], [2 x [2 x i64]]) nounw ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldp x29, x30, [sp, #168] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_fplr 168 -; CHECK-NEXT: ldr x19, [sp, #160] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #160] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x19, 160 ; CHECK-NEXT: ldp q14, q15, [sp, #128] // 32-byte Folded Reload ; CHECK-NEXT: .seh_save_any_reg_p q14, 128 @@ -555,7 +555,7 @@ define <8 x i16> @large_vector(<8 x i16> %0) { ; CHECK-NEXT: .seh_save_any_reg_p q12, 96 ; CHECK-NEXT: stp q14, q15, [sp, #128] // 32-byte Folded Spill ; CHECK-NEXT: .seh_save_any_reg_p q14, 128 -; CHECK-NEXT: str x19, [sp, #160] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #160] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x19, 160 ; CHECK-NEXT: stp x29, x30, [sp, #168] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_fplr 168 @@ -571,7 +571,7 @@ define <8 x i16> @large_vector(<8 x i16> %0) { ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldp x29, x30, [sp, #168] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_fplr 168 -; CHECK-NEXT: ldr x19, [sp, #160] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #160] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x19, 160 ; CHECK-NEXT: ldp q14, q15, [sp, #128] // 32-byte Folded Reload ; CHECK-NEXT: .seh_save_any_reg_p q14, 128 diff --git a/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll b/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll index 91ec870dd6d0c..b07f95e7fe474 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll +++ 
b/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll @@ -10,17 +10,17 @@ define i32 @no_int_regs(i32 %x) nounwind { ; CHECK-LABEL: no_int_regs: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x30, x29, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: str x27, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x27, [sp, #16] // 8-byte Spill ; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str w0, [sp, #28] // 4-byte Folded Spill +; CHECK-NEXT: str w0, [sp, #28] // 4-byte Spill ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr w0, [sp, #28] // 4-byte Folded Reload +; CHECK-NEXT: ldr w0, [sp, #28] // 4-byte Reload ; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x27, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x27, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp x30, x29, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -33,7 +33,7 @@ define i32 @one_int_reg(i32 %x) nounwind { ; CHECK-LABEL: one_int_reg: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x30, x29, [sp, #-80]! 
// 16-byte Folded Spill -; CHECK-NEXT: str x27, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x27, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov w30, w0 ; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill @@ -41,7 +41,7 @@ define i32 @one_int_reg(i32 %x) nounwind { ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x27, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x27, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: mov w0, w30 ; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload @@ -60,11 +60,11 @@ define float @no_float_regs(float %x) nounwind { ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll index 389969bebaea4..979e09cfb5fac 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll @@ -49,7 +49,7 @@ define void @varargs_caller() nounwind { ; CHECK-NEXT: .weak_anti_dep "#varargs_callee" ; CHECK-NEXT: "#varargs_callee" = varargs_callee ; CHECK-NEXT: bl "#varargs_callee" -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] 
// 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> ) @@ -82,7 +82,7 @@ define void @varargs_many_argscalleer() nounwind { ; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000 ; CHECK-NEXT: mov x4, sp ; CHECK-NEXT: mov w5, #16 // =0x10 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: stp x9, x8, [sp] ; CHECK-NEXT: stp q0, q0, [sp, #16] ; CHECK-NEXT: .weak_anti_dep varargs_many_argscallee @@ -90,7 +90,7 @@ define void @varargs_many_argscalleer() nounwind { ; CHECK-NEXT: .weak_anti_dep "#varargs_many_argscallee" ; CHECK-NEXT: "#varargs_many_argscallee" = varargs_many_argscallee ; CHECK-NEXT: bl "#varargs_many_argscallee" -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret call <2 x double> (double, double, double, <2 x double>, <2 x double>, ...) 
@@ -120,7 +120,7 @@ define void @varargs_caller_tail() nounwind { ; CHECK-NEXT: .weak_anti_dep "#varargs_callee" ; CHECK-NEXT: "#varargs_callee" = varargs_callee ; CHECK-NEXT: bl "#varargs_callee" -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add x4, sp, #48 ; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000 ; CHECK-NEXT: mov w1, #4 // =0x4 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll index 71e0250b36972..731274149a24a 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll @@ -9,15 +9,15 @@ define i8 @test_rmw_add_8(ptr %dst) { ; NOLSE: // %bb.0: // %entry ; NOLSE-NEXT: sub sp, sp, #32 ; NOLSE-NEXT: .cfi_def_cfa_offset 32 -; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Spill ; NOLSE-NEXT: ldrb w8, [x0] -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b .LBB0_1 ; NOLSE-NEXT: .LBB0_1: // %atomicrmw.start ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB0_2 Depth 2 -; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload -; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Reload ; NOLSE-NEXT: add w12, w9, #1 ; NOLSE-NEXT: .LBB0_2: // %atomicrmw.start ; NOLSE-NEXT: // Parent Loop BB0_1 Depth=1 @@ -32,12 +32,12 @@ define i8 @test_rmw_add_8(ptr %dst) { ; NOLSE-NEXT: .LBB0_4: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1 ; NOLSE-NEXT: subs w9, w8, w9, uxtb -; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b.ne .LBB0_1 ; NOLSE-NEXT: b .LBB0_5 ; 
NOLSE-NEXT: .LBB0_5: // %atomicrmw.end -; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Reload ; NOLSE-NEXT: add sp, sp, #32 ; NOLSE-NEXT: ret ; @@ -56,15 +56,15 @@ define i16 @test_rmw_add_16(ptr %dst) { ; NOLSE: // %bb.0: // %entry ; NOLSE-NEXT: sub sp, sp, #32 ; NOLSE-NEXT: .cfi_def_cfa_offset 32 -; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Spill ; NOLSE-NEXT: ldrh w8, [x0] -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b .LBB1_1 ; NOLSE-NEXT: .LBB1_1: // %atomicrmw.start ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB1_2 Depth 2 -; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload -; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Reload ; NOLSE-NEXT: add w12, w9, #1 ; NOLSE-NEXT: .LBB1_2: // %atomicrmw.start ; NOLSE-NEXT: // Parent Loop BB1_1 Depth=1 @@ -79,12 +79,12 @@ define i16 @test_rmw_add_16(ptr %dst) { ; NOLSE-NEXT: .LBB1_4: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1 ; NOLSE-NEXT: subs w9, w8, w9, uxth -; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b.ne .LBB1_1 ; NOLSE-NEXT: b .LBB1_5 ; NOLSE-NEXT: .LBB1_5: // %atomicrmw.end -; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Reload ; NOLSE-NEXT: add sp, sp, #32 ; NOLSE-NEXT: ret ; @@ -103,15 +103,15 @@ define i32 @test_rmw_add_32(ptr %dst) { ; NOLSE: // %bb.0: // %entry ; NOLSE-NEXT: sub sp, sp, #32 ; NOLSE-NEXT: .cfi_def_cfa_offset 32 -; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Spill ; NOLSE-NEXT: 
ldr w8, [x0] -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b .LBB2_1 ; NOLSE-NEXT: .LBB2_1: // %atomicrmw.start ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB2_2 Depth 2 -; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload -; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Reload ; NOLSE-NEXT: add w12, w9, #1 ; NOLSE-NEXT: .LBB2_2: // %atomicrmw.start ; NOLSE-NEXT: // Parent Loop BB2_1 Depth=1 @@ -126,12 +126,12 @@ define i32 @test_rmw_add_32(ptr %dst) { ; NOLSE-NEXT: .LBB2_4: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1 ; NOLSE-NEXT: subs w9, w8, w9 -; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b.ne .LBB2_1 ; NOLSE-NEXT: b .LBB2_5 ; NOLSE-NEXT: .LBB2_5: // %atomicrmw.end -; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Reload ; NOLSE-NEXT: add sp, sp, #32 ; NOLSE-NEXT: ret ; @@ -150,15 +150,15 @@ define i64 @test_rmw_add_64(ptr %dst) { ; NOLSE: // %bb.0: // %entry ; NOLSE-NEXT: sub sp, sp, #32 ; NOLSE-NEXT: .cfi_def_cfa_offset 32 -; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Spill ; NOLSE-NEXT: ldr x8, [x0] -; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Spill ; NOLSE-NEXT: b .LBB3_1 ; NOLSE-NEXT: .LBB3_1: // %atomicrmw.start ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB3_2 Depth 2 -; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload -; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 
8-byte Reload ; NOLSE-NEXT: add x12, x9, #1 ; NOLSE-NEXT: .LBB3_2: // %atomicrmw.start ; NOLSE-NEXT: // Parent Loop BB3_1 Depth=1 @@ -173,12 +173,12 @@ define i64 @test_rmw_add_64(ptr %dst) { ; NOLSE-NEXT: .LBB3_4: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1 ; NOLSE-NEXT: subs x9, x8, x9 -; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill -; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Spill +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Spill ; NOLSE-NEXT: b.ne .LBB3_1 ; NOLSE-NEXT: b .LBB3_5 ; NOLSE-NEXT: .LBB3_5: // %atomicrmw.end -; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Reload ; NOLSE-NEXT: add sp, sp, #32 ; NOLSE-NEXT: ret ; @@ -198,18 +198,18 @@ define i128 @test_rmw_add_128(ptr %dst) { ; NOLSE: // %bb.0: // %entry ; NOLSE-NEXT: sub sp, sp, #48 ; NOLSE-NEXT: .cfi_def_cfa_offset 48 -; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Spill ; NOLSE-NEXT: ldr x8, [x0, #8] ; NOLSE-NEXT: ldr x9, [x0] -; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill -; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Spill +; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Spill ; NOLSE-NEXT: b .LBB4_1 ; NOLSE-NEXT: .LBB4_1: // %atomicrmw.start ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB4_2 Depth 2 -; NOLSE-NEXT: ldr x13, [sp, #40] // 8-byte Folded Reload -; NOLSE-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload -; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x13, [sp, #40] // 8-byte Reload +; NOLSE-NEXT: ldr x11, [sp, #32] // 8-byte Reload +; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Reload ; NOLSE-NEXT: adds x14, x11, #1 ; NOLSE-NEXT: cinc x15, x13, hs ; NOLSE-NEXT: .LBB4_2: // %atomicrmw.start @@ -233,18 +233,18 @@ define i128 @test_rmw_add_128(ptr %dst) { ; NOLSE-NEXT: .LBB4_5: // %atomicrmw.start ; 
NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1 ; NOLSE-NEXT: mov x8, x12 -; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Spill ; NOLSE-NEXT: mov x9, x10 -; NOLSE-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #16] // 8-byte Spill ; NOLSE-NEXT: subs x12, x12, x13 ; NOLSE-NEXT: ccmp x10, x11, #0, eq -; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill -; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Spill +; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Spill ; NOLSE-NEXT: b.ne .LBB4_1 ; NOLSE-NEXT: b .LBB4_6 ; NOLSE-NEXT: .LBB4_6: // %atomicrmw.end -; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload -; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Reload +; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Reload ; NOLSE-NEXT: add sp, sp, #48 ; NOLSE-NEXT: ret ; @@ -252,17 +252,17 @@ define i128 @test_rmw_add_128(ptr %dst) { ; LSE: // %bb.0: // %entry ; LSE-NEXT: sub sp, sp, #48 ; LSE-NEXT: .cfi_def_cfa_offset 48 -; LSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill +; LSE-NEXT: str x0, [sp, #24] // 8-byte Spill ; LSE-NEXT: ldr x8, [x0, #8] ; LSE-NEXT: ldr x9, [x0] -; LSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill -; LSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #32] // 8-byte Spill +; LSE-NEXT: str x8, [sp, #40] // 8-byte Spill ; LSE-NEXT: b .LBB4_1 ; LSE-NEXT: .LBB4_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 -; LSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload -; LSE-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload -; LSE-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #40] // 8-byte Reload +; LSE-NEXT: ldr x10, [sp, #32] // 8-byte Reload +; LSE-NEXT: ldr x8, [sp, #24] // 8-byte Reload ; LSE-NEXT: mov x0, x10 ; LSE-NEXT: mov x1, x11 ; LSE-NEXT: adds x2, x10, #1 @@ -271,18 +271,18 @@ define i128 
@test_rmw_add_128(ptr %dst) { ; LSE-NEXT: mov x3, x9 ; LSE-NEXT: caspal x0, x1, x2, x3, [x8] ; LSE-NEXT: mov x9, x0 -; LSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #8] // 8-byte Spill ; LSE-NEXT: mov x8, x1 -; LSE-NEXT: str x8, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: str x8, [sp, #16] // 8-byte Spill ; LSE-NEXT: subs x11, x8, x11 ; LSE-NEXT: ccmp x9, x10, #0, eq -; LSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill -; LSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #32] // 8-byte Spill +; LSE-NEXT: str x8, [sp, #40] // 8-byte Spill ; LSE-NEXT: b.ne .LBB4_1 ; LSE-NEXT: b .LBB4_2 ; LSE-NEXT: .LBB4_2: // %atomicrmw.end -; LSE-NEXT: ldr x1, [sp, #16] // 8-byte Folded Reload -; LSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; LSE-NEXT: ldr x1, [sp, #16] // 8-byte Reload +; LSE-NEXT: ldr x0, [sp, #8] // 8-byte Reload ; LSE-NEXT: add sp, sp, #48 ; LSE-NEXT: ret entry: @@ -294,15 +294,15 @@ define i8 @test_rmw_nand_8(ptr %dst) { ; NOLSE: // %bb.0: // %entry ; NOLSE-NEXT: sub sp, sp, #32 ; NOLSE-NEXT: .cfi_def_cfa_offset 32 -; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Spill ; NOLSE-NEXT: ldrb w8, [x0] -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b .LBB5_1 ; NOLSE-NEXT: .LBB5_1: // %atomicrmw.start ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB5_2 Depth 2 -; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload -; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Reload ; NOLSE-NEXT: mvn w8, w9 ; NOLSE-NEXT: orr w12, w8, #0xfffffffe ; NOLSE-NEXT: .LBB5_2: // %atomicrmw.start @@ -318,12 +318,12 @@ define i8 @test_rmw_nand_8(ptr %dst) { ; NOLSE-NEXT: .LBB5_4: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1 ; NOLSE-NEXT: subs w9, w8, w9, 
uxtb -; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b.ne .LBB5_1 ; NOLSE-NEXT: b .LBB5_5 ; NOLSE-NEXT: .LBB5_5: // %atomicrmw.end -; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Reload ; NOLSE-NEXT: add sp, sp, #32 ; NOLSE-NEXT: ret ; @@ -331,25 +331,25 @@ define i8 @test_rmw_nand_8(ptr %dst) { ; LSE: // %bb.0: // %entry ; LSE-NEXT: sub sp, sp, #32 ; LSE-NEXT: .cfi_def_cfa_offset 32 -; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: str x0, [sp, #16] // 8-byte Spill ; LSE-NEXT: ldrb w8, [x0] -; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; LSE-NEXT: b .LBB5_1 ; LSE-NEXT: .LBB5_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 -; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload -; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Reload ; LSE-NEXT: mvn w8, w9 ; LSE-NEXT: orr w10, w8, #0xfffffffe ; LSE-NEXT: mov w8, w9 ; LSE-NEXT: casalb w8, w10, [x11] ; LSE-NEXT: subs w9, w8, w9, uxtb -; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: str w8, [sp, #12] // 4-byte Spill +; LSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; LSE-NEXT: b.ne .LBB5_1 ; LSE-NEXT: b .LBB5_2 ; LSE-NEXT: .LBB5_2: // %atomicrmw.end -; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Reload ; LSE-NEXT: add sp, sp, #32 ; LSE-NEXT: ret entry: @@ -362,15 +362,15 @@ define i16 @test_rmw_nand_16(ptr %dst) { ; NOLSE: // %bb.0: // %entry ; NOLSE-NEXT: sub sp, sp, #32 ; NOLSE-NEXT: .cfi_def_cfa_offset 32 -; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x0, [sp, 
#16] // 8-byte Spill ; NOLSE-NEXT: ldrh w8, [x0] -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b .LBB6_1 ; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB6_2 Depth 2 -; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload -; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Reload ; NOLSE-NEXT: mvn w8, w9 ; NOLSE-NEXT: orr w12, w8, #0xfffffffe ; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start @@ -386,12 +386,12 @@ define i16 @test_rmw_nand_16(ptr %dst) { ; NOLSE-NEXT: .LBB6_4: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB6_1 Depth=1 ; NOLSE-NEXT: subs w9, w8, w9, uxth -; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b.ne .LBB6_1 ; NOLSE-NEXT: b .LBB6_5 ; NOLSE-NEXT: .LBB6_5: // %atomicrmw.end -; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Reload ; NOLSE-NEXT: add sp, sp, #32 ; NOLSE-NEXT: ret ; @@ -399,25 +399,25 @@ define i16 @test_rmw_nand_16(ptr %dst) { ; LSE: // %bb.0: // %entry ; LSE-NEXT: sub sp, sp, #32 ; LSE-NEXT: .cfi_def_cfa_offset 32 -; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: str x0, [sp, #16] // 8-byte Spill ; LSE-NEXT: ldrh w8, [x0] -; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; LSE-NEXT: b .LBB6_1 ; LSE-NEXT: .LBB6_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 -; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload -; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Reload ; LSE-NEXT: mvn w8, 
w9 ; LSE-NEXT: orr w10, w8, #0xfffffffe ; LSE-NEXT: mov w8, w9 ; LSE-NEXT: casalh w8, w10, [x11] ; LSE-NEXT: subs w9, w8, w9, uxth -; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: str w8, [sp, #12] // 4-byte Spill +; LSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; LSE-NEXT: b.ne .LBB6_1 ; LSE-NEXT: b .LBB6_2 ; LSE-NEXT: .LBB6_2: // %atomicrmw.end -; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Reload ; LSE-NEXT: add sp, sp, #32 ; LSE-NEXT: ret entry: @@ -430,15 +430,15 @@ define i32 @test_rmw_nand_32(ptr %dst) { ; NOLSE: // %bb.0: // %entry ; NOLSE-NEXT: sub sp, sp, #32 ; NOLSE-NEXT: .cfi_def_cfa_offset 32 -; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Spill ; NOLSE-NEXT: ldr w8, [x0] -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b .LBB7_1 ; NOLSE-NEXT: .LBB7_1: // %atomicrmw.start ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB7_2 Depth 2 -; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload -; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Reload ; NOLSE-NEXT: mvn w8, w9 ; NOLSE-NEXT: orr w12, w8, #0xfffffffe ; NOLSE-NEXT: .LBB7_2: // %atomicrmw.start @@ -454,12 +454,12 @@ define i32 @test_rmw_nand_32(ptr %dst) { ; NOLSE-NEXT: .LBB7_4: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1 ; NOLSE-NEXT: subs w9, w8, w9 -; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Spill +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; NOLSE-NEXT: b.ne .LBB7_1 ; NOLSE-NEXT: b .LBB7_5 ; NOLSE-NEXT: .LBB7_5: // %atomicrmw.end -; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; 
NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Reload ; NOLSE-NEXT: add sp, sp, #32 ; NOLSE-NEXT: ret ; @@ -467,25 +467,25 @@ define i32 @test_rmw_nand_32(ptr %dst) { ; LSE: // %bb.0: // %entry ; LSE-NEXT: sub sp, sp, #32 ; LSE-NEXT: .cfi_def_cfa_offset 32 -; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: str x0, [sp, #16] // 8-byte Spill ; LSE-NEXT: ldr w8, [x0] -; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; LSE-NEXT: b .LBB7_1 ; LSE-NEXT: .LBB7_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 -; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload -; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Reload ; LSE-NEXT: mvn w8, w9 ; LSE-NEXT: orr w10, w8, #0xfffffffe ; LSE-NEXT: mov w8, w9 ; LSE-NEXT: casal w8, w10, [x11] ; LSE-NEXT: subs w9, w8, w9 -; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: str w8, [sp, #12] // 4-byte Spill +; LSE-NEXT: str w8, [sp, #28] // 4-byte Spill ; LSE-NEXT: b.ne .LBB7_1 ; LSE-NEXT: b .LBB7_2 ; LSE-NEXT: .LBB7_2: // %atomicrmw.end -; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Reload ; LSE-NEXT: add sp, sp, #32 ; LSE-NEXT: ret entry: @@ -498,15 +498,15 @@ define i64 @test_rmw_nand_64(ptr %dst) { ; NOLSE: // %bb.0: // %entry ; NOLSE-NEXT: sub sp, sp, #32 ; NOLSE-NEXT: .cfi_def_cfa_offset 32 -; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Spill ; NOLSE-NEXT: ldr x8, [x0] -; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Spill ; NOLSE-NEXT: b .LBB8_1 ; NOLSE-NEXT: .LBB8_1: // %atomicrmw.start ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB8_2 Depth 2 -; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload -; 
NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Reload ; NOLSE-NEXT: mov w8, w9 ; NOLSE-NEXT: mvn w10, w8 ; NOLSE-NEXT: // implicit-def: $x8 @@ -525,12 +525,12 @@ define i64 @test_rmw_nand_64(ptr %dst) { ; NOLSE-NEXT: .LBB8_4: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1 ; NOLSE-NEXT: subs x9, x8, x9 -; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill -; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Spill +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Spill ; NOLSE-NEXT: b.ne .LBB8_1 ; NOLSE-NEXT: b .LBB8_5 ; NOLSE-NEXT: .LBB8_5: // %atomicrmw.end -; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Reload ; NOLSE-NEXT: add sp, sp, #32 ; NOLSE-NEXT: ret ; @@ -538,14 +538,14 @@ define i64 @test_rmw_nand_64(ptr %dst) { ; LSE: // %bb.0: // %entry ; LSE-NEXT: sub sp, sp, #32 ; LSE-NEXT: .cfi_def_cfa_offset 32 -; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: str x0, [sp, #16] // 8-byte Spill ; LSE-NEXT: ldr x8, [x0] -; LSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; LSE-NEXT: str x8, [sp, #24] // 8-byte Spill ; LSE-NEXT: b .LBB8_1 ; LSE-NEXT: .LBB8_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 -; LSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload -; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: ldr x9, [sp, #24] // 8-byte Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Reload ; LSE-NEXT: mov w8, w9 ; LSE-NEXT: mvn w10, w8 ; LSE-NEXT: // implicit-def: $x8 @@ -554,12 +554,12 @@ define i64 @test_rmw_nand_64(ptr %dst) { ; LSE-NEXT: mov x8, x9 ; LSE-NEXT: casal x8, x10, [x11] ; LSE-NEXT: subs x9, x8, x9 -; LSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill -; LSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; LSE-NEXT: str x8, [sp, #8] // 8-byte Spill +; LSE-NEXT: str x8, [sp, #24] // 
8-byte Spill ; LSE-NEXT: b.ne .LBB8_1 ; LSE-NEXT: b .LBB8_2 ; LSE-NEXT: .LBB8_2: // %atomicrmw.end -; LSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; LSE-NEXT: ldr x0, [sp, #8] // 8-byte Reload ; LSE-NEXT: add sp, sp, #32 ; LSE-NEXT: ret entry: @@ -572,18 +572,18 @@ define i128 @test_rmw_nand_128(ptr %dst) { ; NOLSE: // %bb.0: // %entry ; NOLSE-NEXT: sub sp, sp, #48 ; NOLSE-NEXT: .cfi_def_cfa_offset 48 -; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Spill ; NOLSE-NEXT: ldr x8, [x0, #8] ; NOLSE-NEXT: ldr x9, [x0] -; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill -; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Spill +; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Spill ; NOLSE-NEXT: b .LBB9_1 ; NOLSE-NEXT: .LBB9_1: // %atomicrmw.start ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB9_2 Depth 2 -; NOLSE-NEXT: ldr x13, [sp, #40] // 8-byte Folded Reload -; NOLSE-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload -; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x13, [sp, #40] // 8-byte Reload +; NOLSE-NEXT: ldr x11, [sp, #32] // 8-byte Reload +; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Reload ; NOLSE-NEXT: mov w8, w11 ; NOLSE-NEXT: mvn w10, w8 ; NOLSE-NEXT: // implicit-def: $x8 @@ -611,18 +611,18 @@ define i128 @test_rmw_nand_128(ptr %dst) { ; NOLSE-NEXT: .LBB9_5: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB9_1 Depth=1 ; NOLSE-NEXT: mov x8, x12 -; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Spill ; NOLSE-NEXT: mov x9, x10 -; NOLSE-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #16] // 8-byte Spill ; NOLSE-NEXT: subs x12, x12, x13 ; NOLSE-NEXT: ccmp x10, x11, #0, eq -; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill -; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Spill +; 
NOLSE-NEXT: str x8, [sp, #40] // 8-byte Spill ; NOLSE-NEXT: b.ne .LBB9_1 ; NOLSE-NEXT: b .LBB9_6 ; NOLSE-NEXT: .LBB9_6: // %atomicrmw.end -; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload -; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Reload +; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Reload ; NOLSE-NEXT: add sp, sp, #48 ; NOLSE-NEXT: ret ; @@ -630,17 +630,17 @@ define i128 @test_rmw_nand_128(ptr %dst) { ; LSE: // %bb.0: // %entry ; LSE-NEXT: sub sp, sp, #48 ; LSE-NEXT: .cfi_def_cfa_offset 48 -; LSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill +; LSE-NEXT: str x0, [sp, #24] // 8-byte Spill ; LSE-NEXT: ldr x8, [x0, #8] ; LSE-NEXT: ldr x9, [x0] -; LSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill -; LSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #32] // 8-byte Spill +; LSE-NEXT: str x8, [sp, #40] // 8-byte Spill ; LSE-NEXT: b .LBB9_1 ; LSE-NEXT: .LBB9_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 -; LSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload -; LSE-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload -; LSE-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #40] // 8-byte Reload +; LSE-NEXT: ldr x10, [sp, #32] // 8-byte Reload +; LSE-NEXT: ldr x8, [sp, #24] // 8-byte Reload ; LSE-NEXT: mov x0, x10 ; LSE-NEXT: mov x1, x11 ; LSE-NEXT: mov w9, w10 @@ -653,18 +653,18 @@ define i128 @test_rmw_nand_128(ptr %dst) { ; LSE-NEXT: mov x3, x9 ; LSE-NEXT: caspal x0, x1, x2, x3, [x8] ; LSE-NEXT: mov x9, x0 -; LSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #8] // 8-byte Spill ; LSE-NEXT: mov x8, x1 -; LSE-NEXT: str x8, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: str x8, [sp, #16] // 8-byte Spill ; LSE-NEXT: subs x11, x8, x11 ; LSE-NEXT: ccmp x9, x10, #0, eq -; LSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill -; LSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #32] // 8-byte 
Spill +; LSE-NEXT: str x8, [sp, #40] // 8-byte Spill ; LSE-NEXT: b.ne .LBB9_1 ; LSE-NEXT: b .LBB9_2 ; LSE-NEXT: .LBB9_2: // %atomicrmw.end -; LSE-NEXT: ldr x1, [sp, #16] // 8-byte Folded Reload -; LSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; LSE-NEXT: ldr x1, [sp, #16] // 8-byte Reload +; LSE-NEXT: ldr x0, [sp, #8] // 8-byte Reload ; LSE-NEXT: add sp, sp, #48 ; LSE-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll index 24a6c3c440e18..c06215204cd76 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll @@ -521,7 +521,7 @@ define fp128 @test_atomicrmw_fadd_fp128_seq_cst_align16(ptr %ptr, fp128 %value) ; NOLSE-NEXT: ldr q1, [x0] ; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; NOLSE-NEXT: mov x19, x0 -; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill +; NOLSE-NEXT: str q0, [sp] // 16-byte Spill ; NOLSE-NEXT: b .LBB6_2 ; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1 @@ -534,11 +534,11 @@ define fp128 @test_atomicrmw_fadd_fp128_seq_cst_align16(ptr %ptr, fp128 %value) ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB6_3 Depth 2 ; NOLSE-NEXT: mov v0.16b, v1.16b -; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Spill +; NOLSE-NEXT: ldr q1, [sp] // 16-byte Reload ; NOLSE-NEXT: bl __addtf3 ; NOLSE-NEXT: str q0, [sp, #48] -; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; NOLSE-NEXT: ldp x9, x8, [sp, #48] ; NOLSE-NEXT: str q0, [sp, #64] ; NOLSE-NEXT: ldp x11, x10, [sp, #64] @@ -573,15 +573,15 @@ define fp128 @test_atomicrmw_fadd_fp128_seq_cst_align16(ptr %ptr, fp128 %value) ; LSE-NEXT: ldr q1, [x0] ; LSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; LSE-NEXT: mov x19, x0 -; LSE-NEXT: str q0, 
[sp] // 16-byte Folded Spill +; LSE-NEXT: str q0, [sp] // 16-byte Spill ; LSE-NEXT: .LBB6_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov v0.16b, v1.16b -; LSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; LSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; LSE-NEXT: str q1, [sp, #16] // 16-byte Spill +; LSE-NEXT: ldr q1, [sp] // 16-byte Reload ; LSE-NEXT: bl __addtf3 ; LSE-NEXT: str q0, [sp, #48] -; LSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; LSE-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; LSE-NEXT: ldp x0, x1, [sp, #48] ; LSE-NEXT: str q0, [sp, #64] ; LSE-NEXT: ldp x2, x3, [sp, #64] diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll index 6bb541684c2bd..c73909603abb1 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll @@ -521,7 +521,7 @@ define fp128 @test_atomicrmw_fsub_fp128_seq_cst_align16(ptr %ptr, fp128 %value) ; NOLSE-NEXT: ldr q1, [x0] ; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; NOLSE-NEXT: mov x19, x0 -; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill +; NOLSE-NEXT: str q0, [sp] // 16-byte Spill ; NOLSE-NEXT: b .LBB6_2 ; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start ; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1 @@ -534,11 +534,11 @@ define fp128 @test_atomicrmw_fsub_fp128_seq_cst_align16(ptr %ptr, fp128 %value) ; NOLSE-NEXT: // =>This Loop Header: Depth=1 ; NOLSE-NEXT: // Child Loop BB6_3 Depth 2 ; NOLSE-NEXT: mov v0.16b, v1.16b -; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Spill +; NOLSE-NEXT: ldr q1, [sp] // 16-byte Reload ; NOLSE-NEXT: bl __subtf3 ; NOLSE-NEXT: str q0, [sp, #48] -; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; NOLSE-NEXT: ldp x9, x8, [sp, #48] ; NOLSE-NEXT: str q0, [sp, #64] ; NOLSE-NEXT: ldp 
x11, x10, [sp, #64] @@ -573,15 +573,15 @@ define fp128 @test_atomicrmw_fsub_fp128_seq_cst_align16(ptr %ptr, fp128 %value) ; LSE-NEXT: ldr q1, [x0] ; LSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; LSE-NEXT: mov x19, x0 -; LSE-NEXT: str q0, [sp] // 16-byte Folded Spill +; LSE-NEXT: str q0, [sp] // 16-byte Spill ; LSE-NEXT: .LBB6_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov v0.16b, v1.16b -; LSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; LSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; LSE-NEXT: str q1, [sp, #16] // 16-byte Spill +; LSE-NEXT: ldr q1, [sp] // 16-byte Reload ; LSE-NEXT: bl __subtf3 ; LSE-NEXT: str q0, [sp, #48] -; LSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; LSE-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; LSE-NEXT: ldp x0, x1, [sp, #48] ; LSE-NEXT: str q0, [sp, #64] ; LSE-NEXT: ldp x2, x3, [sp, #64] diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll index c2bb032eed78e..59f6db6bf67a9 100644 --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -173,17 +173,17 @@ define <4 x i32> @sign_4xi32_multi_use(<4 x i32> %a) { ; CHECK-LABEL: sign_4xi32_multi_use: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: cmlt v1.4s, v0.4s, #0 ; CHECK-NEXT: cmge v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: orr v1.4s, #1 -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEXT: bl use_4xi1 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: 
ret %c = icmp sgt <4 x i32> %a, diff --git a/llvm/test/CodeGen/AArch64/combine-storetomstore.ll b/llvm/test/CodeGen/AArch64/combine-storetomstore.ll index aaa6ba36561df..1e4a695d1f4e8 100644 --- a/llvm/test/CodeGen/AArch64/combine-storetomstore.ll +++ b/llvm/test/CodeGen/AArch64/combine-storetomstore.ll @@ -866,7 +866,7 @@ define void @test_masked_store_intervening(<8 x i32> %x, ptr %ptr, <8 x i1> %mas ; SVE-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill ; SVE-NEXT: ldp q1, q3, [x0] ; SVE-NEXT: movi v0.2d, #0000000000000000 -; SVE-NEXT: str d8, [sp, #64] // 8-byte Folded Spill +; SVE-NEXT: str d8, [sp, #64] // 8-byte Spill ; SVE-NEXT: fmov d8, d2 ; SVE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; SVE-NEXT: mov x19, x0 @@ -878,14 +878,14 @@ define void @test_masked_store_intervening(<8 x i32> %x, ptr %ptr, <8 x i1> %mas ; SVE-NEXT: ldp q3, q2, [sp, #16] // 32-byte Folded Reload ; SVE-NEXT: zip1 v1.8b, v8.8b, v0.8b ; SVE-NEXT: ushll v0.4s, v0.4h, #0 -; SVE-NEXT: ldr d8, [sp, #64] // 8-byte Folded Reload +; SVE-NEXT: ldr d8, [sp, #64] // 8-byte Reload ; SVE-NEXT: shl v0.4s, v0.4s, #31 ; SVE-NEXT: ushll v1.4s, v1.4h, #0 ; SVE-NEXT: cmlt v0.4s, v0.4s, #0 ; SVE-NEXT: shl v1.4s, v1.4s, #31 ; SVE-NEXT: bsl v0.16b, v2.16b, v3.16b -; SVE-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload -; SVE-NEXT: ldr q3, [sp] // 16-byte Folded Reload +; SVE-NEXT: ldr q2, [sp, #48] // 16-byte Reload +; SVE-NEXT: ldr q3, [sp] // 16-byte Reload ; SVE-NEXT: cmlt v1.4s, v1.4s, #0 ; SVE-NEXT: bsl v1.16b, v2.16b, v3.16b ; SVE-NEXT: stp q1, q0, [x19] diff --git a/llvm/test/CodeGen/AArch64/exception-handling-windows-elf.ll b/llvm/test/CodeGen/AArch64/exception-handling-windows-elf.ll index f38bb8613b7b0..1f1ce4da0f618 100644 --- a/llvm/test/CodeGen/AArch64/exception-handling-windows-elf.ll +++ b/llvm/test/CodeGen/AArch64/exception-handling-windows-elf.ll @@ -13,14 +13,14 @@ define void @bar() personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: bar: ; CHECK: // %bb.0: // %continue 
; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: adrp x8, :got:foo ; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ldr x8, [x8, :got_lo12:foo] ; CHECK-NEXT: blr x8 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %exn.slot = alloca ptr diff --git a/llvm/test/CodeGen/AArch64/fadd-combines.ll b/llvm/test/CodeGen/AArch64/fadd-combines.ll index 76d29a50416e3..198b0d58d5ba0 100644 --- a/llvm/test/CodeGen/AArch64/fadd-combines.ll +++ b/llvm/test/CodeGen/AArch64/fadd-combines.ll @@ -185,12 +185,12 @@ define double @test7(double %a, double %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: fmov d2, #-2.00000000 -; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fadd d8, d0, d1 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: bl use -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fcmp-fp128.ll b/llvm/test/CodeGen/AArch64/fcmp-fp128.ll index a2b4b61864741..baaa615f8ec23 100644 --- a/llvm/test/CodeGen/AArch64/fcmp-fp128.ll +++ b/llvm/test/CodeGen/AArch64/fcmp-fp128.ll @@ -8,7 +8,7 @@ define double @oeq(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: oeq: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -17,7 +17,7 @@ define double @oeq(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __eqtf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, eq ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -25,7 +25,7 @@ define double @oeq(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: oeq: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -34,7 +34,7 @@ define double @oeq(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __eqtf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, eq ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -48,7 +48,7 @@ define double @ogt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: ogt: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -57,7 +57,7 @@ define double @ogt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, gt ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -65,7 +65,7 @@ define double @ogt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: ogt: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -74,7 +74,7 @@ define double @ogt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, gt ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -88,7 +88,7 @@ define double @olt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: olt: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -97,7 +97,7 @@ define double @olt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __lttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, mi ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -105,7 +105,7 @@ define double @olt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: olt: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -114,7 +114,7 @@ define double @olt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, mi ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -128,7 +128,7 @@ define double @ole(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: ole: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -137,7 +137,7 @@ define double @ole(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __letf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, le ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -145,7 +145,7 @@ define double @ole(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: ole: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -154,7 +154,7 @@ define double @ole(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __letf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, le ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -226,7 +226,7 @@ define double @ord(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: ord: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -235,7 +235,7 @@ define double @ord(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __unordtf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, eq ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -243,7 +243,7 @@ define double @ord(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: ord: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -252,7 +252,7 @@ define double @ord(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __unordtf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, eq ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -266,7 +266,7 @@ define double @uno(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: uno: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -275,7 +275,7 @@ define double @uno(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __unordtf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, ne ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -283,7 +283,7 @@ define double @uno(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: uno: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -292,7 +292,7 @@ define double @uno(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __unordtf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, ne ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -362,7 +362,7 @@ define double @ugt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: ugt: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -371,7 +371,7 @@ define double @ugt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __letf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, gt ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -379,7 +379,7 @@ define double @ugt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: ugt: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -388,7 +388,7 @@ define double @ugt(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __letf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, gt ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -402,7 +402,7 @@ define double @uge(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: uge: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -411,7 +411,7 @@ define double @uge(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __lttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, pl ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -419,7 +419,7 @@ define double @uge(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: uge: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -428,7 +428,7 @@ define double @uge(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, pl ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -442,7 +442,7 @@ define double @ult(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: ult: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -451,7 +451,7 @@ define double @ult(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __getf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, mi ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -459,7 +459,7 @@ define double @ult(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: ult: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -468,7 +468,7 @@ define double @ult(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __getf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, mi ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -482,7 +482,7 @@ define double @ule(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: ule: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -491,7 +491,7 @@ define double @ule(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, le ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -499,7 +499,7 @@ define double @ule(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: ule: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -508,7 +508,7 @@ define double @ule(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, le ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -522,7 +522,7 @@ define double @une(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: une: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -531,7 +531,7 @@ define double @une(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __netf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, ne ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -539,7 +539,7 @@ define double @une(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: une: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -548,7 +548,7 @@ define double @une(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __netf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, ne ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll index 30fb82ea5c517..d360a9da7d966 100644 --- a/llvm/test/CodeGen/AArch64/fcmp.ll +++ b/llvm/test/CodeGen/AArch64/fcmp.ll @@ -8,32 +8,32 @@ define fp128 @f128_fp128(fp128 %a, fp128 %b, fp128 %d, fp128 %e) { ; CHECK-SD-LABEL: f128_fp128: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; 
CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q2, q3, [sp] // 32-byte Folded Spill ; CHECK-SD-NEXT: bl __lttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: b.pl .LBB0_2 ; CHECK-SD-NEXT: // %bb.1: // %entry -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: .LBB0_2: // %entry -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: f128_fp128: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: stp q3, q2, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: mov d0, v2.d[1] ; CHECK-GI-NEXT: mov d1, v3.d[1] ; CHECK-GI-NEXT: fcsel d2, d2, d3, mi @@ -54,7 +54,7 @@ define i128 @f128_i128(fp128 %a, fp128 %b, i128 %d, i128 %e) { ; CHECK-SD-LABEL: f128_i128: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #80 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 @@ -75,7 +75,7 @@ define i128 @f128_i128(fp128 %a, fp128 %b, i128 %d, i128 %e) { ; CHECK-SD-NEXT: bl __lttf2 ; CHECK-SD-NEXT: mov w8, w0 ; CHECK-SD-NEXT: mov x0, x20 -; 
CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: cmp w8, #0 ; CHECK-SD-NEXT: csel x1, x21, x19, mi ; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload @@ -116,7 +116,7 @@ define double @f128_double(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-LABEL: f128_double: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -125,7 +125,7 @@ define double @f128_double(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-SD-NEXT: fmov d9, d2 ; CHECK-SD-NEXT: bl __lttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel d0, d9, d8, mi ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -133,7 +133,7 @@ define double @f128_double(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-LABEL: f128_double: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -142,7 +142,7 @@ define double @f128_double(fp128 %a, fp128 %b, double %d, double %e) { ; CHECK-GI-NEXT: fmov d9, d3 ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel d0, d8, d9, mi ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -156,7 +156,7 @@ define float @f128_float(fp128 %a, fp128 %b, float %d, float %e) { ; CHECK-SD-LABEL: f128_float: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: .cfi_offset b8, -24 @@ -165,7 +165,7 @@ define float @f128_float(fp128 %a, fp128 %b, float %d, float %e) { ; CHECK-SD-NEXT: fmov s9, s2 ; CHECK-SD-NEXT: bl __lttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: fcsel s0, s9, s8, mi ; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -173,7 +173,7 @@ define float @f128_float(fp128 %a, fp128 %b, float %d, float %e) { ; CHECK-GI-LABEL: f128_float: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -182,7 +182,7 @@ define float @f128_float(fp128 %a, fp128 %b, float %d, float %e) { ; CHECK-GI-NEXT: fmov s9, s3 ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: fcsel s0, s8, s9, mi ; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -236,7 +236,7 @@ define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) { ; CHECK-SD-NOFP16-LABEL: f128_half: ; CHECK-SD-NOFP16: // %bb.0: // %entry ; CHECK-SD-NOFP16-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-SD-NOFP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NOFP16-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NOFP16-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NOFP16-NEXT: .cfi_offset b8, -24 @@ -245,7 +245,7 @@ define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) { ; CHECK-SD-NOFP16-NEXT: fmov s9, s2 ; CHECK-SD-NOFP16-NEXT: bl __lttf2 ; CHECK-SD-NOFP16-NEXT: cmp w0, #0 -; CHECK-SD-NOFP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NOFP16-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NOFP16-NEXT: fcsel s0, s9, s8, mi ; CHECK-SD-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-SD-NOFP16-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload @@ -254,7 +254,7 @@ define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) { ; CHECK-SD-FP16-LABEL: f128_half: ; CHECK-SD-FP16: // %bb.0: // %entry ; CHECK-SD-FP16-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SD-FP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-FP16-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-FP16-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-FP16-NEXT: .cfi_offset w30, -16 ; CHECK-SD-FP16-NEXT: .cfi_offset b8, -24 @@ -263,7 +263,7 @@ define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) { ; CHECK-SD-FP16-NEXT: fmov s9, s2 ; CHECK-SD-FP16-NEXT: bl __lttf2 ; CHECK-SD-FP16-NEXT: cmp w0, #0 -; CHECK-SD-FP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-FP16-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-FP16-NEXT: fcsel h0, h9, h8, mi ; CHECK-SD-FP16-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload ; CHECK-SD-FP16-NEXT: ret @@ -271,7 +271,7 @@ define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) { ; CHECK-GI-LABEL: f128_half: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -282,7 +282,7 @@ define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) { ; CHECK-GI-NEXT: fmov w8, s8 ; CHECK-GI-NEXT: fmov w9, s9 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: csel w8, w8, w9, mi ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 killed $s0 @@ -429,7 +429,7 @@ define <2 x fp128> @v2f128_fp128(<2 x fp128> %a, <2 x fp128> %b, <2 x fp128> %d, ; CHECK-SD-LABEL: v2f128_fp128: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #112 -; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 112 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q4, q5, [sp] // 
32-byte Folded Spill @@ -440,19 +440,19 @@ define <2 x fp128> @v2f128_fp128(<2 x fp128> %a, <2 x fp128> %b, <2 x fp128> %d, ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: b.pl .LBB12_2 ; CHECK-SD-NEXT: // %bb.1: // %entry -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-SD-NEXT: .LBB12_2: // %entry ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-SD-NEXT: bl __lttf2 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: b.pl .LBB12_4 ; CHECK-SD-NEXT: // %bb.3: // %entry -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: .LBB12_4: // %entry -; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #112 ; CHECK-SD-NEXT: ret ; @@ -504,7 +504,7 @@ define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d, ; CHECK-SD-LABEL: v3f128_fp128: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #112 -; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 112 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q4, [sp] // 32-byte Folded Spill @@ -516,7 +516,7 @@ define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d, ; CHECK-SD-NEXT: b.mi .LBB13_2 ; CHECK-SD-NEXT: // %bb.1: ; CHECK-SD-NEXT: ldr q0, [sp, #128] -; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: 
.LBB13_2: // %entry ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: bl __lttf2 @@ -524,7 +524,7 @@ define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d, ; CHECK-SD-NEXT: b.mi .LBB13_4 ; CHECK-SD-NEXT: // %bb.3: ; CHECK-SD-NEXT: ldr q0, [sp, #144] -; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-SD-NEXT: .LBB13_4: // %entry ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-SD-NEXT: bl __lttf2 @@ -534,14 +534,14 @@ define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d, ; CHECK-SD-NEXT: csel x8, x9, x8, mi ; CHECK-SD-NEXT: ldp q0, q1, [sp, #64] // 32-byte Folded Reload ; CHECK-SD-NEXT: ldr q2, [x8] -; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #112 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: v3f128_fp128: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #192 -; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 192 ; CHECK-GI-NEXT: .cfi_offset w19, -8 @@ -551,13 +551,13 @@ define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d, ; CHECK-GI-NEXT: mov v1.16b, v3.16b ; CHECK-GI-NEXT: stp q5, q2, [sp, #32] // 32-byte Folded Spill ; CHECK-GI-NEXT: ldr q2, [sp, #192] -; CHECK-GI-NEXT: str q2, [sp, #144] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q2, [sp, #144] // 16-byte Spill ; CHECK-GI-NEXT: ldr q2, [sp, #208] ; CHECK-GI-NEXT: stp q2, q6, [sp, #64] // 32-byte Folded Spill ; CHECK-GI-NEXT: ldr q2, [sp, #224] ; CHECK-GI-NEXT: stp q7, q2, [sp, #96] // 32-byte Folded Spill ; CHECK-GI-NEXT: ldr q2, [sp, #240] -; CHECK-GI-NEXT: str q2, [sp, #128] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q2, [sp, #128] // 
16-byte Spill ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 @@ -568,7 +568,7 @@ define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d, ; CHECK-GI-NEXT: ldp q5, q4, [sp, #64] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w19, #0 ; CHECK-GI-NEXT: ldp q7, q6, [sp, #96] // 32-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Reload ; CHECK-GI-NEXT: mov d0, v4.d[1] ; CHECK-GI-NEXT: mov d1, v5.d[1] ; CHECK-GI-NEXT: fcsel d4, d4, d5, mi @@ -610,26 +610,26 @@ define <2 x double> @v2f128_double(<2 x fp128> %a, <2 x fp128> %b, <2 x double> ; CHECK-SD-LABEL: v2f128_double: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #96 -; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: mov v1.16b, v3.16b ; CHECK-SD-NEXT: stp q4, q5, [sp, #48] // 32-byte Folded Spill -; CHECK-SD-NEXT: str q2, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl __lttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: cset w8, mi ; CHECK-SD-NEXT: sbfx x8, x8, #0, #1 ; CHECK-SD-NEXT: fmov d0, x8 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: bl __lttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte 
Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-SD-NEXT: cset w8, mi ; CHECK-SD-NEXT: sbfx x8, x8, #0, #1 ; CHECK-SD-NEXT: fmov d0, x8 @@ -675,40 +675,40 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double> ; CHECK-SD-LABEL: v3f128_double: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #160 -; CHECK-SD-NEXT: str x30, [sp, #144] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #144] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 160 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q2, q5, [sp, #112] // 32-byte Folded Spill ; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6 ; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7 ; CHECK-SD-NEXT: ldr d5, [sp, #184] -; CHECK-SD-NEXT: str q3, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q3, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp d3, d2, [sp, #168] ; CHECK-SD-NEXT: mov v6.d[1], v7.d[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: mov v1.16b, v4.16b -; CHECK-SD-NEXT: str q5, [sp, #96] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q5, [sp, #96] // 16-byte Spill ; CHECK-SD-NEXT: ldr d5, [sp, #160] ; CHECK-SD-NEXT: mov v3.d[1], v2.d[0] -; CHECK-SD-NEXT: str q5, [sp, #80] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q5, [sp, #80] // 16-byte Spill ; CHECK-SD-NEXT: stp q6, q3, [sp, #32] // 32-byte Folded Spill ; CHECK-SD-NEXT: bl __lttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: cset w8, mi ; CHECK-SD-NEXT: sbfx x8, x8, #0, #1 ; CHECK-SD-NEXT: fmov d0, x8 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; 
CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: bl __lttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cset w8, mi ; CHECK-SD-NEXT: sbfx x8, x8, #0, #1 ; CHECK-SD-NEXT: fmov d1, x8 ; CHECK-SD-NEXT: mov v1.d[1], v0.d[0] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #112] // 32-byte Folded Reload ; CHECK-SD-NEXT: bl __lttf2 ; CHECK-SD-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload @@ -716,8 +716,8 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double> ; CHECK-SD-NEXT: ldp q2, q4, [sp, #64] // 32-byte Folded Reload ; CHECK-SD-NEXT: cset w8, mi ; CHECK-SD-NEXT: sbfx x8, x8, #0, #1 -; CHECK-SD-NEXT: ldr q3, [sp, #96] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q3, [sp, #96] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #144] // 8-byte Reload ; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: fmov d2, x8 ; CHECK-SD-NEXT: bsl v2.16b, v4.16b, v3.16b @@ -731,7 +731,7 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double> ; CHECK-GI-LABEL: v3f128_double: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #176 -; CHECK-GI-NEXT: str x30, [sp, #128] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #128] // 8-byte Spill ; CHECK-GI-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 @@ -748,8 +748,8 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double> ; CHECK-GI-NEXT: ldr x20, [sp, #200] ; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6 ; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7 -; CHECK-GI-NEXT: str q7, [sp, #64] // 16-byte Folded Spill -; 
CHECK-GI-NEXT: str q2, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q7, [sp, #64] // 16-byte Spill +; CHECK-GI-NEXT: str q2, [sp, #112] // 16-byte Spill ; CHECK-GI-NEXT: ldr d2, [sp, #192] ; CHECK-GI-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill ; CHECK-GI-NEXT: bl __lttf2 @@ -767,7 +767,7 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double> ; CHECK-GI-NEXT: sbfx x9, x22, #0, #1 ; CHECK-GI-NEXT: fmov d1, x8 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #128] // 8-byte Reload ; CHECK-GI-NEXT: mov v2.d[1], v0.d[0] ; CHECK-GI-NEXT: fmov d0, x8 ; CHECK-GI-NEXT: cset w8, mi diff --git a/llvm/test/CodeGen/AArch64/fexplog.ll b/llvm/test/CodeGen/AArch64/fexplog.ll index f13e2fcd1c448..b136af79fc502 100644 --- a/llvm/test/CodeGen/AArch64/fexplog.ll +++ b/llvm/test/CodeGen/AArch64/fexplog.ll @@ -63,20 +63,20 @@ define <2 x double> @exp_v2f64(<2 x double> %a) { ; CHECK-SD-LABEL: exp_v2f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl exp ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl exp -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte 
Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -84,8 +84,8 @@ define <2 x double> @exp_v2f64(<2 x double> %a) { ; CHECK-GI-LABEL: exp_v2f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -93,13 +93,13 @@ define <2 x double> @exp_v2f64(<2 x double> %a) { ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl exp ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl exp -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #32 @@ -114,7 +114,7 @@ define <3 x double> @exp_v3f64(<3 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: str d10, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -8 ; CHECK-SD-NEXT: .cfi_offset b8, -16 @@ -131,7 +131,7 @@ define <3 x double> @exp_v3f64(<3 x double> %a) { ; CHECK-SD-NEXT: bl exp ; CHECK-SD-NEXT: fmov d1, d9 ; CHECK-SD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-SD-NEXT: fmov d2, d0 ; CHECK-SD-NEXT: fmov d0, d10 ; CHECK-SD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -141,7 +141,7 @@ define <3 x double> @exp_v3f64(<3 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -158,7 +158,7 @@ define <3 x double> @exp_v3f64(<3 x double> %a) { ; CHECK-GI-NEXT: bl exp ; CHECK-GI-NEXT: fmov d1, d8 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-GI-NEXT: fmov d2, d0 ; CHECK-GI-NEXT: fmov d0, d10 ; CHECK-GI-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -172,33 +172,33 @@ define <4 x double> @exp_v4f64(<4 x double> %a) { ; CHECK-SD-LABEL: exp_v4f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 
16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl exp ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl exp -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl exp ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl exp ; CHECK-SD-NEXT: fmov d1, d0 ; CHECK-SD-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.d[1], v2.d[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -207,35 +207,35 @@ define <4 x double> @exp_v4f64(<4 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; 
CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 ; CHECK-GI-NEXT: .cfi_offset b9, -32 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: mov d9, v1.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl exp ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl exp ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl exp ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d9 ; CHECK-GI-NEXT: bl exp ; CHECK-GI-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: add sp, sp, #80 @@ -249,21 +249,21 @@ define <2 x float> @exp_v2f32(<2 x float> %a) { ; CHECK-SD-LABEL: exp_v2f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -272,8 +272,8 @@ define <2 x float> @exp_v2f32(<2 x float> %a) { ; CHECK-GI-LABEL: exp_v2f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -282,13 +282,13 @@ define <2 x float> @exp_v2f32(<2 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl expf -; CHECK-GI-NEXT: ldr q1, [sp] 
// 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #32 @@ -302,27 +302,27 @@ define <3 x float> @exp_v3f32(<3 x float> %a) { ; CHECK-SD-LABEL: exp_v3f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] 
// 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -332,7 +332,7 @@ define <3 x float> @exp_v3f32(<3 x float> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #64 ; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -342,16 +342,16 @@ define <3 x float> @exp_v3f32(<3 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[2], v0.s[0] @@ -367,34 +367,34 @@ define <4 x float> @exp_v4f32(<4 x float> %a) { ; CHECK-SD-LABEL: exp_v4f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, 
[sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -403,9 +403,9 @@ define <4 x float> @exp_v4f32(<4 x float> %a) { ; CHECK-GI-LABEL: exp_v4f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; 
CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -417,24 +417,24 @@ define <4 x float> @exp_v4f32(<4 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -449,61 +449,61 @@ define <8 x float> @exp_v8f32(<8 x float> %a) { ; CHECK-SD-LABEL: exp_v8f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, 
[sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte 
Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fmov s2, s0 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -514,7 +514,7 @@ define <8 x float> @exp_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp 
d11, d10, [sp, #128] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -523,7 +523,7 @@ define <8 x float> @exp_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: .cfi_offset b11, -48 ; CHECK-GI-NEXT: .cfi_offset b12, -56 ; CHECK-GI-NEXT: .cfi_offset b13, -64 -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] ; CHECK-GI-NEXT: mov s10, v0.s[3] @@ -533,48 +533,48 @@ define <8 x float> @exp_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: mov s13, v1.s[3] ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 
+; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s11 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s12 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s13 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] ; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v2.16b, v1.16b @@ -591,69 +591,69 @@ define <7 x half> @exp_v7f16(<7 x half> %a) { ; CHECK-SD-LABEL: exp_v7f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; 
CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: 
ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -664,7 +664,7 @@ define <7 x half> @exp_v7f16(<7 x half> %a) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #96] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, 
#128] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 160 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -683,47 +683,47 @@ define <7 x half> @exp_v7f16(<7 x half> %a) { ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, 
d10, [sp, #112] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[3], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -738,39 +738,39 @@ define <4 x half> @exp_v4f16(<4 x half> %a) { ; CHECK-SD-LABEL: exp_v4f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v2.h[1], v1.h[0] -; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill +; 
CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl expf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v2.h[0] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -779,9 +779,9 @@ define <4 x half> @exp_v4f16(<4 x half> %a) { ; CHECK-GI-LABEL: exp_v4f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -795,25 +795,25 @@ define <4 x half> @exp_v4f16(<4 x half> %a) { ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; 
CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] @@ -830,69 +830,69 @@ define <8 x half> @exp_v8f16(<8 x half> %a) { ; CHECK-SD-LABEL: exp_v8f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded 
Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str 
q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -900,11 +900,11 @@ define <8 x half> @exp_v8f16(<8 x half> %a) { ; CHECK-GI-LABEL: exp_v8f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #176 -; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Spill ; CHECK-GI-NEXT: stp d13, d12, [sp, #120] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #136] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #152] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -925,47 +925,47 @@ define <8 x half> @exp_v8f16(<8 x half> %a) { ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl 
expf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: ldp q3, q2, [sp, #64] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #152] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, 
#120] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload @@ -987,7 +987,7 @@ define <16 x half> @exp_v16f16(<16 x half> %a) { ; CHECK-SD-LABEL: exp_v16f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill @@ -995,121 +995,121 @@ define <16 x half> @exp_v16f16(<16 x half> %a) { ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt 
h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; 
CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] 
// 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl expf ; CHECK-SD-NEXT: fmov s1, s0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: fcvt h2, s1 ; CHECK-SD-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] @@ -1136,7 +1136,7 @@ define <16 x half> 
@exp_v16f16(<16 x half> %a) { ; CHECK-GI-NEXT: .cfi_offset b14, -72 ; CHECK-GI-NEXT: .cfi_offset b15, -80 ; CHECK-GI-NEXT: mov v2.16b, v1.16b -; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: mov h14, v1.h[1] ; CHECK-GI-NEXT: mov h1, v1.h[2] ; CHECK-GI-NEXT: mov h15, v0.h[1] @@ -1147,132 +1147,132 @@ define <16 x half> @exp_v16f16(<16 x half> %a) { ; CHECK-GI-NEXT: mov h12, v0.h[6] ; CHECK-GI-NEXT: mov h13, v0.h[7] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[3] -; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[4] -; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[5] -; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[6] -; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[7] -; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Spill ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h15 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #224] // 
16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf -; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf -; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Reload ; 
CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf -; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf -; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf -; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl expf -; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Reload +; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] ; CHECK-GI-NEXT: ldp q1, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp 
d9, d8, [sp, #288] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #272] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #256] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #240] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: fcvt h2, s0 -; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[7], 
v0.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] ; CHECK-GI-NEXT: mov v0.16b, v3.16b @@ -1287,17 +1287,17 @@ define <2 x fp128> @exp_v2fp128(<2 x fp128> %a) { ; CHECK-LABEL: exp_v2fp128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NEXT: bl expl -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl expl ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret entry: @@ -1366,20 +1366,20 @@ define <2 x double> @exp2_v2f64(<2 x double> %a) { ; CHECK-SD-LABEL: exp2_v2f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl exp2 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl exp2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte 
Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -1387,8 +1387,8 @@ define <2 x double> @exp2_v2f64(<2 x double> %a) { ; CHECK-GI-LABEL: exp2_v2f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -1396,13 +1396,13 @@ define <2 x double> @exp2_v2f64(<2 x double> %a) { ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl exp2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl exp2 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #32 @@ -1417,7 +1417,7 @@ define <3 x double> @exp2_v3f64(<3 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: str d10, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -8 ; CHECK-SD-NEXT: .cfi_offset b8, -16 @@ -1434,7 +1434,7 @@ define <3 x double> @exp2_v3f64(<3 x double> %a) { ; CHECK-SD-NEXT: bl exp2 ; CHECK-SD-NEXT: fmov d1, d9 ; CHECK-SD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-SD-NEXT: fmov d2, d0 ; CHECK-SD-NEXT: fmov d0, d10 ; CHECK-SD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -1444,7 +1444,7 @@ define <3 x double> @exp2_v3f64(<3 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -1461,7 +1461,7 @@ define <3 x double> @exp2_v3f64(<3 x double> %a) { ; CHECK-GI-NEXT: bl exp2 ; CHECK-GI-NEXT: fmov d1, d8 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-GI-NEXT: fmov d2, d0 ; CHECK-GI-NEXT: fmov d0, d10 ; CHECK-GI-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -1475,33 +1475,33 @@ define <4 x double> @exp2_v4f64(<4 x double> %a) { ; CHECK-SD-LABEL: exp2_v4f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str 
q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl exp2 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl exp2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl exp2 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl exp2 ; CHECK-SD-NEXT: fmov d1, d0 ; CHECK-SD-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.d[1], v2.d[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -1510,35 +1510,35 @@ define <4 x double> @exp2_v4f64(<4 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded 
Spill +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 ; CHECK-GI-NEXT: .cfi_offset b9, -32 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: mov d9, v1.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl exp2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl exp2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl exp2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d9 ; CHECK-GI-NEXT: bl exp2 ; CHECK-GI-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: add sp, sp, #80 @@ -1552,21 +1552,21 @@ define <2 x float> @exp2_v2f32(<2 x float> %a) { ; CHECK-SD-LABEL: exp2_v2f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 
8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -1575,8 +1575,8 @@ define <2 x float> @exp2_v2f32(<2 x float> %a) { ; CHECK-GI-LABEL: exp2_v2f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -1585,13 +1585,13 @@ define <2 x float> @exp2_v2f32(<2 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl exp2f 
-; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #32 @@ -1605,27 +1605,27 @@ define <3 x float> @exp2_v3f32(<3 x float> %a) { ; CHECK-SD-LABEL: exp2_v3f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def 
$q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -1635,7 +1635,7 @@ define <3 x float> @exp2_v3f32(<3 x float> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #64 ; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -1645,16 +1645,16 @@ define <3 x float> @exp2_v3f32(<3 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[2], v0.s[0] @@ -1670,34 +1670,34 @@ define <4 x float> @exp2_v4f32(<4 x float> %a) { ; CHECK-SD-LABEL: exp2_v4f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, 
#16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -1706,9 +1706,9 @@ define <4 x float> @exp2_v4f32(<4 x float> %a) { ; CHECK-GI-LABEL: exp2_v4f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; 
CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -1720,24 +1720,24 @@ define <4 x float> @exp2_v4f32(<4 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -1752,61 +1752,61 @@ define <8 x float> @exp2_v8f32(<8 x float> %a) { ; CHECK-SD-LABEL: exp2_v8f32: ; CHECK-SD: // %bb.0: 
// %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; 
CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fmov s2, s0 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -1817,7 +1817,7 @@ define <8 x float> @exp2_v8f32(<8 x float> %a) { ; 
CHECK-GI-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -1826,7 +1826,7 @@ define <8 x float> @exp2_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: .cfi_offset b11, -48 ; CHECK-GI-NEXT: .cfi_offset b12, -56 ; CHECK-GI-NEXT: .cfi_offset b13, -64 -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] ; CHECK-GI-NEXT: mov s10, v0.s[3] @@ -1836,48 +1836,48 @@ define <8 x float> @exp2_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: mov s13, v1.s[3] ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: 
// kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s11 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s12 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s13 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: ldp q2, q1, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] ; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v2.16b, v1.16b @@ -1894,69 +1894,69 @@ define <7 x half> @exp2_v7f16(<7 x half> %a) { ; CHECK-SD-LABEL: exp2_v7f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] 
// 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; 
CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -1967,7 +1967,7 @@ define <7 x half> @exp2_v7f16(<7 x half> %a) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #96] // 16-byte Folded 
Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #128] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 160 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -1986,47 +1986,47 @@ define <7 x half> @exp2_v7f16(<7 x half> %a) { ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, 
#80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[3], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -2041,39 +2041,39 @@ define <4 x half> @exp2_v4f16(<4 x half> %a) { ; CHECK-SD-LABEL: exp2_v4f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr 
q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v2.h[1], v1.h[0] -; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl exp2f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v2.h[0] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -2082,9 +2082,9 @@ define <4 x half> @exp2_v4f16(<4 x half> %a) { ; CHECK-GI-LABEL: exp2_v4f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -2098,25 +2098,25 @@ define <4 x half> @exp2_v4f16(<4 x half> %a) { ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; 
CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] @@ -2133,69 +2133,69 @@ define <8 x half> @exp2_v8f16(<8 x half> %a) { ; CHECK-SD-LABEL: exp2_v8f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; 
CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; 
CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -2203,11 +2203,11 @@ define <8 x half> @exp2_v8f16(<8 x half> %a) { ; CHECK-GI-LABEL: exp2_v8f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #176 -; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Spill ; CHECK-GI-NEXT: stp d13, d12, [sp, #120] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #136] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #152] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -2228,47 +2228,47 @@ define <8 x half> @exp2_v8f16(<8 x half> %a) { ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt 
h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: ldp q3, q2, [sp, #64] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #152] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Reload ; CHECK-GI-NEXT: 
mov v1.h[1], v3.h[0] -; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload @@ -2290,7 +2290,7 @@ define <16 x half> @exp2_v16f16(<16 x half> %a) { ; CHECK-SD-LABEL: exp2_v16f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill @@ -2298,121 +2298,121 @@ define <16 x half> @exp2_v16f16(<16 x half> %a) { ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; 
CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; 
CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 
16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl exp2f ; CHECK-SD-NEXT: fmov s1, s0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr 
x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: fcvt h2, s1 ; CHECK-SD-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] @@ -2439,7 +2439,7 @@ define <16 x half> @exp2_v16f16(<16 x half> %a) { ; CHECK-GI-NEXT: .cfi_offset b14, -72 ; CHECK-GI-NEXT: .cfi_offset b15, -80 ; CHECK-GI-NEXT: mov v2.16b, v1.16b -; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: mov h14, v1.h[1] ; CHECK-GI-NEXT: mov h1, v1.h[2] ; CHECK-GI-NEXT: mov h15, v0.h[1] @@ -2450,132 +2450,132 @@ define <16 x half> @exp2_v16f16(<16 x half> %a) { ; CHECK-GI-NEXT: mov h12, v0.h[6] ; CHECK-GI-NEXT: mov h13, v0.h[7] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[3] -; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[4] -; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[5] -; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[6] -; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[7] -; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Spill ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h15 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill ; 
CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f -; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; 
CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f -; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f -; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f -; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f -; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl exp2f -; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Reload +; 
CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] ; CHECK-GI-NEXT: ldp q1, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #288] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #272] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #256] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #240] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: fcvt h2, s0 -; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload 
+; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] ; CHECK-GI-NEXT: mov v0.16b, v3.16b @@ -2590,17 +2590,17 @@ define <2 x fp128> @exp2_v2fp128(<2 x fp128> %a) { ; CHECK-LABEL: exp2_v2fp128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NEXT: bl exp2l -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl exp2l ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret entry: @@ -2669,20 +2669,20 @@ define <2 x double> @log_v2f64(<2 x double> %a) { ; CHECK-SD-LABEL: log_v2f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl log ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded 
Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl log -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -2690,8 +2690,8 @@ define <2 x double> @log_v2f64(<2 x double> %a) { ; CHECK-GI-LABEL: log_v2f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -2699,13 +2699,13 @@ define <2 x double> @log_v2f64(<2 x double> %a) { ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl log ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl log -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #32 @@ -2720,7 +2720,7 @@ define <3 x double> @log_v3f64(<3 x double> %a) { ; 
CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -8 ; CHECK-SD-NEXT: .cfi_offset b8, -16 @@ -2737,7 +2737,7 @@ define <3 x double> @log_v3f64(<3 x double> %a) { ; CHECK-SD-NEXT: bl log ; CHECK-SD-NEXT: fmov d1, d9 ; CHECK-SD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-SD-NEXT: fmov d2, d0 ; CHECK-SD-NEXT: fmov d0, d10 ; CHECK-SD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -2747,7 +2747,7 @@ define <3 x double> @log_v3f64(<3 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -2764,7 +2764,7 @@ define <3 x double> @log_v3f64(<3 x double> %a) { ; CHECK-GI-NEXT: bl log ; CHECK-GI-NEXT: fmov d1, d8 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-GI-NEXT: fmov d2, d0 ; CHECK-GI-NEXT: fmov d0, d10 ; CHECK-GI-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -2778,33 +2778,33 @@ define <4 x double> @log_v4f64(<4 x double> %a) { ; CHECK-SD-LABEL: log_v4f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 
; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl log ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl log -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl log ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl log ; CHECK-SD-NEXT: fmov d1, d0 ; CHECK-SD-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.d[1], v2.d[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -2813,35 +2813,35 @@ define <4 x double> @log_v4f64(<4 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded 
Spill -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 ; CHECK-GI-NEXT: .cfi_offset b9, -32 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: mov d9, v1.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl log ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl log ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl log ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d9 ; CHECK-GI-NEXT: bl log ; CHECK-GI-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: add sp, sp, #80 @@ -2855,21 +2855,21 @@ define <2 x float> @log_v2f32(<2 x float> %a) { ; CHECK-SD-LABEL: log_v2f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub 
sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -2878,8 +2878,8 @@ define <2 x float> @log_v2f32(<2 x float> %a) { ; CHECK-GI-LABEL: log_v2f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -2888,13 +2888,13 @@ define <2 x float> @log_v2f32(<2 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; 
CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl logf -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #32 @@ -2908,27 +2908,27 @@ define <3 x float> @log_v3f32(<3 x float> %a) { ; CHECK-SD-LABEL: log_v3f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; 
CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -2938,7 +2938,7 @@ define <3 x float> @log_v3f32(<3 x float> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #64 ; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -2948,16 +2948,16 @@ define <3 x float> @log_v3f32(<3 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[2], v0.s[0] @@ -2973,34 +2973,34 @@ define <4 x float> @log_v4f32(<4 x float> %a) { ; CHECK-SD-LABEL: log_v4f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset 
w30, -16 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -3009,9 +3009,9 @@ define <4 x float> @log_v4f32(<4 x float> %a) { ; CHECK-GI-LABEL: log_v4f32: ; CHECK-GI: // %bb.0: // %entry ; 
CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -3023,24 +3023,24 @@ define <4 x float> @log_v4f32(<4 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -3055,61 +3055,61 @@ define <8 x float> @log_v8f32(<8 x float> %a) { ; CHECK-SD-LABEL: 
log_v8f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, 
#16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fmov s2, s0 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -3120,7 +3120,7 @@ define <8 x float> 
@log_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -3129,7 +3129,7 @@ define <8 x float> @log_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: .cfi_offset b11, -48 ; CHECK-GI-NEXT: .cfi_offset b12, -56 ; CHECK-GI-NEXT: .cfi_offset b13, -64 -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] ; CHECK-GI-NEXT: mov s10, v0.s[3] @@ -3139,48 +3139,48 @@ define <8 x float> @log_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: mov s13, v1.s[3] ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: 
bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s11 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s12 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s13 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] ; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v2.16b, v1.16b @@ -3197,69 +3197,69 @@ define <7 x half> @log_v7f16(<7 x half> %a) { ; CHECK-SD-LABEL: log_v7f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: 
str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 
16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -3270,7 +3270,7 @@ define <7 x half> @log_v7f16(<7 x half> %a) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #96] // 16-byte 
Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #128] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 160 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -3289,47 +3289,47 @@ define <7 x half> @log_v7f16(<7 x half> %a) { ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, 
#80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[3], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -3344,39 +3344,39 @@ define <4 x half> @log_v4f16(<4 x half> %a) { ; CHECK-SD-LABEL: log_v4f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, 
[sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v2.h[1], v1.h[0] -; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl logf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v2.h[0] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -3385,9 +3385,9 @@ define <4 x half> @log_v4f16(<4 x half> %a) { ; CHECK-GI-LABEL: log_v4f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -3401,25 +3401,25 @@ define <4 x half> @log_v4f16(<4 x half> %a) { ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; 
CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] @@ -3436,69 +3436,69 @@ define <8 x half> @log_v8f16(<8 x half> %a) { ; CHECK-SD-LABEL: log_v8f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: 
mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] 
// 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -3506,11 +3506,11 @@ define <8 x half> @log_v8f16(<8 x half> %a) { ; CHECK-GI-LABEL: log_v8f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #176 -; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Spill ; CHECK-GI-NEXT: stp d13, d12, [sp, #120] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #136] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #152] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -3531,47 +3531,47 @@ define <8 x half> @log_v8f16(<8 x half> %a) { ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, 
[sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: ldp q3, q2, [sp, #64] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #152] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NEXT: 
ldr d14, [sp, #112] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload @@ -3593,7 +3593,7 @@ define <16 x half> @log_v16f16(<16 x half> %a) { ; CHECK-SD-LABEL: log_v16f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill @@ -3601,121 +3601,121 @@ define <16 x half> @log_v16f16(<16 x half> %a) { ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte 
Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded 
Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, 
#16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl logf ; CHECK-SD-NEXT: fmov s1, s0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: fcvt h2, s1 
; CHECK-SD-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] @@ -3742,7 +3742,7 @@ define <16 x half> @log_v16f16(<16 x half> %a) { ; CHECK-GI-NEXT: .cfi_offset b14, -72 ; CHECK-GI-NEXT: .cfi_offset b15, -80 ; CHECK-GI-NEXT: mov v2.16b, v1.16b -; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: mov h14, v1.h[1] ; CHECK-GI-NEXT: mov h1, v1.h[2] ; CHECK-GI-NEXT: mov h15, v0.h[1] @@ -3753,132 +3753,132 @@ define <16 x half> @log_v16f16(<16 x half> %a) { ; CHECK-GI-NEXT: mov h12, v0.h[6] ; CHECK-GI-NEXT: mov h13, v0.h[7] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[3] -; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[4] -; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[5] -; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[6] -; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[7] -; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Spill ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h15 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; 
CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf -; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov 
s0, s1 ; CHECK-GI-NEXT: bl logf -; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf -; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf -; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf -; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl logf -; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Reload +; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: ldp x29, x30, [sp, 
#304] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] ; CHECK-GI-NEXT: ldp q1, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #288] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #272] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #256] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #240] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: fcvt h2, s0 -; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], 
v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] ; CHECK-GI-NEXT: mov v0.16b, v3.16b @@ -3893,17 +3893,17 @@ define <2 x fp128> @log_v2fp128(<2 x fp128> %a) { ; CHECK-LABEL: log_v2fp128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NEXT: bl logl -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl logl ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret entry: @@ -3972,20 +3972,20 @@ define <2 x double> @log2_v2f64(<2 x double> %a) { ; CHECK-SD-LABEL: log2_v2f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl log2 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, 
[sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl log2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -3993,8 +3993,8 @@ define <2 x double> @log2_v2f64(<2 x double> %a) { ; CHECK-GI-LABEL: log2_v2f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -4002,13 +4002,13 @@ define <2 x double> @log2_v2f64(<2 x double> %a) { ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl log2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl log2 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #32 @@ -4023,7 +4023,7 @@ define <3 x double> @log2_v3f64(<3 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: str d10, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -8 ; CHECK-SD-NEXT: .cfi_offset b8, -16 @@ -4040,7 +4040,7 @@ define <3 x double> @log2_v3f64(<3 x double> %a) { ; CHECK-SD-NEXT: bl log2 ; CHECK-SD-NEXT: fmov d1, d9 ; CHECK-SD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-SD-NEXT: fmov d2, d0 ; CHECK-SD-NEXT: fmov d0, d10 ; CHECK-SD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -4050,7 +4050,7 @@ define <3 x double> @log2_v3f64(<3 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -4067,7 +4067,7 @@ define <3 x double> @log2_v3f64(<3 x double> %a) { ; CHECK-GI-NEXT: bl log2 ; CHECK-GI-NEXT: fmov d1, d8 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-GI-NEXT: fmov d2, d0 ; CHECK-GI-NEXT: fmov d0, d10 ; CHECK-GI-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -4081,33 +4081,33 @@ define <4 x double> @log2_v4f64(<4 x double> %a) { ; CHECK-SD-LABEL: log2_v4f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str 
q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl log2 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl log2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl log2 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl log2 ; CHECK-SD-NEXT: fmov d1, d0 ; CHECK-SD-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.d[1], v2.d[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -4116,35 +4116,35 @@ define <4 x double> @log2_v4f64(<4 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded 
Spill +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 ; CHECK-GI-NEXT: .cfi_offset b9, -32 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: mov d9, v1.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl log2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl log2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl log2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d9 ; CHECK-GI-NEXT: bl log2 ; CHECK-GI-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: add sp, sp, #80 @@ -4158,21 +4158,21 @@ define <2 x float> @log2_v2f32(<2 x float> %a) { ; CHECK-SD-LABEL: log2_v2f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 
8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -4181,8 +4181,8 @@ define <2 x float> @log2_v2f32(<2 x float> %a) { ; CHECK-GI-LABEL: log2_v2f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -4191,13 +4191,13 @@ define <2 x float> @log2_v2f32(<2 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl log2f 
-; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #32 @@ -4211,27 +4211,27 @@ define <3 x float> @log2_v3f32(<3 x float> %a) { ; CHECK-SD-LABEL: log2_v3f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def 
$q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -4241,7 +4241,7 @@ define <3 x float> @log2_v3f32(<3 x float> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #64 ; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -4251,16 +4251,16 @@ define <3 x float> @log2_v3f32(<3 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[2], v0.s[0] @@ -4276,34 +4276,34 @@ define <4 x float> @log2_v4f32(<4 x float> %a) { ; CHECK-SD-LABEL: log2_v4f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, 
#16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -4312,9 +4312,9 @@ define <4 x float> @log2_v4f32(<4 x float> %a) { ; CHECK-GI-LABEL: log2_v4f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; 
CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -4326,24 +4326,24 @@ define <4 x float> @log2_v4f32(<4 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -4358,61 +4358,61 @@ define <8 x float> @log2_v8f32(<8 x float> %a) { ; CHECK-SD-LABEL: log2_v8f32: ; CHECK-SD: // %bb.0: 
// %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; 
CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fmov s2, s0 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -4423,7 +4423,7 @@ define <8 x float> @log2_v8f32(<8 x float> %a) { ; 
CHECK-GI-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -4432,7 +4432,7 @@ define <8 x float> @log2_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: .cfi_offset b11, -48 ; CHECK-GI-NEXT: .cfi_offset b12, -56 ; CHECK-GI-NEXT: .cfi_offset b13, -64 -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] ; CHECK-GI-NEXT: mov s10, v0.s[3] @@ -4442,48 +4442,48 @@ define <8 x float> @log2_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: mov s13, v1.s[3] ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: 
// kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s11 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s12 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s13 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: ldp q2, q1, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] ; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v2.16b, v1.16b @@ -4500,69 +4500,69 @@ define <7 x half> @log2_v7f16(<7 x half> %a) { ; CHECK-SD-LABEL: log2_v7f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] 
// 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; 
CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -4573,7 +4573,7 @@ define <7 x half> @log2_v7f16(<7 x half> %a) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #96] // 16-byte Folded 
Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #128] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 160 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -4592,47 +4592,47 @@ define <7 x half> @log2_v7f16(<7 x half> %a) { ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, 
#80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[3], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -4647,39 +4647,39 @@ define <4 x half> @log2_v4f16(<4 x half> %a) { ; CHECK-SD-LABEL: log2_v4f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr 
q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v2.h[1], v1.h[0] -; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl log2f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v2.h[0] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -4688,9 +4688,9 @@ define <4 x half> @log2_v4f16(<4 x half> %a) { ; CHECK-GI-LABEL: log2_v4f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -4704,25 +4704,25 @@ define <4 x half> @log2_v4f16(<4 x half> %a) { ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; 
CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] @@ -4739,69 +4739,69 @@ define <8 x half> @log2_v8f16(<8 x half> %a) { ; CHECK-SD-LABEL: log2_v8f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; 
CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; 
CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -4809,11 +4809,11 @@ define <8 x half> @log2_v8f16(<8 x half> %a) { ; CHECK-GI-LABEL: log2_v8f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #176 -; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Spill ; CHECK-GI-NEXT: stp d13, d12, [sp, #120] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #136] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #152] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -4834,47 +4834,47 @@ define <8 x half> @log2_v8f16(<8 x half> %a) { ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt 
h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: ldp q3, q2, [sp, #64] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #152] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Reload ; CHECK-GI-NEXT: 
mov v1.h[1], v3.h[0] -; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload @@ -4896,7 +4896,7 @@ define <16 x half> @log2_v16f16(<16 x half> %a) { ; CHECK-SD-LABEL: log2_v16f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill @@ -4904,121 +4904,121 @@ define <16 x half> @log2_v16f16(<16 x half> %a) { ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; 
CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; 
CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 
16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log2f ; CHECK-SD-NEXT: fmov s1, s0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr 
x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: fcvt h2, s1 ; CHECK-SD-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] @@ -5045,7 +5045,7 @@ define <16 x half> @log2_v16f16(<16 x half> %a) { ; CHECK-GI-NEXT: .cfi_offset b14, -72 ; CHECK-GI-NEXT: .cfi_offset b15, -80 ; CHECK-GI-NEXT: mov v2.16b, v1.16b -; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: mov h14, v1.h[1] ; CHECK-GI-NEXT: mov h1, v1.h[2] ; CHECK-GI-NEXT: mov h15, v0.h[1] @@ -5056,132 +5056,132 @@ define <16 x half> @log2_v16f16(<16 x half> %a) { ; CHECK-GI-NEXT: mov h12, v0.h[6] ; CHECK-GI-NEXT: mov h13, v0.h[7] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[3] -; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[4] -; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[5] -; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[6] -; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[7] -; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Spill ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h15 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill ; 
CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f -; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; 
CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f -; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f -; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f -; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f -; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log2f -; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Reload +; 
CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] ; CHECK-GI-NEXT: ldp q1, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #288] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #272] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #256] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #240] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: fcvt h2, s0 -; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload 
+; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] ; CHECK-GI-NEXT: mov v0.16b, v3.16b @@ -5196,17 +5196,17 @@ define <2 x fp128> @log2_v2fp128(<2 x fp128> %a) { ; CHECK-LABEL: log2_v2fp128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NEXT: bl log2l -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl log2l ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret entry: @@ -5275,20 +5275,20 @@ define <2 x double> @log10_v2f64(<2 x double> %a) { ; CHECK-SD-LABEL: log10_v2f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl log10 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte 
Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl log10 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -5296,8 +5296,8 @@ define <2 x double> @log10_v2f64(<2 x double> %a) { ; CHECK-GI-LABEL: log10_v2f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -5305,13 +5305,13 @@ define <2 x double> @log10_v2f64(<2 x double> %a) { ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl log10 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl log10 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #32 @@ -5326,7 +5326,7 @@ define <3 x double> 
@log10_v3f64(<3 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -8 ; CHECK-SD-NEXT: .cfi_offset b8, -16 @@ -5343,7 +5343,7 @@ define <3 x double> @log10_v3f64(<3 x double> %a) { ; CHECK-SD-NEXT: bl log10 ; CHECK-SD-NEXT: fmov d1, d9 ; CHECK-SD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-SD-NEXT: fmov d2, d0 ; CHECK-SD-NEXT: fmov d0, d10 ; CHECK-SD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -5353,7 +5353,7 @@ define <3 x double> @log10_v3f64(<3 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -5370,7 +5370,7 @@ define <3 x double> @log10_v3f64(<3 x double> %a) { ; CHECK-GI-NEXT: bl log10 ; CHECK-GI-NEXT: fmov d1, d8 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-GI-NEXT: fmov d2, d0 ; CHECK-GI-NEXT: fmov d0, d10 ; CHECK-GI-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -5384,33 +5384,33 @@ define <4 x double> @log10_v4f64(<4 x double> %a) { ; CHECK-SD-LABEL: log10_v4f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 
8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl log10 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl log10 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl log10 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl log10 ; CHECK-SD-NEXT: fmov d1, d0 ; CHECK-SD-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.d[1], v2.d[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -5419,35 +5419,35 @@ define <4 x double> @log10_v4f64(<4 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, 
#80 ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 ; CHECK-GI-NEXT: .cfi_offset b9, -32 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: mov d9, v1.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl log10 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl log10 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl log10 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d9 ; CHECK-GI-NEXT: bl log10 ; CHECK-GI-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: add sp, sp, #80 @@ -5461,21 +5461,21 @@ define <2 x float> @log10_v2f32(<2 x float> %a) { ; 
CHECK-SD-LABEL: log10_v2f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -5484,8 +5484,8 @@ define <2 x float> @log10_v2f32(<2 x float> %a) { ; CHECK-GI-LABEL: log10_v2f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -5494,13 +5494,13 @@ define <2 x float> @log10_v2f32(<2 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: 
str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl log10f -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #32 @@ -5514,27 +5514,27 @@ define <3 x float> @log10_v3f32(<3 x float> %a) { ; CHECK-SD-LABEL: log10_v3f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl log10f -; 
CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -5544,7 +5544,7 @@ define <3 x float> @log10_v3f32(<3 x float> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #64 ; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -5554,16 +5554,16 @@ define <3 x float> @log10_v3f32(<3 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[2], v0.s[0] @@ -5579,34 +5579,34 @@ define <4 x float> @log10_v4f32(<4 x float> %a) { ; CHECK-SD-LABEL: log10_v4f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; 
CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -5615,9 
+5615,9 @@ define <4 x float> @log10_v4f32(<4 x float> %a) { ; CHECK-GI-LABEL: log10_v4f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -5629,24 +5629,24 @@ define <4 x float> @log10_v4f32(<4 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], 
v0.s[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -5661,61 +5661,61 @@ define <8 x float> @log10_v8f32(<8 x float> %a) { ; CHECK-SD-LABEL: log10_v8f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 
killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fmov s2, s0 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 
8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -5726,7 +5726,7 @@ define <8 x float> @log10_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -5735,7 +5735,7 @@ define <8 x float> @log10_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: .cfi_offset b11, -48 ; CHECK-GI-NEXT: .cfi_offset b12, -56 ; CHECK-GI-NEXT: .cfi_offset b13, -64 -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] ; CHECK-GI-NEXT: mov s10, v0.s[3] @@ -5745,48 +5745,48 @@ define <8 x float> @log10_v8f32(<8 x float> %a) { ; CHECK-GI-NEXT: mov s13, v1.s[3] ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: 
str q0, [sp, #80] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s11 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s12 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s13 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: ldp q2, q1, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] ; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v2.16b, v1.16b @@ -5803,69 +5803,69 @@ define <7 x half> @log10_v7f16(<7 x half> %a) { 
; CHECK-SD-LABEL: log10_v7f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov 
v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -5876,7 +5876,7 @@ define <7 x half> @log10_v7f16(<7 x half> %a) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #96] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #128] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 160 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -5895,47 +5895,47 @@ define <7 x half> @log10_v7f16(<7 x half> %a) { ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; 
CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[3], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -5950,39 +5950,39 @@ define <4 x half> @log10_v4f16(<4 x half> %a) { ; CHECK-SD-LABEL: log10_v4f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; 
CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v2.h[1], v1.h[0] -; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl log10f -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v2.h[0] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -5991,9 +5991,9 @@ define <4 x half> @log10_v4f16(<4 x half> %a) { ; CHECK-GI-LABEL: log10_v4f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -6007,25 +6007,25 @@ define <4 x half> @log10_v4f16(<4 x half> %a) { ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; 
CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] @@ -6042,69 +6042,69 @@ define <8 x half> @log10_v8f16(<8 x half> %a) { ; CHECK-SD-LABEL: log10_v8f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, 
[sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: 
fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -6112,11 +6112,11 @@ define <8 x half> @log10_v8f16(<8 x half> %a) { ; CHECK-GI-LABEL: log10_v8f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #176 -; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Spill ; CHECK-GI-NEXT: stp d13, d12, [sp, #120] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #136] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #152] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 
176 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -6137,47 +6137,47 @@ define <8 x half> @log10_v8f16(<8 x half> %a) { ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: ldp q3, q2, [sp, #64] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, 
[sp, #152] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload @@ -6199,7 +6199,7 @@ define <16 x half> @log10_v16f16(<16 x half> %a) { ; CHECK-SD-LABEL: log10_v16f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill @@ -6207,121 +6207,121 @@ define <16 x half> @log10_v16f16(<16 x half> %a) { ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 
+; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; 
CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, 
v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] 
// 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl log10f ; CHECK-SD-NEXT: fmov s1, s0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: fcvt h2, s1 ; CHECK-SD-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] @@ -6348,7 +6348,7 @@ define <16 x half> @log10_v16f16(<16 x half> %a) { ; CHECK-GI-NEXT: .cfi_offset b14, -72 ; CHECK-GI-NEXT: .cfi_offset b15, -80 ; CHECK-GI-NEXT: mov v2.16b, v1.16b -; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: mov h14, v1.h[1] ; CHECK-GI-NEXT: mov h1, v1.h[2] ; CHECK-GI-NEXT: mov h15, v0.h[1] @@ -6359,132 +6359,132 @@ define <16 x half> @log10_v16f16(<16 x half> %a) { ; CHECK-GI-NEXT: mov h12, v0.h[6] ; CHECK-GI-NEXT: mov h13, v0.h[7] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[3] -; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[4] -; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[5] -; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[6] -; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[7] -; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Spill ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h15 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #192] 
// 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; 
CHECK-GI-NEXT: bl log10f -; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f -; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f -; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f -; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f -; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #160] // 
16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl log10f -; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Reload +; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] ; CHECK-GI-NEXT: ldp q1, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #288] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #272] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #256] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #240] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], 
v2.h[0] ; CHECK-GI-NEXT: fcvt h2, s0 -; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] ; CHECK-GI-NEXT: mov v0.16b, v3.16b @@ -6499,17 +6499,17 @@ define <2 x fp128> @log10_v2fp128(<2 x fp128> %a) { ; CHECK-LABEL: log10_v2fp128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NEXT: bl log10l -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl log10l ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll index b65334e2461fd..118c627f1041b 100644 --- a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll +++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll @@ -44,38 +44,38 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) { ; CHECK-NEON-LABEL: 
fmul_pow2_ldexp_4xfloat: ; CHECK-NEON: // %bb.0: ; CHECK-NEON-NEXT: sub sp, sp, #48 -; CHECK-NEON-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEON-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEON-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEON-NEXT: .cfi_offset w30, -16 ; CHECK-NEON-NEXT: mov w0, v0.s[1] -; CHECK-NEON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEON-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEON-NEXT: fmov s0, #9.00000000 ; CHECK-NEON-NEXT: bl ldexpf -; CHECK-NEON-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEON-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEON-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEON-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEON-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEON-NEXT: fmov s0, #9.00000000 ; CHECK-NEON-NEXT: fmov w0, s1 ; CHECK-NEON-NEXT: bl ldexpf -; CHECK-NEON-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEON-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEON-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEON-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEON-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEON-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEON-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEON-NEXT: mov w0, v0.s[2] ; CHECK-NEON-NEXT: fmov s0, #9.00000000 ; CHECK-NEON-NEXT: bl ldexpf -; CHECK-NEON-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEON-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEON-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEON-NEXT: mov v1.s[2], v0.s[0] -; CHECK-NEON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEON-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEON-NEXT: mov w0, v0.s[3] ; CHECK-NEON-NEXT: fmov s0, #9.00000000 -; CHECK-NEON-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEON-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEON-NEXT: bl ldexpf -; CHECK-NEON-NEXT: ldr q1, [sp] // 16-byte 
Folded Reload +; CHECK-NEON-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEON-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEON-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEON-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEON-NEXT: mov v1.s[3], v0.s[0] ; CHECK-NEON-NEXT: mov v0.16b, v1.16b ; CHECK-NEON-NEXT: add sp, sp, #48 diff --git a/llvm/test/CodeGen/AArch64/fp8-sme2-cvtn.ll b/llvm/test/CodeGen/AArch64/fp8-sme2-cvtn.ll index 6a91d85a71baf..3afeaf4e5043e 100644 --- a/llvm/test/CodeGen/AArch64/fp8-sme2-cvtn.ll +++ b/llvm/test/CodeGen/AArch64/fp8-sme2-cvtn.ll @@ -8,7 +8,7 @@ define { , } @cvtn_f16_tuple(i64 %stride, p ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z10, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -23,7 +23,7 @@ define { , } @cvtn_f16_tuple(i64 %stride, p ; CHECK-NEXT: fcvtn z1.b, { z10.h, z11.h } ; CHECK-NEXT: ldr z11, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -49,7 +49,7 @@ define { , } @cvtnt_f32_tuple(i64 %stride, ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z10, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -65,7 +65,7 @@ define { , } @cvtnt_f32_tuple(i64 %stride, ; CHECK-NEXT: fcvtnt z1.b, { z10.s, z11.s } ; CHECK-NEXT: ldr z11, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll index 637c02875b84e..21c1b82dc4404 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -698,7 +698,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-CVT-GI-LABEL: stest_f64i64: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-48]! 
// 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 48 @@ -718,7 +718,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixdfti ; CHECK-CVT-GI-NEXT: cmp x19, x21 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lo ; CHECK-CVT-GI-NEXT: cmp x20, #0 ; CHECK-CVT-GI-NEXT: cset w9, mi @@ -760,7 +760,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-FP16-GI-LABEL: stest_f64i64: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-48]! // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 @@ -780,7 +780,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixdfti ; CHECK-FP16-GI-NEXT: cmp x19, x21 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lo ; CHECK-FP16-GI-NEXT: cmp x20, #0 ; CHECK-FP16-GI-NEXT: cset w9, mi @@ -832,22 +832,22 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-CVT-SD-LABEL: utest_f64i64: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill 
; CHECK-CVT-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: mov d0, v0.d[1] ; CHECK-CVT-SD-NEXT: bl __fixunsdfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixunsdfti ; CHECK-CVT-SD-NEXT: cmp x1, #0 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-CVT-SD-NEXT: cmp x20, #0 ; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq @@ -861,22 +861,22 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-FP16-SD-LABEL: utest_f64i64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: mov d0, v0.d[1] ; CHECK-FP16-SD-NEXT: bl __fixunsdfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixunsdfti ; CHECK-FP16-SD-NEXT: cmp x1, #0 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] 
// 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-FP16-SD-NEXT: cmp x20, #0 ; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq @@ -890,7 +890,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-CVT-GI-LABEL: utest_f64i64: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-CVT-GI-NEXT: .cfi_offset w19, -8 @@ -905,7 +905,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixunsdfti ; CHECK-CVT-GI-NEXT: cmp x20, #1 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lo ; CHECK-CVT-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-CVT-GI-NEXT: cmp x1, #1 @@ -924,7 +924,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-FP16-GI-LABEL: utest_f64i64: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -939,7 +939,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixunsdfti ; CHECK-FP16-GI-NEXT: cmp x20, #1 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lo ; CHECK-FP16-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-FP16-GI-NEXT: cmp x1, #1 @@ -966,22 +966,22 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-CVT-SD-LABEL: ustest_f64i64: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-CVT-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixdfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: mov d0, v0.d[1] ; CHECK-CVT-SD-NEXT: bl __fixdfti ; CHECK-CVT-SD-NEXT: cmp x1, #1 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, lt ; CHECK-CVT-SD-NEXT: csinc x9, x1, xzr, lt ; CHECK-CVT-SD-NEXT: cmp x20, #1 @@ -1003,22 
+1003,22 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-FP16-SD-LABEL: ustest_f64i64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixdfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov d0, v0.d[1] ; CHECK-FP16-SD-NEXT: bl __fixdfti ; CHECK-FP16-SD-NEXT: cmp x1, #1 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, lt ; CHECK-FP16-SD-NEXT: csinc x9, x1, xzr, lt ; CHECK-FP16-SD-NEXT: cmp x20, #1 @@ -1040,7 +1040,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-CVT-GI-LABEL: ustest_f64i64: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-CVT-GI-NEXT: .cfi_offset w19, -8 @@ -1055,7 +1055,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixdfti ; CHECK-CVT-GI-NEXT: cmp x20, #1 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lt ; CHECK-CVT-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-CVT-GI-NEXT: cmp x1, #1 @@ -1090,7 +1090,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-FP16-GI-LABEL: ustest_f64i64: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -1105,7 +1105,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixdfti ; CHECK-FP16-GI-NEXT: cmp x20, #1 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lt ; CHECK-FP16-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-FP16-GI-NEXT: cmp x1, #1 @@ -1162,7 +1162,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-CVT-GI-LABEL: stest_f32i64: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-48]! 
// 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 48 @@ -1183,7 +1183,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixsfti ; CHECK-CVT-GI-NEXT: cmp x19, x21 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lo ; CHECK-CVT-GI-NEXT: cmp x20, #0 ; CHECK-CVT-GI-NEXT: cset w9, mi @@ -1225,7 +1225,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-FP16-GI-LABEL: stest_f32i64: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-48]! // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 @@ -1246,7 +1246,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixsfti ; CHECK-FP16-GI-NEXT: cmp x19, x21 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lo ; CHECK-FP16-GI-NEXT: cmp x20, #0 ; CHECK-FP16-GI-NEXT: cset w9, mi @@ -1298,23 +1298,23 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-CVT-SD-LABEL: utest_f32i64: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded 
Spill ; CHECK-CVT-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: mov s0, v0.s[1] ; CHECK-CVT-SD-NEXT: bl __fixunssfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixunssfti ; CHECK-CVT-SD-NEXT: cmp x1, #0 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-CVT-SD-NEXT: cmp x20, #0 ; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq @@ -1328,23 +1328,23 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-FP16-SD-LABEL: utest_f32i64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: mov s0, v0.s[1] ; CHECK-FP16-SD-NEXT: bl __fixunssfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: // kill: def $s0 killed $s0 
killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixunssfti ; CHECK-FP16-SD-NEXT: cmp x1, #0 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-FP16-SD-NEXT: cmp x20, #0 ; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq @@ -1358,7 +1358,7 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-CVT-GI-LABEL: utest_f32i64: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-CVT-GI-NEXT: .cfi_offset w19, -8 @@ -1374,7 +1374,7 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixunssfti ; CHECK-CVT-GI-NEXT: cmp x20, #1 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lo ; CHECK-CVT-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-CVT-GI-NEXT: cmp x1, #1 @@ -1393,7 +1393,7 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-FP16-GI-LABEL: utest_f32i64: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -1409,7 +1409,7 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixunssfti ; CHECK-FP16-GI-NEXT: cmp x20, #1 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lo ; CHECK-FP16-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-FP16-GI-NEXT: cmp x1, #1 @@ -1436,23 +1436,23 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-CVT-SD-LABEL: ustest_f32i64: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-CVT-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixsfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: mov s0, v0.s[1] ; CHECK-CVT-SD-NEXT: bl __fixsfti ; CHECK-CVT-SD-NEXT: cmp x1, #1 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csinc x8, x1, xzr, lt ; CHECK-CVT-SD-NEXT: csel x9, x0, 
xzr, lt ; CHECK-CVT-SD-NEXT: cmp x20, #1 @@ -1474,23 +1474,23 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-FP16-SD-LABEL: ustest_f32i64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixsfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov s0, v0.s[1] ; CHECK-FP16-SD-NEXT: bl __fixsfti ; CHECK-FP16-SD-NEXT: cmp x1, #1 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csinc x8, x1, xzr, lt ; CHECK-FP16-SD-NEXT: csel x9, x0, xzr, lt ; CHECK-FP16-SD-NEXT: cmp x20, #1 @@ -1512,7 +1512,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-CVT-GI-LABEL: ustest_f32i64: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-CVT-GI-NEXT: .cfi_offset w19, -8 @@ -1528,7 +1528,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixsfti ; CHECK-CVT-GI-NEXT: cmp x20, #1 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lt ; CHECK-CVT-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-CVT-GI-NEXT: cmp x1, #1 @@ -1563,7 +1563,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-FP16-GI-LABEL: ustest_f32i64: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -1579,7 +1579,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixsfti ; CHECK-FP16-GI-NEXT: cmp x20, #1 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lt ; CHECK-FP16-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-FP16-GI-NEXT: cmp x1, #1 @@ -1750,23 +1750,23 @@ define <2 x i64> @utest_f16i64(<2 x half> %x) { ; CHECK-CVT-SD-LABEL: utest_f16i64: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-CVT-SD-NEXT: 
.cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: mov h0, v0.h[1] ; CHECK-CVT-SD-NEXT: bl __fixunshfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixunshfti ; CHECK-CVT-SD-NEXT: cmp x1, #0 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-CVT-SD-NEXT: cmp x20, #0 ; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq @@ -1780,23 +1780,23 @@ define <2 x i64> @utest_f16i64(<2 x half> %x) { ; CHECK-FP16-SD-LABEL: utest_f16i64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: mov h0, v0.h[1] ; CHECK-FP16-SD-NEXT: bl __fixunshfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; 
CHECK-FP16-SD-NEXT: bl __fixunshfti ; CHECK-FP16-SD-NEXT: cmp x1, #0 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-FP16-SD-NEXT: cmp x20, #0 ; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq @@ -1840,23 +1840,23 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-CVT-SD-LABEL: ustest_f16i64: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-CVT-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixhfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: mov h0, v0.h[1] ; CHECK-CVT-SD-NEXT: bl __fixhfti ; CHECK-CVT-SD-NEXT: cmp x1, #1 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csinc x8, x1, xzr, lt ; CHECK-CVT-SD-NEXT: csel x9, x0, xzr, lt ; CHECK-CVT-SD-NEXT: cmp x20, #1 @@ -1878,23 +1878,23 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-FP16-SD-LABEL: ustest_f16i64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 
16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixhfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov h0, v0.h[1] ; CHECK-FP16-SD-NEXT: bl __fixhfti ; CHECK-FP16-SD-NEXT: cmp x1, #1 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csinc x8, x1, xzr, lt ; CHECK-FP16-SD-NEXT: csel x9, x0, xzr, lt ; CHECK-FP16-SD-NEXT: cmp x20, #1 @@ -2666,7 +2666,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-CVT-GI-LABEL: stest_f64i64_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-48]! 
// 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 48 @@ -2686,7 +2686,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixdfti ; CHECK-CVT-GI-NEXT: cmp x19, x21 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lo ; CHECK-CVT-GI-NEXT: cmp x20, #0 ; CHECK-CVT-GI-NEXT: cset w9, mi @@ -2728,7 +2728,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-FP16-GI-LABEL: stest_f64i64_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-48]! // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 @@ -2748,7 +2748,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixdfti ; CHECK-FP16-GI-NEXT: cmp x19, x21 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lo ; CHECK-FP16-GI-NEXT: cmp x20, #0 ; CHECK-FP16-GI-NEXT: cset w9, mi @@ -2798,22 +2798,22 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-CVT-SD-LABEL: utest_f64i64_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, 
#32] // 16-byte Folded Spill ; CHECK-CVT-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: mov d0, v0.d[1] ; CHECK-CVT-SD-NEXT: bl __fixunsdfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixunsdfti ; CHECK-CVT-SD-NEXT: cmp x1, #0 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-CVT-SD-NEXT: cmp x20, #0 ; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq @@ -2827,22 +2827,22 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-FP16-SD-LABEL: utest_f64i64_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: mov d0, v0.d[1] ; CHECK-FP16-SD-NEXT: bl __fixunsdfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixunsdfti ; CHECK-FP16-SD-NEXT: cmp x1, #0 -; 
CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-FP16-SD-NEXT: cmp x20, #0 ; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq @@ -2856,7 +2856,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-CVT-GI-LABEL: utest_f64i64_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-CVT-GI-NEXT: .cfi_offset w19, -8 @@ -2871,7 +2871,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixunsdfti ; CHECK-CVT-GI-NEXT: cmp x20, #1 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lo ; CHECK-CVT-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-CVT-GI-NEXT: cmp x1, #1 @@ -2890,7 +2890,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-FP16-GI-LABEL: utest_f64i64_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -2905,7 +2905,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixunsdfti ; CHECK-FP16-GI-NEXT: cmp x20, #1 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lo ; CHECK-FP16-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-FP16-GI-NEXT: cmp x1, #1 @@ -2931,22 +2931,22 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-CVT-SD-LABEL: ustest_f64i64_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-CVT-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixdfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: mov d0, v0.d[1] ; CHECK-CVT-SD-NEXT: bl __fixdfti ; CHECK-CVT-SD-NEXT: cmp x1, #1 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, lt ; CHECK-CVT-SD-NEXT: csinc x9, x1, xzr, lt ; CHECK-CVT-SD-NEXT: cmp x20, #1 @@ 
-2966,22 +2966,22 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-FP16-SD-LABEL: ustest_f64i64_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixdfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov d0, v0.d[1] ; CHECK-FP16-SD-NEXT: bl __fixdfti ; CHECK-FP16-SD-NEXT: cmp x1, #1 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, lt ; CHECK-FP16-SD-NEXT: csinc x9, x1, xzr, lt ; CHECK-FP16-SD-NEXT: cmp x20, #1 @@ -3001,7 +3001,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-CVT-GI-LABEL: ustest_f64i64_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-CVT-GI-NEXT: .cfi_offset w19, -8 @@ -3016,7 +3016,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixdfti ; CHECK-CVT-GI-NEXT: cmp x20, #1 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lt ; CHECK-CVT-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-CVT-GI-NEXT: cmp x1, #1 @@ -3051,7 +3051,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-FP16-GI-LABEL: ustest_f64i64_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -3066,7 +3066,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixdfti ; CHECK-FP16-GI-NEXT: cmp x20, #1 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lt ; CHECK-FP16-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-FP16-GI-NEXT: cmp x1, #1 @@ -3121,7 +3121,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-CVT-GI-LABEL: stest_f32i64_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-48]! 
// 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 48 @@ -3142,7 +3142,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixsfti ; CHECK-CVT-GI-NEXT: cmp x19, x21 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lo ; CHECK-CVT-GI-NEXT: cmp x20, #0 ; CHECK-CVT-GI-NEXT: cset w9, mi @@ -3184,7 +3184,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-FP16-GI-LABEL: stest_f32i64_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-48]! // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 @@ -3205,7 +3205,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixsfti ; CHECK-FP16-GI-NEXT: cmp x19, x21 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lo ; CHECK-FP16-GI-NEXT: cmp x20, #0 ; CHECK-FP16-GI-NEXT: cset w9, mi @@ -3255,23 +3255,23 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-CVT-SD-LABEL: utest_f32i64_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, #32] 
// 16-byte Folded Spill ; CHECK-CVT-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: mov s0, v0.s[1] ; CHECK-CVT-SD-NEXT: bl __fixunssfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixunssfti ; CHECK-CVT-SD-NEXT: cmp x1, #0 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-CVT-SD-NEXT: cmp x20, #0 ; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq @@ -3285,23 +3285,23 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-FP16-SD-LABEL: utest_f32i64_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: mov s0, v0.s[1] ; CHECK-FP16-SD-NEXT: bl __fixunssfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: // 
kill: def $s0 killed $s0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixunssfti ; CHECK-FP16-SD-NEXT: cmp x1, #0 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-FP16-SD-NEXT: cmp x20, #0 ; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq @@ -3315,7 +3315,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-CVT-GI-LABEL: utest_f32i64_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-CVT-GI-NEXT: .cfi_offset w19, -8 @@ -3331,7 +3331,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixunssfti ; CHECK-CVT-GI-NEXT: cmp x20, #1 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lo ; CHECK-CVT-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-CVT-GI-NEXT: cmp x1, #1 @@ -3350,7 +3350,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-FP16-GI-LABEL: utest_f32i64_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -3366,7 +3366,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixunssfti ; CHECK-FP16-GI-NEXT: cmp x20, #1 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lo ; CHECK-FP16-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-FP16-GI-NEXT: cmp x1, #1 @@ -3392,23 +3392,23 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-CVT-SD-LABEL: ustest_f32i64_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-CVT-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixsfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: mov s0, v0.s[1] ; CHECK-CVT-SD-NEXT: bl __fixsfti ; CHECK-CVT-SD-NEXT: cmp x1, #1 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, lt ; CHECK-CVT-SD-NEXT: csinc 
x9, x1, xzr, lt ; CHECK-CVT-SD-NEXT: cmp x20, #1 @@ -3428,23 +3428,23 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-FP16-SD-LABEL: ustest_f32i64_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixsfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov s0, v0.s[1] ; CHECK-FP16-SD-NEXT: bl __fixsfti ; CHECK-FP16-SD-NEXT: cmp x1, #1 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, lt ; CHECK-FP16-SD-NEXT: csinc x9, x1, xzr, lt ; CHECK-FP16-SD-NEXT: cmp x20, #1 @@ -3464,7 +3464,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-CVT-GI-LABEL: ustest_f32i64_mm: ; CHECK-CVT-GI: // %bb.0: // %entry ; CHECK-CVT-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-CVT-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-CVT-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-CVT-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-CVT-GI-NEXT: .cfi_offset w19, -8 @@ -3480,7 +3480,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-CVT-GI-NEXT: mov x20, x1 ; CHECK-CVT-GI-NEXT: bl __fixsfti ; CHECK-CVT-GI-NEXT: cmp x20, #1 -; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-CVT-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-CVT-GI-NEXT: cset w8, lt ; CHECK-CVT-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-CVT-GI-NEXT: cmp x1, #1 @@ -3515,7 +3515,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-FP16-GI-LABEL: ustest_f32i64_mm: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -3531,7 +3531,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-FP16-GI-NEXT: mov x20, x1 ; CHECK-FP16-GI-NEXT: bl __fixsfti ; CHECK-FP16-GI-NEXT: cmp x20, #1 -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: cset w8, lt ; CHECK-FP16-GI-NEXT: csel w8, wzr, w8, eq ; CHECK-FP16-GI-NEXT: cmp x1, #1 @@ -3698,23 +3698,23 @@ define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { ; CHECK-CVT-SD-LABEL: utest_f16i64_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; 
CHECK-CVT-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: mov h0, v0.h[1] ; CHECK-CVT-SD-NEXT: bl __fixunshfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixunshfti ; CHECK-CVT-SD-NEXT: cmp x1, #0 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-CVT-SD-NEXT: cmp x20, #0 ; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq @@ -3728,23 +3728,23 @@ define <2 x i64> @utest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-SD-LABEL: utest_f16i64_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: mov h0, v0.h[1] ; CHECK-FP16-SD-NEXT: bl __fixunshfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 
killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixunshfti ; CHECK-FP16-SD-NEXT: cmp x1, #0 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq ; CHECK-FP16-SD-NEXT: cmp x20, #0 ; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq @@ -3787,23 +3787,23 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-CVT-SD-LABEL: ustest_f16i64_mm: ; CHECK-CVT-SD: // %bb.0: // %entry ; CHECK-CVT-SD-NEXT: sub sp, sp, #48 -; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-CVT-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-CVT-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-CVT-SD-NEXT: .cfi_offset w19, -8 ; CHECK-CVT-SD-NEXT: .cfi_offset w20, -16 ; CHECK-CVT-SD-NEXT: .cfi_offset w30, -32 ; CHECK-CVT-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-CVT-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-CVT-SD-NEXT: bl __fixhfti -; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-CVT-SD-NEXT: mov x19, x0 ; CHECK-CVT-SD-NEXT: mov x20, x1 ; CHECK-CVT-SD-NEXT: mov h0, v0.h[1] ; CHECK-CVT-SD-NEXT: bl __fixhfti ; CHECK-CVT-SD-NEXT: cmp x1, #1 -; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, lt ; CHECK-CVT-SD-NEXT: csinc x9, x1, xzr, lt ; CHECK-CVT-SD-NEXT: cmp x20, #1 @@ -3823,23 +3823,23 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-SD-LABEL: ustest_f16i64_mm: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: 
stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixhfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov h0, v0.h[1] ; CHECK-FP16-SD-NEXT: bl __fixhfti ; CHECK-FP16-SD-NEXT: cmp x1, #1 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, lt ; CHECK-FP16-SD-NEXT: csinc x9, x1, xzr, lt ; CHECK-FP16-SD-NEXT: cmp x20, #1 diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll index df90f9d5f0910..549f20f99acdb 100644 --- a/llvm/test/CodeGen/AArch64/fpext.ll +++ b/llvm/test/CodeGen/AArch64/fpext.ll @@ -97,41 +97,41 @@ define <4 x fp128> @fpext_v4f16_v4f128(<4 x half> %a) { ; CHECK-SD-LABEL: fpext_v4f16_v4f128: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-SD-NEXT: bl __extendhftf2 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: 
ldr q1, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov h1, v1.h[1] ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl __extendhftf2 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl __extendhftf2 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl __extendhftf2 ; CHECK-SD-NEXT: mov v3.16b, v0.16b ; CHECK-SD-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q2, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fpext_v4f16_v4f128: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -143,21 +143,21 @@ define <4 x fp128> @fpext_v4f16_v4f128(<4 x half> %a) { ; CHECK-GI-NEXT: mov h10, v0.h[3] ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-GI-NEXT: bl __extendhftf2 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, 
[sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl __extendhftf2 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl __extendhftf2 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl __extendhftf2 ; CHECK-GI-NEXT: mov v3.16b, v0.16b ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #80 ; CHECK-GI-NEXT: ret entry: @@ -169,40 +169,40 @@ define <4 x fp128> @fpext_v4f32_v4f128(<4 x float> %a) { ; CHECK-SD-LABEL: fpext_v4f32_v4f128: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #80 -; CHECK-SD-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __extendsftf2 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: mov s1, v1.s[1] ; 
CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl __extendsftf2 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __extendsftf2 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl __extendsftf2 ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: ldp q0, q3, [sp] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q2, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #80 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fpext_v4f32_v4f128: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -213,21 +213,21 @@ define <4 x fp128> @fpext_v4f32_v4f128(<4 x float> %a) { ; CHECK-GI-NEXT: mov s10, v0.s[3] ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __extendsftf2 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl __extendsftf2 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; 
CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl __extendsftf2 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl __extendsftf2 ; CHECK-GI-NEXT: mov v3.16b, v0.16b ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #80 ; CHECK-GI-NEXT: ret entry: @@ -239,29 +239,29 @@ define <4 x fp128> @fpext_v4f64_v4f128(<4 x double> %a) { ; CHECK-SD-LABEL: fpext_v4f64_v4f128: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #80 -; CHECK-SD-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __extenddftf2 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl __extenddftf2 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill 
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __extenddftf2 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl __extenddftf2 ; CHECK-SD-NEXT: mov v3.16b, v0.16b ; CHECK-SD-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr q2, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q2, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #80 ; CHECK-SD-NEXT: ret ; @@ -269,31 +269,31 @@ define <4 x fp128> @fpext_v4f64_v4f128(<4 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 ; CHECK-GI-NEXT: .cfi_offset b9, -32 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: mov d9, v1.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl __extenddftf2 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl __extenddftf2 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; 
CHECK-GI-NEXT: bl __extenddftf2 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d9 ; CHECK-GI-NEXT: bl __extenddftf2 ; CHECK-GI-NEXT: mov v3.16b, v0.16b ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #80 ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fpow.ll b/llvm/test/CodeGen/AArch64/fpow.ll index dc93d5be9b3f3..3e9e1da87cd75 100644 --- a/llvm/test/CodeGen/AArch64/fpow.ll +++ b/llvm/test/CodeGen/AArch64/fpow.ll @@ -65,7 +65,7 @@ define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-SD-LABEL: pow_v2f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill @@ -73,14 +73,14 @@ define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-SD-NEXT: mov d1, v1.d[1] ; CHECK-SD-NEXT: bl pow ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-SD-NEXT: bl pow -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte 
Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -89,7 +89,7 @@ define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #48 ; CHECK-GI-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -100,14 +100,14 @@ define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-GI-NEXT: bl pow ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d1, d9 ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl pow -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #48 @@ -123,7 +123,7 @@ define <3 x double> @pow_v3f64(<3 x double> %a, <3 x double> %b) { ; CHECK-SD-NEXT: str d12, [sp, #-48]! 
// 8-byte Folded Spill ; CHECK-SD-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -8 ; CHECK-SD-NEXT: .cfi_offset b8, -16 @@ -149,7 +149,7 @@ define <3 x double> @pow_v3f64(<3 x double> %a, <3 x double> %b) { ; CHECK-SD-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload ; CHECK-SD-NEXT: fmov d2, d0 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: fmov d0, d12 ; CHECK-SD-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -159,7 +159,7 @@ define <3 x double> @pow_v3f64(<3 x double> %a, <3 x double> %b) { ; CHECK-GI-NEXT: str d12, [sp, #-48]! // 8-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -185,7 +185,7 @@ define <3 x double> @pow_v3f64(<3 x double> %a, <3 x double> %b) { ; CHECK-GI-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov d2, d0 -; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-GI-NEXT: fmov d0, d12 ; CHECK-GI-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -198,7 +198,7 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-SD-LABEL: pow_v4f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, 
#96 -; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q2, [sp] // 32-byte Folded Spill @@ -207,28 +207,28 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-SD-NEXT: mov d1, v2.d[1] ; CHECK-SD-NEXT: bl pow ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-SD-NEXT: bl pow -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: mov d1, v1.d[1] ; CHECK-SD-NEXT: bl pow ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-SD-NEXT: bl pow ; CHECK-SD-NEXT: fmov d1, d0 ; CHECK-SD-NEXT: ldp q2, q0, [sp, #16] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.d[1], v2.d[0] ; CHECK-SD-NEXT: add sp, sp, #96 ; CHECK-SD-NEXT: ret @@ -238,7 +238,7 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) { ; 
CHECK-GI-NEXT: sub sp, sp, #112 ; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -246,9 +246,9 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-GI-NEXT: .cfi_offset b10, -40 ; CHECK-GI-NEXT: .cfi_offset b11, -48 ; CHECK-GI-NEXT: mov v4.16b, v1.16b -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b -; CHECK-GI-NEXT: str q3, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q3, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: mov d10, v2.d[1] ; CHECK-GI-NEXT: mov d11, v3.d[1] @@ -257,27 +257,27 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-GI-NEXT: mov d9, v4.d[1] ; CHECK-GI-NEXT: bl pow ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov d1, d10 ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl pow ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl pow ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, 
[sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov d1, d11 ; CHECK-GI-NEXT: fmov d0, d9 ; CHECK-GI-NEXT: bl pow ; CHECK-GI-NEXT: ldp q3, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] @@ -293,7 +293,7 @@ define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) { ; CHECK-SD-LABEL: pow_v2f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 @@ -303,14 +303,14 @@ define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) { ; CHECK-SD-NEXT: mov s1, v1.s[1] ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, 
#64 @@ -320,7 +320,7 @@ define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #48 ; CHECK-GI-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -333,14 +333,14 @@ define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) { ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s9 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #48 @@ -354,7 +354,7 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) { ; CHECK-SD-LABEL: pow_v3f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill @@ -362,22 +362,22 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) { ; CHECK-SD-NEXT: mov s1, v1.s[1] ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, 
[sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: mov s1, v1.s[2] ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #64 @@ -388,7 +388,7 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) { ; CHECK-GI-NEXT: sub sp, sp, #80 ; CHECK-GI-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -403,18 +403,18 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) { ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s10 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 
-; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s11 ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] @@ -431,7 +431,7 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-SD-LABEL: pow_v4f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill @@ -439,30 +439,30 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-SD-NEXT: mov s1, v1.s[1] ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: mov s1, v1.s[2] ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp] 
// 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] ; CHECK-SD-NEXT: mov s1, v1.s[3] ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #64 @@ -474,7 +474,7 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -493,27 +493,27 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s11 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s12 ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: // 
kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s13 ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] @@ -529,7 +529,7 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-SD-LABEL: pow_v8f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #96 -; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q2, [sp] // 32-byte Folded Spill @@ -538,61 +538,61 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-SD-NEXT: mov s1, v2.s[1] ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], 
v1.s[0] -; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: mov s1, v1.s[2] ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] ; CHECK-SD-NEXT: mov s1, v1.s[3] ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: mov s1, v1.s[1] ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; 
CHECK-SD-NEXT: mov s1, v1.s[2] ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] ; CHECK-SD-NEXT: mov s1, v1.s[3] ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fmov s2, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] ; CHECK-SD-NEXT: add sp, sp, #96 ; CHECK-SD-NEXT: ret @@ -604,7 +604,7 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #144] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #160] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #176] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #192] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #192] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 208 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -630,63 +630,63 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-GI-NEXT: mov s15, v1.s[2] ; CHECK-GI-NEXT: mov s13, v1.s[3] ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 -; CHECK-GI-NEXT: str s2, [sp, #48] // 4-byte Folded Spill +; CHECK-GI-NEXT: str s2, [sp, #48] // 4-byte Spill ; CHECK-GI-NEXT: mov s2, v4.s[2] -; CHECK-GI-NEXT: str s2, [sp, #112] // 4-byte Folded Spill +; CHECK-GI-NEXT: str s2, [sp, #112] // 4-byte Spill ; CHECK-GI-NEXT: mov s2, 
v3.s[3] ; CHECK-GI-NEXT: stp s2, s5, [sp, #200] // 8-byte Folded Spill ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s14 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s15 ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s13 ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fmov s1, s12 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr s0, [sp, #48] // 4-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr s0, [sp, #48] // 4-byte Reload ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fmov s1, s11 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr s0, [sp, #112] // 4-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-GI-NEXT: ldr s0, [sp, #112] // 4-byte Reload ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; 
CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill ; CHECK-GI-NEXT: ldp s1, s0, [sp, #200] // 8-byte Folded Reload ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: ldp q3, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #176] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #192] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #192] // 8-byte Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #160] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] ; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v2.16b, v1.16b @@ -703,7 +703,7 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-LABEL: pow_v7f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h3, v0.h[1] @@ -713,7 +713,7 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h2 ; 
CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 @@ -723,15 +723,15 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: mov h1, v1.h[3] @@ -739,9 +739,9 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] ; CHECK-SD-NEXT: mov h1, v1.h[4] @@ -749,9 +749,9 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, 
[sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] ; CHECK-SD-NEXT: mov h1, v1.h[5] @@ -759,9 +759,9 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] ; CHECK-SD-NEXT: mov h1, v1.h[6] @@ -769,9 +769,9 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] ; CHECK-SD-NEXT: mov h1, v1.h[7] @@ -779,8 +779,8 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -792,7 +792,7 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-GI-NEXT: 
stp d13, d12, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -812,71 +812,71 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-GI-NEXT: mov h15, v1.h[2] ; CHECK-GI-NEXT: mov h8, v1.h[3] ; CHECK-GI-NEXT: mov h13, v1.h[4] -; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v0.h[6] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h2, [sp, #80] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #80] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v1.h[5] -; CHECK-GI-NEXT: str h2, [sp, #172] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #172] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v1.h[6] ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str h2, [sp, #174] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #174] // 2-byte Spill ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h9 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h14 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h10 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h15 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h11 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h8 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; 
CHECK-GI-NEXT: fcvt s2, h12 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h13 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #172] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #172] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov 
v1.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -891,7 +891,7 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) { ; CHECK-SD-LABEL: pow_v4f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 @@ -902,38 +902,38 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) { ; CHECK-SD-NEXT: fcvt s0, h3 ; CHECK-SD-NEXT: fcvt s1, h2 ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: fcvt s2, h1 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s1, h0 ; CHECK-SD-NEXT: fmov s0, s2 ; CHECK-SD-NEXT: bl powf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h3, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: mov h2, v0.h[2] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v3.h[1], v0.h[0] ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: fcvt s1, h2 -; CHECK-SD-NEXT: 
str q3, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q3, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q3, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q3, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: mov h2, v2.h[3] ; CHECK-SD-NEXT: mov v3.h[2], v0.h[0] ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: fcvt s1, h2 -; CHECK-SD-NEXT: str q3, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q3, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #64 @@ -945,7 +945,7 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -968,27 +968,27 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) { ; CHECK-GI-NEXT: fcvt s2, h8 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h11 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h9 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h12 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte 
Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h10 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h13 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] @@ -1006,7 +1006,7 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-LABEL: pow_v8f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h3, v0.h[1] @@ -1016,7 +1016,7 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h2 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 @@ -1026,15 +1026,15 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded 
Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: mov h1, v1.h[3] @@ -1042,9 +1042,9 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] ; CHECK-SD-NEXT: mov h1, v1.h[4] @@ -1052,9 +1052,9 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] ; CHECK-SD-NEXT: mov h1, v1.h[5] @@ -1062,9 +1062,9 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 
16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] ; CHECK-SD-NEXT: mov h1, v1.h[6] @@ -1072,9 +1072,9 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] ; CHECK-SD-NEXT: mov h1, v1.h[7] @@ -1082,8 +1082,8 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -1095,7 +1095,7 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #128] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #144] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #160] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #176] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #176] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 192 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -1115,84 +1115,84 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-GI-NEXT: mov h9, v1.h[2] ; CHECK-GI-NEXT: mov h10, v1.h[3] ; 
CHECK-GI-NEXT: mov h15, v1.h[4] -; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v0.h[6] -; CHECK-GI-NEXT: str h2, [sp, #64] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #64] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v0.h[7] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h2, [sp, #96] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #96] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v1.h[5] -; CHECK-GI-NEXT: str h2, [sp, #186] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #186] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v1.h[6] -; CHECK-GI-NEXT: str h2, [sp, #188] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #188] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v1.h[7] ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str h2, [sp, #190] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #190] // 2-byte Spill ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h11 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h8 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h12 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h9 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h13 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h10 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h14 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h15 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #48] 
// 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #186] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #186] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #188] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #188] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #190] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #190] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #160] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #144] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #176] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #176] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d15, 
d14, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] ; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -1207,7 +1207,7 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-LABEL: pow_v16f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #96 -; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill @@ -1218,10 +1218,10 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s0, h3 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 @@ -1229,70 +1229,70 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Spill +; 
CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[4] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 
16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[5] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[6] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[7] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[1] ; CHECK-SD-NEXT: mov h1, v1.h[1] @@ -1300,7 +1300,7 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 
; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 @@ -1310,15 +1310,15 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: mov h1, v1.h[3] @@ -1326,9 +1326,9 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] ; CHECK-SD-NEXT: mov h1, v1.h[4] @@ -1336,9 +1336,9 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 
16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] ; CHECK-SD-NEXT: mov h1, v1.h[5] @@ -1346,9 +1346,9 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] ; CHECK-SD-NEXT: mov h1, v1.h[6] @@ -1356,9 +1356,9 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] ; CHECK-SD-NEXT: mov h1, v1.h[7] @@ -1366,7 +1366,7 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl powf ; CHECK-SD-NEXT: fmov s1, s0 -; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-SD-NEXT: fcvt h2, s1 ; CHECK-SD-NEXT: ldp q1, q0, [sp, #48] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] @@ -1393,203 +1393,203 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { ; 
CHECK-GI-NEXT: .cfi_offset b14, -72 ; CHECK-GI-NEXT: .cfi_offset b15, -80 ; CHECK-GI-NEXT: mov v4.16b, v1.16b -; CHECK-GI-NEXT: str q1, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #112] // 16-byte Spill ; CHECK-GI-NEXT: mov h1, v0.h[4] ; CHECK-GI-NEXT: mov h12, v0.h[1] ; CHECK-GI-NEXT: mov h13, v0.h[2] -; CHECK-GI-NEXT: str q3, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q3, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: mov h14, v0.h[3] ; CHECK-GI-NEXT: mov h15, v2.h[1] ; CHECK-GI-NEXT: mov h8, v2.h[2] ; CHECK-GI-NEXT: mov h9, v2.h[3] ; CHECK-GI-NEXT: mov h10, v2.h[4] ; CHECK-GI-NEXT: mov h11, v2.h[5] -; CHECK-GI-NEXT: str h1, [sp, #272] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #272] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v0.h[5] -; CHECK-GI-NEXT: str h1, [sp, #240] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #240] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v0.h[6] -; CHECK-GI-NEXT: str h1, [sp, #176] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #176] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v0.h[7] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h1, [sp, #144] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #144] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[1] -; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[2] -; CHECK-GI-NEXT: str h1, [sp, #80] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #80] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[3] -; CHECK-GI-NEXT: str h1, [sp, #128] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #128] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[4] -; CHECK-GI-NEXT: str h1, [sp, #192] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #192] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[5] -; CHECK-GI-NEXT: str h1, [sp, #256] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #256] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[6] -; CHECK-GI-NEXT: 
str h1, [sp, #336] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #336] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[7] -; CHECK-GI-NEXT: str h1, [sp, #352] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #352] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[6] -; CHECK-GI-NEXT: str h1, [sp, #12] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #12] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[7] -; CHECK-GI-NEXT: str h1, [sp, #14] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #14] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[1] -; CHECK-GI-NEXT: str h1, [sp, #44] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #44] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[2] -; CHECK-GI-NEXT: str h1, [sp, #46] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #46] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[3] -; CHECK-GI-NEXT: str h1, [sp, #78] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #78] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[4] -; CHECK-GI-NEXT: str h1, [sp, #110] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #110] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[5] -; CHECK-GI-NEXT: str h1, [sp, #174] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #174] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[6] -; CHECK-GI-NEXT: str h1, [sp, #238] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #238] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[7] -; CHECK-GI-NEXT: str h1, [sp, #302] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #302] // 2-byte Spill ; CHECK-GI-NEXT: fcvt s1, h2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h12 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h15 -; CHECK-GI-NEXT: str q0, [sp, #304] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #304] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h13 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h8 -; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded 
Spill +; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf ; CHECK-GI-NEXT: fcvt s2, h14 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h9 -; CHECK-GI-NEXT: str q0, [sp, #320] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #320] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #272] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #272] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 ; CHECK-GI-NEXT: fcvt s1, h10 -; CHECK-GI-NEXT: str q0, [sp, #272] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #272] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #240] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #240] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 ; CHECK-GI-NEXT: fcvt s1, h11 -; CHECK-GI-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #240] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #176] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #176] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #12] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #12] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #144] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #144] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #14] // 2-byte Reload 
; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr q1, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #44] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #44] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #46] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #46] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #128] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #128] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #78] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #78] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; 
CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #192] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #192] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #110] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #110] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #256] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #256] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #256] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #256] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #336] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #336] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #336] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #238] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #336] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #238] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr h1, [sp, #352] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #352] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #352] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #302] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #352] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #302] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl powf -; CHECK-GI-NEXT: ldr q3, [sp, 
#304] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q3, [sp, #304] // 16-byte Reload +; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: ldp x29, x30, [sp, #432] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #416] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #320] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #320] // 16-byte Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #400] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #128] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #128] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #384] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #272] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #272] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #368] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #192] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #192] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] ; CHECK-GI-NEXT: ldp q4, q2, [sp, #240] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[4], v4.h[0] ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #336] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #336] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: fcvt h2, s0 -; CHECK-GI-NEXT: ldr q0, [sp, #144] // 16-byte Folded 
Reload +; CHECK-GI-NEXT: ldr q0, [sp, #144] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #352] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] ; CHECK-GI-NEXT: mov v0.16b, v3.16b @@ -1604,36 +1604,36 @@ define <2 x fp128> @pow_v2fp128(<2 x fp128> %a, <2 x fp128> %b) { ; CHECK-SD-LABEL: pow_v2fp128: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: bl powl -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: bl powl ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: pow_v2fp128: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b ; CHECK-GI-NEXT: bl powl -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte 
Spill ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: bl powl ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #64 ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fpowi.ll b/llvm/test/CodeGen/AArch64/fpowi.ll index 8948556d1b380..f76e3a8679886 100644 --- a/llvm/test/CodeGen/AArch64/fpowi.ll +++ b/llvm/test/CodeGen/AArch64/fpowi.ll @@ -67,17 +67,17 @@ define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) { ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __powidf2 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __powidf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] @@ -87,7 +87,7 @@ define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) { ; CHECK-GI-LABEL: powi_v2f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp x30, x19, [sp, #32] // 
16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w19, -8 @@ -98,14 +98,14 @@ define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) { ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl __powidf2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl __powidf2 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #48 @@ -190,29 +190,29 @@ define <4 x double> @powi_v4f64(<4 x double> %a, i32 %b) { ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: mov w19, w0 -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl __powidf2 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __powidf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed 
$d0 def $q0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl __powidf2 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __powidf2 ; CHECK-SD-NEXT: fmov d1, d0 @@ -232,25 +232,25 @@ define <4 x double> @powi_v4f64(<4 x double> %a, i32 %b) { ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 ; CHECK-GI-NEXT: .cfi_offset b9, -32 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: mov d9, v1.d[1] ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl __powidf2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl __powidf2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl __powidf2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte 
Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov d0, d9 ; CHECK-GI-NEXT: bl __powidf2 @@ -259,7 +259,7 @@ define <4 x double> @powi_v4f64(<4 x double> %a, i32 %b) { ; CHECK-GI-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: add sp, sp, #80 @@ -278,17 +278,17 @@ define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) { ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] @@ -299,7 +299,7 @@ define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) { ; CHECK-GI-LABEL: powi_v2f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp x30, x19, [sp, #32] // 
16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w19, -8 @@ -311,14 +311,14 @@ define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl __powisf2 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-GI-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #48 @@ -336,25 +336,25 @@ define <3 x float> @powi_v3f32(<3 x float> %a, i32 %b) { ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte 
Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] @@ -378,12 +378,12 @@ define <3 x float> @powi_v3f32(<3 x float> %a, i32 %b) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl __powisf2 @@ -409,33 +409,33 @@ define <4 x float> @powi_v4f32(<4 x float> %a, i32 %b) { ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded 
Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] @@ -446,7 +446,7 @@ define <4 x float> @powi_v4f32(<4 x float> %a, i32 %b) { ; CHECK-GI-LABEL: powi_v4f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #96 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 96 @@ -462,27 +462,27 @@ define <4 x float> @powi_v4f32(<4 x float> %a, i32 %b) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 
16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -506,60 +506,60 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) { ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str 
q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, 
#16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fmov s2, s0 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] ; CHECK-SD-NEXT: add sp, sp, #64 @@ -581,7 +581,7 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) { ; CHECK-GI-NEXT: .cfi_offset b11, -48 ; CHECK-GI-NEXT: .cfi_offset b12, -56 ; CHECK-GI-NEXT: .cfi_offset b13, -64 -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] ; CHECK-GI-NEXT: mov s10, v0.s[3] @@ -592,56 +592,56 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] 
// 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s11 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s12 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fmov s0, s13 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: ldp q3, q2, [sp, #32] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: ldp x30, x19, [sp, #160] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, 
[sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] ; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v2.16b, v1.16b @@ -663,72 +663,72 @@ define <7 x half> @powi_v7f16(<7 x half> %a, i32 %b) { ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; 
CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 
+; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 @@ -762,37 +762,37 @@ define <7 x half> @powi_v7f16(<7 x half> %a, i32 %b) { ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 
; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: ldp q2, q1, [sp, #64] // 32-byte Folded Reload @@ -827,37 +827,37 @@ define <4 x half> @powi_v4f16(<4 x half> %a, i32 %b) { ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v2.h[1], v1.h[0] -; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __powisf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; 
CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v2.h[0] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -867,7 +867,7 @@ define <4 x half> @powi_v4f16(<4 x half> %a, i32 %b) { ; CHECK-GI-LABEL: powi_v4f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #96 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 96 @@ -886,27 +886,27 @@ define <4 x half> @powi_v4f16(<4 x half> %a, i32 %b) { ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; 
CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] @@ -928,72 +928,72 @@ define <8 x half> @powi_v8f16(<8 x half> %a, i32 %b) { ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[2], 
v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, 
[sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 @@ -1002,7 +1002,7 @@ define <8 x half> @powi_v8f16(<8 x half> %a, i32 %b) { ; CHECK-GI-LABEL: powi_v8f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #192 -; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Spill ; CHECK-GI-NEXT: stp d13, d12, [sp, #128] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #144] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #160] // 16-byte Folded Spill @@ -1030,49 +1030,49 @@ define <8 x half> @powi_v8f16(<8 x half> %a, i32 %b) { ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; 
CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: ldp q2, q1, [sp, #80] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: ldp x30, x19, [sp, #176] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #160] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #48] // 32-byte Folded Reload @@ -1083,7 +1083,7 @@ define <8 x half> @powi_v8f16(<8 x half> %a, i32 %b) { ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -1109,131 +1109,131 @@ define <16 x half> @powi_v16f16(<16 x 
half> %a, i32 %b) { ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; 
CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 
+; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: mov w0, w19 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl 
__powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl __powisf2 ; CHECK-SD-NEXT: fmov s1, s0 @@ -1251,7 +1251,7 @@ define <16 x half> @powi_v16f16(<16 x half> %a, i32 %b) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #256] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #272] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #288] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x29, [sp, #304] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x29, [sp, #304] // 8-byte Spill ; 
CHECK-GI-NEXT: stp x30, x19, [sp, #320] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 336 ; CHECK-GI-NEXT: .cfi_offset w19, -8 @@ -1266,7 +1266,7 @@ define <16 x half> @powi_v16f16(<16 x half> %a, i32 %b) { ; CHECK-GI-NEXT: .cfi_offset b14, -88 ; CHECK-GI-NEXT: .cfi_offset b15, -96 ; CHECK-GI-NEXT: mov v2.16b, v1.16b -; CHECK-GI-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: mov h14, v1.h[1] ; CHECK-GI-NEXT: mov h1, v1.h[2] ; CHECK-GI-NEXT: mov h8, v0.h[1] @@ -1278,147 +1278,147 @@ define <16 x half> @powi_v16f16(<16 x half> %a, i32 %b) { ; CHECK-GI-NEXT: mov h13, v0.h[6] ; CHECK-GI-NEXT: mov h15, v0.h[7] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[3] -; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[4] -; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[5] -; CHECK-GI-NEXT: str h1, [sp, #80] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #80] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[6] -; CHECK-GI-NEXT: str h1, [sp, #112] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #112] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[7] -; CHECK-GI-NEXT: str h1, [sp, #176] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #176] // 2-byte Spill ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded 
Spill +; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h15 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 -; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; 
CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 -; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 -; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 -; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 -; CHECK-GI-NEXT: ldr h1, [sp, #112] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #112] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 -; CHECK-GI-NEXT: ldr h1, [sp, #176] // 2-byte Folded Reload +; CHECK-GI-NEXT: 
ldr h1, [sp, #176] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl __powisf2 -; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x29, [sp, #304] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Reload +; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Reload +; CHECK-GI-NEXT: ldr x29, [sp, #304] // 8-byte Reload ; CHECK-GI-NEXT: ldp x30, x19, [sp, #320] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] ; CHECK-GI-NEXT: ldp q1, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #288] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #272] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #256] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #240] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #160] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #160] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte 
Reload ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q4, [sp, #112] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[5], v4.h[0] ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: fcvt h2, s0 -; CHECK-GI-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #176] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] ; CHECK-GI-NEXT: mov v0.16b, v3.16b @@ -1438,15 +1438,15 @@ define <2 x fp128> @powi_v2fp128(<2 x fp128> %a, i32 %b) { ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov w19, w0 -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl __powitf2 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: bl __powitf2 ; CHECK-SD-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; @@ -1457,16 +1457,16 @@ define <2 x fp128> @powi_v2fp128(<2 x fp128> %a, i32 %b) { ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: bl __powitf2 -; 
CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: mov w0, w19 ; CHECK-GI-NEXT: bl __powitf2 ; CHECK-GI-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: add sp, sp, #48 ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index f6053cee50dae..ae6f796e0b394 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -3026,16 +3026,16 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) { ; CHECK-NOFP16-SD-LABEL: fptos_v2f64_v2i128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -32 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NOFP16-SD-NEXT: bl __fixdfti -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: mov x19, x0 ; CHECK-NOFP16-SD-NEXT: mov x20, x1 ; CHECK-NOFP16-SD-NEXT: mov d0, v0.d[1] @@ -3045,23 +3045,23 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) { ; CHECK-NOFP16-SD-NEXT: mov x0, x19 ; CHECK-NOFP16-SD-NEXT: mov x1, x20 ; CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte 
Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: fptos_v2f64_v2i128: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixdfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov d0, v0.d[1] @@ -3071,14 +3071,14 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x19 ; CHECK-FP16-SD-NEXT: mov x1, x20 ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: fptos_v2f64_v2i128: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -3097,14 +3097,14 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) { ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: fptos_v2f64_v2i128: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -3123,7 +3123,7 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) { ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload ; CHECK-FP16-GI-NEXT: ret entry: @@ -3135,16 +3135,16 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) { ; CHECK-NOFP16-SD-LABEL: fptou_v2f64_v2i128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, 
#32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -32 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NOFP16-SD-NEXT: bl __fixunsdfti -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: mov x19, x0 ; CHECK-NOFP16-SD-NEXT: mov x20, x1 ; CHECK-NOFP16-SD-NEXT: mov d0, v0.d[1] @@ -3154,23 +3154,23 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) { ; CHECK-NOFP16-SD-NEXT: mov x0, x19 ; CHECK-NOFP16-SD-NEXT: mov x1, x20 ; CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: fptou_v2f64_v2i128: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixunsdfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov d0, 
v0.d[1] @@ -3180,14 +3180,14 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x19 ; CHECK-FP16-SD-NEXT: mov x1, x20 ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: fptou_v2f64_v2i128: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -3206,14 +3206,14 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) { ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: fptou_v2f64_v2i128: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -3232,7 +3232,7 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) { ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload ; CHECK-FP16-GI-NEXT: ret entry: @@ -4356,17 +4356,17 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) { ; CHECK-NOFP16-SD-LABEL: fptos_v2f32_v2i128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NOFP16-SD-NEXT: bl __fixsfti -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: mov x19, x0 ; CHECK-NOFP16-SD-NEXT: mov x20, x1 ; CHECK-NOFP16-SD-NEXT: mov s0, v0.s[1] @@ -4376,24 +4376,24 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) { ; CHECK-NOFP16-SD-NEXT: mov x0, x19 ; CHECK-NOFP16-SD-NEXT: mov x1, x20 ; 
CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: fptos_v2f32_v2i128: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixsfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov s0, v0.s[1] @@ -4403,14 +4403,14 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x19 ; CHECK-FP16-SD-NEXT: mov x1, x20 ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: fptos_v2f32_v2i128: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -4430,14 +4430,14 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) { ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: fptos_v2f32_v2i128: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -4457,7 +4457,7 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) { ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload ; CHECK-FP16-GI-NEXT: ret entry: @@ -4469,17 +4469,17 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) { ; CHECK-NOFP16-SD-LABEL: fptou_v2f32_v2i128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] 
// 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NOFP16-SD-NEXT: bl __fixunssfti -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: mov x19, x0 ; CHECK-NOFP16-SD-NEXT: mov x20, x1 ; CHECK-NOFP16-SD-NEXT: mov s0, v0.s[1] @@ -4489,24 +4489,24 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) { ; CHECK-NOFP16-SD-NEXT: mov x0, x19 ; CHECK-NOFP16-SD-NEXT: mov x1, x20 ; CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: fptou_v2f32_v2i128: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixunssfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 
16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov s0, v0.s[1] @@ -4516,14 +4516,14 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x19 ; CHECK-FP16-SD-NEXT: mov x1, x20 ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: fptou_v2f32_v2i128: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -4543,14 +4543,14 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) { ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: fptou_v2f32_v2i128: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -4570,7 +4570,7 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) { ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-FP16-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload ; CHECK-FP16-GI-NEXT: ret entry: @@ -6588,17 +6588,17 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) { ; CHECK-NOFP16-SD-LABEL: fptos_v2f16_v2i128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-NOFP16-SD-NEXT: bl __fixhfti -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: mov x19, x0 ; CHECK-NOFP16-SD-NEXT: mov x20, x1 ; CHECK-NOFP16-SD-NEXT: mov h0, v0.h[1] @@ -6608,24 +6608,24 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) { ; CHECK-NOFP16-SD-NEXT: mov x0, x19 ; CHECK-NOFP16-SD-NEXT: mov x1, x20 ; 
CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: fptos_v2f16_v2i128: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixhfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov h0, v0.h[1] @@ -6635,7 +6635,7 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x19 ; CHECK-FP16-SD-NEXT: mov x1, x20 ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; @@ -6669,17 +6669,17 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) { ; CHECK-NOFP16-SD-LABEL: fptou_v2f16_v2i128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; 
CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-NOFP16-SD-NEXT: bl __fixunshfti -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: mov x19, x0 ; CHECK-NOFP16-SD-NEXT: mov x20, x1 ; CHECK-NOFP16-SD-NEXT: mov h0, v0.h[1] @@ -6689,24 +6689,24 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) { ; CHECK-NOFP16-SD-NEXT: mov x0, x19 ; CHECK-NOFP16-SD-NEXT: mov x1, x20 ; CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: fptou_v2f16_v2i128: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-SD-NEXT: .cfi_offset w20, -16 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -32 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-FP16-SD-NEXT: bl __fixunshfti -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 
+; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x19, x0 ; CHECK-FP16-SD-NEXT: mov x20, x1 ; CHECK-FP16-SD-NEXT: mov h0, v0.h[1] @@ -6716,7 +6716,7 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x19 ; CHECK-FP16-SD-NEXT: mov x1, x20 ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; @@ -6750,19 +6750,19 @@ define <2 x i64> @fptos_v2f128_v2i64(<2 x fp128> %a) { ; CHECK-NOFP16-SD-LABEL: fptos_v2f128_v2i64: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: mov v0.16b, v1.16b ; CHECK-NOFP16-SD-NEXT: bl __fixtfdi ; CHECK-NOFP16-SD-NEXT: fmov d0, x0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: bl __fixtfdi ; CHECK-NOFP16-SD-NEXT: fmov d0, x0 -; CHECK-NOFP16-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret @@ -6770,19 +6770,19 @@ define <2 x i64> @fptos_v2f128_v2i64(<2 x fp128> %a) { ; 
CHECK-FP16-SD-LABEL: fptos_v2f128_v2i64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: mov v0.16b, v1.16b ; CHECK-FP16-SD-NEXT: bl __fixtfdi ; CHECK-FP16-SD-NEXT: fmov d0, x0 -; CHECK-FP16-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: bl __fixtfdi ; CHECK-FP16-SD-NEXT: fmov d0, x0 -; CHECK-FP16-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret @@ -6794,9 +6794,9 @@ define <2 x i64> @fptos_v2f128_v2i64(<2 x fp128> %a) { ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __fixtfdi -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: mov x19, x0 ; CHECK-NOFP16-GI-NEXT: bl __fixtfdi ; CHECK-NOFP16-GI-NEXT: fmov d0, x19 @@ -6812,9 +6812,9 @@ define <2 x i64> @fptos_v2f128_v2i64(<2 x fp128> %a) { ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-GI-NEXT: 
.cfi_offset w30, -16 -; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __fixtfdi -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: mov x19, x0 ; CHECK-FP16-GI-NEXT: bl __fixtfdi ; CHECK-FP16-GI-NEXT: fmov d0, x19 @@ -6831,19 +6831,19 @@ define <2 x i64> @fptou_v2f128_v2i64(<2 x fp128> %a) { ; CHECK-NOFP16-SD-LABEL: fptou_v2f128_v2i64: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: mov v0.16b, v1.16b ; CHECK-NOFP16-SD-NEXT: bl __fixunstfdi ; CHECK-NOFP16-SD-NEXT: fmov d0, x0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: bl __fixunstfdi ; CHECK-NOFP16-SD-NEXT: fmov d0, x0 -; CHECK-NOFP16-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret @@ -6851,19 +6851,19 @@ define <2 x i64> @fptou_v2f128_v2i64(<2 x fp128> %a) { ; CHECK-FP16-SD-LABEL: fptou_v2f128_v2i64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded 
Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: mov v0.16b, v1.16b ; CHECK-FP16-SD-NEXT: bl __fixunstfdi ; CHECK-FP16-SD-NEXT: fmov d0, x0 -; CHECK-FP16-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: bl __fixunstfdi ; CHECK-FP16-SD-NEXT: fmov d0, x0 -; CHECK-FP16-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret @@ -6875,9 +6875,9 @@ define <2 x i64> @fptou_v2f128_v2i64(<2 x fp128> %a) { ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __fixunstfdi -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: mov x19, x0 ; CHECK-NOFP16-GI-NEXT: bl __fixunstfdi ; CHECK-NOFP16-GI-NEXT: fmov d0, x19 @@ -6893,9 +6893,9 @@ define <2 x i64> @fptou_v2f128_v2i64(<2 x fp128> %a) { ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl 
__fixunstfdi -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: mov x19, x0 ; CHECK-FP16-GI-NEXT: bl __fixunstfdi ; CHECK-FP16-GI-NEXT: fmov d0, x19 @@ -6912,17 +6912,17 @@ define <2 x i32> @fptos_v2f128_v2i32(<2 x fp128> %a) { ; CHECK-NOFP16-SD-LABEL: fptos_v2f128_v2i32: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __fixtfsi ; CHECK-NOFP16-SD-NEXT: fmov s0, w0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: bl __fixtfsi -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 @@ -6931,17 +6931,17 @@ define <2 x i32> @fptos_v2f128_v2i32(<2 x fp128> %a) { ; CHECK-FP16-SD-LABEL: fptos_v2f128_v2i32: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded 
Spill +; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __fixtfsi ; CHECK-FP16-SD-NEXT: fmov s0, w0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-FP16-SD-NEXT: bl __fixtfsi -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: add sp, sp, #48 @@ -6954,9 +6954,9 @@ define <2 x i32> @fptos_v2f128_v2i32(<2 x fp128> %a) { ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __fixtfsi -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: mov w19, w0 ; CHECK-NOFP16-GI-NEXT: bl __fixtfsi ; CHECK-NOFP16-GI-NEXT: fmov s0, w19 @@ -6973,9 +6973,9 @@ define <2 x i32> @fptos_v2f128_v2i32(<2 x fp128> %a) { ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __fixtfsi -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: mov w19, w0 ; CHECK-FP16-GI-NEXT: bl __fixtfsi ; CHECK-FP16-GI-NEXT: fmov s0, w19 @@ -6993,17 +6993,17 @@ define <2 x i32> 
@fptou_v2f128_v2i32(<2 x fp128> %a) { ; CHECK-NOFP16-SD-LABEL: fptou_v2f128_v2i32: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __fixunstfsi ; CHECK-NOFP16-SD-NEXT: fmov s0, w0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: bl __fixunstfsi -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 @@ -7012,17 +7012,17 @@ define <2 x i32> @fptou_v2f128_v2i32(<2 x fp128> %a) { ; CHECK-FP16-SD-LABEL: fptou_v2f128_v2i32: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __fixunstfsi ; CHECK-FP16-SD-NEXT: fmov s0, w0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; 
CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-FP16-SD-NEXT: bl __fixunstfsi -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: add sp, sp, #48 @@ -7035,9 +7035,9 @@ define <2 x i32> @fptou_v2f128_v2i32(<2 x fp128> %a) { ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __fixunstfsi -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: mov w19, w0 ; CHECK-NOFP16-GI-NEXT: bl __fixunstfsi ; CHECK-NOFP16-GI-NEXT: fmov s0, w19 @@ -7054,9 +7054,9 @@ define <2 x i32> @fptou_v2f128_v2i32(<2 x fp128> %a) { ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __fixunstfsi -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: mov w19, w0 ; CHECK-FP16-GI-NEXT: bl __fixunstfsi ; CHECK-FP16-GI-NEXT: fmov s0, w19 @@ -7074,17 +7074,17 @@ define <2 x i16> @fptos_v2f128_v2i16(<2 x fp128> %a) { ; CHECK-NOFP16-SD-LABEL: fptos_v2f128_v2i16: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; 
CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __fixtfsi ; CHECK-NOFP16-SD-NEXT: fmov s0, w0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: bl __fixtfsi -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 @@ -7093,17 +7093,17 @@ define <2 x i16> @fptos_v2f128_v2i16(<2 x fp128> %a) { ; CHECK-FP16-SD-LABEL: fptos_v2f128_v2i16: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __fixtfsi ; CHECK-FP16-SD-NEXT: fmov s0, w0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-FP16-SD-NEXT: bl __fixtfsi -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] 
// 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: add sp, sp, #48 @@ -7116,9 +7116,9 @@ define <2 x i16> @fptos_v2f128_v2i16(<2 x fp128> %a) { ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __fixtfsi -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: mov w19, w0 ; CHECK-NOFP16-GI-NEXT: bl __fixtfsi ; CHECK-NOFP16-GI-NEXT: fmov s0, w19 @@ -7135,9 +7135,9 @@ define <2 x i16> @fptos_v2f128_v2i16(<2 x fp128> %a) { ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __fixtfsi -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: mov w19, w0 ; CHECK-FP16-GI-NEXT: bl __fixtfsi ; CHECK-FP16-GI-NEXT: fmov s0, w19 @@ -7155,17 +7155,17 @@ define <2 x i16> @fptou_v2f128_v2i16(<2 x fp128> %a) { ; CHECK-NOFP16-SD-LABEL: fptou_v2f128_v2i16: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; 
CHECK-NOFP16-SD-NEXT: bl __fixtfsi ; CHECK-NOFP16-SD-NEXT: fmov s0, w0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: bl __fixtfsi -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 @@ -7174,17 +7174,17 @@ define <2 x i16> @fptou_v2f128_v2i16(<2 x fp128> %a) { ; CHECK-FP16-SD-LABEL: fptou_v2f128_v2i16: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __fixtfsi ; CHECK-FP16-SD-NEXT: fmov s0, w0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-FP16-SD-NEXT: bl __fixtfsi -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: add sp, sp, #48 @@ 
-7197,9 +7197,9 @@ define <2 x i16> @fptou_v2f128_v2i16(<2 x fp128> %a) { ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __fixunstfsi -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: mov w19, w0 ; CHECK-NOFP16-GI-NEXT: bl __fixunstfsi ; CHECK-NOFP16-GI-NEXT: fmov s0, w19 @@ -7216,9 +7216,9 @@ define <2 x i16> @fptou_v2f128_v2i16(<2 x fp128> %a) { ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __fixunstfsi -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: mov w19, w0 ; CHECK-FP16-GI-NEXT: bl __fixunstfsi ; CHECK-FP16-GI-NEXT: fmov s0, w19 @@ -7236,17 +7236,17 @@ define <2 x i8> @fptos_v2f128_v2i8(<2 x fp128> %a) { ; CHECK-NOFP16-SD-LABEL: fptos_v2f128_v2i8: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __fixtfsi ; CHECK-NOFP16-SD-NEXT: fmov s0, w0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; 
CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: bl __fixtfsi -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 @@ -7255,17 +7255,17 @@ define <2 x i8> @fptos_v2f128_v2i8(<2 x fp128> %a) { ; CHECK-FP16-SD-LABEL: fptos_v2f128_v2i8: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __fixtfsi ; CHECK-FP16-SD-NEXT: fmov s0, w0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-FP16-SD-NEXT: bl __fixtfsi -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: add sp, sp, #48 @@ -7278,9 +7278,9 @@ define <2 x i8> @fptos_v2f128_v2i8(<2 x fp128> %a) { ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Folded 
Spill +; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __fixtfsi -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: mov w19, w0 ; CHECK-NOFP16-GI-NEXT: bl __fixtfsi ; CHECK-NOFP16-GI-NEXT: fmov s0, w19 @@ -7297,9 +7297,9 @@ define <2 x i8> @fptos_v2f128_v2i8(<2 x fp128> %a) { ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __fixtfsi -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: mov w19, w0 ; CHECK-FP16-GI-NEXT: bl __fixtfsi ; CHECK-FP16-GI-NEXT: fmov s0, w19 @@ -7317,17 +7317,17 @@ define <2 x i8> @fptou_v2f128_v2i8(<2 x fp128> %a) { ; CHECK-NOFP16-SD-LABEL: fptou_v2f128_v2i8: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __fixtfsi ; CHECK-NOFP16-SD-NEXT: fmov s0, w0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: bl __fixtfsi -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; 
CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 @@ -7336,17 +7336,17 @@ define <2 x i8> @fptou_v2f128_v2i8(<2 x fp128> %a) { ; CHECK-FP16-SD-LABEL: fptou_v2f128_v2i8: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __fixtfsi ; CHECK-FP16-SD-NEXT: fmov s0, w0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-FP16-SD-NEXT: bl __fixtfsi -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.s[1], w0 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: add sp, sp, #48 @@ -7359,9 +7359,9 @@ define <2 x i8> @fptou_v2f128_v2i8(<2 x fp128> %a) { ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __fixunstfsi -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: mov w19, w0 ; CHECK-NOFP16-GI-NEXT: bl 
__fixunstfsi ; CHECK-NOFP16-GI-NEXT: fmov s0, w19 @@ -7378,9 +7378,9 @@ define <2 x i8> @fptou_v2f128_v2i8(<2 x fp128> %a) { ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 ; CHECK-FP16-GI-NEXT: .cfi_offset w30, -16 -; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __fixunstfsi -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: mov w19, w0 ; CHECK-FP16-GI-NEXT: bl __fixunstfsi ; CHECK-FP16-GI-NEXT: fmov s0, w19 @@ -7398,15 +7398,15 @@ define <2 x i128> @fptos_v2f128_v2i128(<2 x fp128> %a) { ; CHECK-LABEL: fptos_v2f128_v2i128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEXT: bl __fixtfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 ; CHECK-NEXT: bl __fixtfti @@ -7415,7 +7415,7 @@ define <2 x i128> @fptos_v2f128_v2i128(<2 x fp128> %a) { ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: mov x1, x20 ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret entry: @@ -7427,15 +7427,15 @@ define <2 x i128> @fptou_v2f128_v2i128(<2 x fp128> %a) { ; CHECK-LABEL: fptou_v2f128_v2i128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte 
Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEXT: bl __fixunstfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 ; CHECK-NEXT: bl __fixunstfti @@ -7444,7 +7444,7 @@ define <2 x i128> @fptou_v2f128_v2i128(<2 x fp128> %a) { ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: mov x1, x20 ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll index ddee23cc3fc50..b6cbe9eb46389 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -149,13 +149,13 @@ define i100 @test_signed_i100_f32(float %f) nounwind { ; CHECK-SD-LABEL: test_signed_i100_f32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov s8, s0 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: movi v0.2s, #241, lsl #24 ; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff ; CHECK-SD-NEXT: mov x10, #34359738367 // =0x7ffffffff -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: fcmp s8, s0 ; CHECK-SD-NEXT: fmov s0, w8 ; CHECK-SD-NEXT: mov x8, #-34359738368 // =0xfffffff800000000 @@ -173,13 +173,13 @@ define i100 @test_signed_i100_f32(float %f) nounwind { ; CHECK-GI-LABEL: test_signed_i100_f32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-GI-NEXT: fmov s8, s0 ; CHECK-GI-NEXT: bl __fixsfti ; CHECK-GI-NEXT: movi v0.2s, #241, lsl #24 ; CHECK-GI-NEXT: mov w8, #1895825407 // =0x70ffffff ; CHECK-GI-NEXT: mov x10, #34359738367 // =0x7ffffffff -; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-GI-NEXT: fcmp s8, s0 ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: mov x8, #34359738368 // =0x800000000 @@ -201,13 +201,13 @@ define i128 @test_signed_i128_f32(float %f) nounwind { ; CHECK-SD-LABEL: test_signed_i128_f32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov s8, s0 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: movi v0.2s, #255, lsl #24 ; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff ; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: fcmp s8, s0 ; CHECK-SD-NEXT: fmov s0, w8 ; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 @@ -225,13 +225,13 @@ define i128 @test_signed_i128_f32(float %f) nounwind { ; CHECK-GI-LABEL: test_signed_i128_f32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-GI-NEXT: fmov s8, s0 ; CHECK-GI-NEXT: bl __fixsfti ; CHECK-GI-NEXT: movi v0.2s, #255, lsl #24 ; CHECK-GI-NEXT: mov w8, #2130706431 // =0x7effffff ; CHECK-GI-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-GI-NEXT: fcmp s8, s0 ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 @@ -394,12 +394,12 @@ define i100 @test_signed_i100_f64(double %f) nounwind { ; CHECK-SD-LABEL: test_signed_i100_f64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov d8, d0 ; CHECK-SD-NEXT: bl __fixdfti ; CHECK-SD-NEXT: mov x8, #-4170333254945079296 // =0xc620000000000000 ; CHECK-SD-NEXT: mov x10, #34359738367 // =0x7ffffffff -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: fmov d0, x8 ; CHECK-SD-NEXT: mov x8, #5053038781909696511 // =0x461fffffffffffff ; CHECK-SD-NEXT: fcmp d8, d0 @@ -419,12 +419,12 @@ define i100 @test_signed_i100_f64(double %f) nounwind { ; CHECK-GI-LABEL: test_signed_i100_f64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-GI-NEXT: fmov d8, d0 ; CHECK-GI-NEXT: bl __fixdfti ; CHECK-GI-NEXT: mov x8, #-4170333254945079296 // =0xc620000000000000 ; CHECK-GI-NEXT: mov x10, #34359738367 // =0x7ffffffff -; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-GI-NEXT: fmov d0, x8 ; CHECK-GI-NEXT: mov x8, #5053038781909696511 // =0x461fffffffffffff ; CHECK-GI-NEXT: fcmp d8, d0 @@ -448,12 +448,12 @@ define i128 @test_signed_i128_f64(double %f) nounwind { ; CHECK-SD-LABEL: test_signed_i128_f64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov d8, d0 ; CHECK-SD-NEXT: bl __fixdfti ; CHECK-SD-NEXT: mov x8, #-4044232465378705408 // =0xc7e0000000000000 ; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: fmov d0, x8 ; CHECK-SD-NEXT: mov x8, #5179139571476070399 // =0x47dfffffffffffff ; CHECK-SD-NEXT: fcmp d8, d0 @@ -473,12 +473,12 @@ define i128 @test_signed_i128_f64(double %f) nounwind { ; CHECK-GI-LABEL: test_signed_i128_f64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-GI-NEXT: fmov d8, d0 ; CHECK-GI-NEXT: bl __fixdfti ; CHECK-GI-NEXT: mov x8, #-4044232465378705408 // =0xc7e0000000000000 ; CHECK-GI-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-GI-NEXT: fmov d0, x8 ; CHECK-GI-NEXT: mov x8, #5179139571476070399 // =0x47dfffffffffffff ; CHECK-GI-NEXT: fcmp d8, d0 @@ -853,13 +853,13 @@ define i100 @test_signed_i100_f16(half %f) nounwind { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: movi v0.2s, #241, lsl #24 ; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff ; CHECK-SD-NEXT: mov x10, #34359738367 // =0x7ffffffff -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: fcmp s8, s0 ; CHECK-SD-NEXT: fmov s0, w8 ; CHECK-SD-NEXT: mov x8, #-34359738368 // =0xfffffff800000000 @@ -895,13 +895,13 @@ define i128 @test_signed_i128_f16(half %f) nounwind { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: movi v0.2s, #255, lsl #24 ; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff ; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: fcmp s8, s0 ; CHECK-SD-NEXT: fmov s0, w8 ; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 @@ -941,20 +941,20 @@ define i32 @test_signed_f128_i32(fp128 %f) { ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: adrp x8, .LCPI30_0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_0] ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfsi ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-SD-NEXT: ldr q0, 
[sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: csel w19, w8, w0, mi ; CHECK-SD-NEXT: adrp x8, .LCPI30_1 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_1] ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #2147483647 // =0x7fffffff ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w19, w8, w19, gt @@ -969,17 +969,17 @@ define i32 @test_signed_f128_i32(fp128 %f) { ; CHECK-GI-LABEL: test_signed_f128_i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w20, -16 ; CHECK-GI-NEXT: .cfi_offset w30, -32 ; CHECK-GI-NEXT: adrp x8, .LCPI30_1 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1] ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x9, #-4603241769126068224 // =0xc01e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 @@ -999,12 +999,12 @@ define i32 @test_signed_f128_i32(fp128 %f) { ; CHECK-GI-NEXT: csel x8, x20, x8, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: csel w0, wzr, w19, ne ; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte 
Folded Reload ; CHECK-GI-NEXT: add sp, sp, #48 diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index b963acd8cb2a1..6eeceb2e41321 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -489,20 +489,20 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) { ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: adrp x8, .LCPI14_0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfsi ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: csel w19, w8, w0, mi ; CHECK-SD-NEXT: adrp x8, .LCPI14_1 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #2147483647 // =0x7fffffff ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w19, w8, w19, gt @@ -518,17 +518,17 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) { ; CHECK-GI-LABEL: test_signed_v1f128_v1i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w20, -16 ; CHECK-GI-NEXT: .cfi_offset w30, -32 ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded 
Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x9, #-4603241769126068224 // =0xc01e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 @@ -548,12 +548,12 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x20, x8, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: csel w8, wzr, w19, ne ; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov s0, w8 @@ -567,7 +567,7 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-SD-LABEL: test_signed_v2f128_v2i32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #112 -; CHECK-SD-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 112 @@ -581,45 +581,45 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-SD-NEXT: adrp x8, .LCPI15_0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-SD-NEXT: mov v0.16b, v2.16b -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl 
__fixtfsi ; CHECK-SD-NEXT: adrp x8, .LCPI15_1 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] ; CHECK-SD-NEXT: mov w20, #-2147483648 // =0x80000000 ; CHECK-SD-NEXT: csel w19, w20, w0, mi -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w21, #2147483647 // =0x7fffffff ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w19, w21, w19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: bl __unordtf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w22, wzr, w19, ne ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfsi -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel w19, w20, w0, mi ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w19, w21, w19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: bl __unordtf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-SD-NEXT: csel w8, 
wzr, w19, ne ; CHECK-SD-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-SD-NEXT: fmov s0, w8 @@ -632,7 +632,7 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-LABEL: test_signed_v2f128_v2i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #112 -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 @@ -642,13 +642,13 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-NEXT: .cfi_offset w22, -32 ; CHECK-GI-NEXT: .cfi_offset w30, -48 ; CHECK-GI-NEXT: adrp x8, .LCPI15_1 -; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_1] -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b -; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 @@ -659,7 +659,7 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 ; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000 @@ -669,18 +669,18 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x21, x22, 
mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w21, wzr, w19, ne ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] @@ -694,13 +694,13 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x20, x22, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 ; CHECK-GI-NEXT: fmov s0, w21 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: csel w8, wzr, w19, ne ; CHECK-GI-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload @@ -726,62 +726,62 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-SD-NEXT: .cfi_offset w22, -32 ; CHECK-SD-NEXT: .cfi_offset w23, -40 ; CHECK-SD-NEXT: .cfi_offset w30, -48 -; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte 
Spill ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 ; CHECK-SD-NEXT: mov v0.16b, v2.16b ; CHECK-SD-NEXT: stp q2, q1, [sp, #32] // 32-byte Folded Spill ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfsi ; CHECK-SD-NEXT: adrp x8, .LCPI16_1 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] ; CHECK-SD-NEXT: mov w20, #-2147483648 // =0x80000000 ; CHECK-SD-NEXT: csel w19, w20, w0, mi -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w22, #2147483647 // =0x7fffffff ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w19, w22, w19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: bl __unordtf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w21, wzr, w19, ne ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfsi -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cmp 
w19, #0 ; CHECK-SD-NEXT: csel w19, w20, w0, mi ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w19, w22, w19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: bl __unordtf2 -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w23, wzr, w19, ne ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfsi -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel w19, w20, w0, mi ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w19, w22, w19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b @@ -811,13 +811,13 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: .cfi_offset w23, -40 ; CHECK-GI-NEXT: .cfi_offset w30, -48 ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 -; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: str q2, [sp, #64] // 16-byte Folded Spill -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-GI-NEXT: str q2, [sp, #64] // 16-byte Spill +; 
CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 @@ -828,7 +828,7 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000 @@ -838,7 +838,7 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x21, x22, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 @@ -846,9 +846,9 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w21, wzr, w19, ne ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] @@ -862,18 +862,18 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x23, x22, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; 
CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w23, wzr, w19, ne ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] @@ -887,7 +887,7 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x20, x22, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 @@ -909,7 +909,7 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-SD-LABEL: test_signed_v4f128_v4i32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #144 -; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #112] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #128] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 144 @@ -921,24 +921,24 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-SD-NEXT: stp q2, q3, [sp, #64] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov v2.16b, v1.16b ; CHECK-SD-NEXT: adrp x8, .LCPI17_0 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Spill +; 
CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-SD-NEXT: mov v0.16b, v2.16b -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfsi ; CHECK-SD-NEXT: adrp x8, .LCPI17_1 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] ; CHECK-SD-NEXT: mov w20, #-2147483648 // =0x80000000 ; CHECK-SD-NEXT: csel w19, w20, w0, mi -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w21, #2147483647 // =0x7fffffff ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w19, w21, w19, gt @@ -948,36 +948,36 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w22, wzr, w19, ne ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfsi -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel w19, w20, w0, mi ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w19, w21, w19, gt ; CHECK-SD-NEXT: mov 
v1.16b, v0.16b ; CHECK-SD-NEXT: bl __unordtf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: csel w8, wzr, w19, ne ; CHECK-SD-NEXT: fmov s0, w8 ; CHECK-SD-NEXT: mov v0.s[1], w22 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfsi -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel w19, w20, w0, mi ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w19, w21, w19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b @@ -986,25 +986,25 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w8, wzr, w19, ne ; CHECK-SD-NEXT: mov v0.s[2], w8 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfsi -; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded 
Reload +; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel w19, w20, w0, mi ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w19, w21, w19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: bl __unordtf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-SD-NEXT: csel w8, wzr, w19, ne ; CHECK-SD-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload @@ -1015,7 +1015,7 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-LABEL: test_signed_v4f128_v4i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #160 -; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-GI-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill @@ -1030,10 +1030,10 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI17_1 ; CHECK-GI-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] -; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: stp q1, q3, [sp, #64] // 32-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // 
=0xc01e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 @@ -1044,7 +1044,7 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI17_0 ; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000 @@ -1054,18 +1054,18 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x21, x22, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w21, wzr, w19, ne ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] @@ -1079,12 +1079,12 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x23, x22, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte 
Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w23, wzr, w19, ne ; CHECK-GI-NEXT: bl __gttf2 @@ -1103,7 +1103,7 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x24, x22, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 @@ -1111,9 +1111,9 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w24, wzr, w19, ne ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] @@ -1127,13 +1127,13 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x20, x22, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 ; CHECK-GI-NEXT: fmov s0, w21 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-GI-NEXT: csel w8, wzr, w19, ne ; CHECK-GI-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload @@ -1539,9 +1539,9 @@ define <2 x i100> 
@test_signed_v2f32_v2i100(<2 x float> %f) { ; CHECK-SD-LABEL: test_signed_v2f32_v2i100: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #80 -; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 @@ -1554,11 +1554,11 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) { ; CHECK-SD-NEXT: .cfi_offset b9, -56 ; CHECK-SD-NEXT: .cfi_offset b10, -64 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: movi v9.2s, #241, lsl #24 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff ; CHECK-SD-NEXT: fmov s10, w8 ; CHECK-SD-NEXT: mov x21, #-34359738368 // =0xfffffff800000000 @@ -1576,13 +1576,13 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) { ; CHECK-SD-NEXT: csel x20, xzr, x9, vs ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x21, x1, lt ; CHECK-SD-NEXT: fcmp s8, s10 ; CHECK-SD-NEXT: mov x0, x19 ; CHECK-SD-NEXT: mov x1, x20 -; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x9, x22, x9, gt ; CHECK-SD-NEXT: 
csinv x8, x8, xzr, le @@ -1597,9 +1597,9 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) { ; CHECK-GI-LABEL: test_signed_v2f32_v2i100: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 @@ -1612,12 +1612,12 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) { ; CHECK-GI-NEXT: .cfi_offset b9, -56 ; CHECK-GI-NEXT: .cfi_offset b10, -64 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __fixsfti ; CHECK-GI-NEXT: movi v9.2s, #241, lsl #24 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w8, #1895825407 // =0x70ffffff ; CHECK-GI-NEXT: fmov s10, w8 ; CHECK-GI-NEXT: mov x21, #34359738368 // =0x800000000 @@ -1634,13 +1634,13 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) { ; CHECK-GI-NEXT: csel x20, xzr, x9, vs ; CHECK-GI-NEXT: bl __fixsfti ; CHECK-GI-NEXT: fcmp s8, s9 -; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, x21, x1, lt ; CHECK-GI-NEXT: fcmp s8, s10 ; CHECK-GI-NEXT: mov x0, x19 ; CHECK-GI-NEXT: mov x1, x20 -; CHECK-GI-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: ldp x20, x19, [sp, 
#64] // 16-byte Folded Reload ; CHECK-GI-NEXT: csinv x8, x8, xzr, le ; CHECK-GI-NEXT: csel x9, x22, x9, gt @@ -1659,9 +1659,9 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) { ; CHECK-SD-LABEL: test_signed_v2f32_v2i128: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #80 -; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 @@ -1674,11 +1674,11 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) { ; CHECK-SD-NEXT: .cfi_offset b9, -56 ; CHECK-SD-NEXT: .cfi_offset b10, -64 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: movi v9.2s, #255, lsl #24 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff ; CHECK-SD-NEXT: fmov s10, w8 ; CHECK-SD-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000 @@ -1696,13 +1696,13 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) { ; CHECK-SD-NEXT: csel x20, xzr, x9, vs ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x21, x1, lt ; CHECK-SD-NEXT: fcmp s8, s10 ; CHECK-SD-NEXT: mov x0, x19 ; CHECK-SD-NEXT: mov x1, x20 -; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr 
d10, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x9, x22, x9, gt ; CHECK-SD-NEXT: csinv x8, x8, xzr, le @@ -1717,9 +1717,9 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) { ; CHECK-GI-LABEL: test_signed_v2f32_v2i128: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 @@ -1732,12 +1732,12 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) { ; CHECK-GI-NEXT: .cfi_offset b9, -56 ; CHECK-GI-NEXT: .cfi_offset b10, -64 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __fixsfti ; CHECK-GI-NEXT: movi v9.2s, #255, lsl #24 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w8, #2130706431 // =0x7effffff ; CHECK-GI-NEXT: fmov s10, w8 ; CHECK-GI-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000 @@ -1754,13 +1754,13 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) { ; CHECK-GI-NEXT: csel x20, xzr, x9, vs ; CHECK-GI-NEXT: bl __fixsfti ; CHECK-GI-NEXT: fcmp s8, s9 -; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, x21, x1, lt ; CHECK-GI-NEXT: fcmp s8, s10 ; CHECK-GI-NEXT: mov x0, x19 ; 
CHECK-GI-NEXT: mov x1, x20 -; CHECK-GI-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: csinv x8, x8, xzr, le ; CHECK-GI-NEXT: csel x9, x22, x9, gt @@ -1930,9 +1930,9 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-SD-LABEL: test_signed_v4f32_v4i100: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #112 -; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-SD-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill @@ -1950,11 +1950,11 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-SD-NEXT: .cfi_offset b8, -80 ; CHECK-SD-NEXT: .cfi_offset b9, -88 ; CHECK-SD-NEXT: .cfi_offset b10, -96 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: movi v9.2s, #241, lsl #24 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff ; CHECK-SD-NEXT: fmov s10, w8 ; CHECK-SD-NEXT: mov x25, #-34359738368 // =0xfffffff800000000 @@ -1972,7 +1972,7 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-SD-NEXT: csel x20, xzr, x9, vs ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: csel x8, xzr, 
x0, lt ; CHECK-SD-NEXT: csel x9, x25, x1, lt @@ -1980,12 +1980,12 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-SD-NEXT: csel x9, x26, x9, gt ; CHECK-SD-NEXT: csinv x8, x8, xzr, le ; CHECK-SD-NEXT: fcmp s8, s8 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: csel x21, xzr, x8, vs ; CHECK-SD-NEXT: csel x22, xzr, x9, vs ; CHECK-SD-NEXT: bl __fixsfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcmp s0, s9 ; CHECK-SD-NEXT: mov s8, v0.s[1] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt @@ -2003,14 +2003,14 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-SD-NEXT: mov x3, x22 ; CHECK-SD-NEXT: mov x4, x23 ; CHECK-SD-NEXT: mov x5, x24 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x25, x1, lt ; CHECK-SD-NEXT: fcmp s8, s10 ; CHECK-SD-NEXT: mov x0, x19 ; CHECK-SD-NEXT: mov x1, x20 -; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x9, x26, x9, gt ; CHECK-SD-NEXT: csinv x8, x8, xzr, le @@ -2026,10 +2026,10 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-GI-LABEL: test_signed_v4f32_v4i100: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #128 -; CHECK-GI-NEXT: str d12, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d12, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #24] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #40] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, 
[sp, #56] // 8-byte Spill ; CHECK-GI-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill @@ -2049,14 +2049,14 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-GI-NEXT: .cfi_offset b10, -96 ; CHECK-GI-NEXT: .cfi_offset b11, -104 ; CHECK-GI-NEXT: .cfi_offset b12, -112 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov s9, v0.s[1] ; CHECK-GI-NEXT: mov s10, v0.s[2] ; CHECK-GI-NEXT: mov s8, v0.s[3] ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __fixsfti ; CHECK-GI-NEXT: movi v11.2s, #241, lsl #24 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w8, #1895825407 // =0x70ffffff ; CHECK-GI-NEXT: fmov s12, w8 ; CHECK-GI-NEXT: mov x25, #34359738368 // =0x800000000 @@ -2099,14 +2099,14 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-GI-NEXT: mov x3, x22 ; CHECK-GI-NEXT: mov x4, x23 ; CHECK-GI-NEXT: mov x5, x24 -; CHECK-GI-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #56] // 8-byte Reload ; CHECK-GI-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, x25, x1, lt ; CHECK-GI-NEXT: fcmp s8, s12 ; CHECK-GI-NEXT: mov x0, x19 ; CHECK-GI-NEXT: mov x1, x20 -; CHECK-GI-NEXT: ldr d12, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d12, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: csinv x8, x8, xzr, le ; CHECK-GI-NEXT: csel x9, x26, x9, gt @@ -2127,9 +2127,9 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-SD-LABEL: test_signed_v4f32_v4i128: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #112 -; CHECK-SD-NEXT: str d10, [sp, #16] // 
8-byte Folded Spill +; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-SD-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill @@ -2147,11 +2147,11 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-SD-NEXT: .cfi_offset b8, -80 ; CHECK-SD-NEXT: .cfi_offset b9, -88 ; CHECK-SD-NEXT: .cfi_offset b10, -96 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: movi v9.2s, #255, lsl #24 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff ; CHECK-SD-NEXT: fmov s10, w8 ; CHECK-SD-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000 @@ -2169,7 +2169,7 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-SD-NEXT: csel x20, xzr, x9, vs ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x25, x1, lt @@ -2177,12 +2177,12 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-SD-NEXT: csel x9, x26, x9, gt ; CHECK-SD-NEXT: csinv x8, x8, xzr, le ; CHECK-SD-NEXT: fcmp s8, s8 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: csel x21, xzr, x8, vs ; CHECK-SD-NEXT: csel x22, xzr, x9, vs ; CHECK-SD-NEXT: bl 
__fixsfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcmp s0, s9 ; CHECK-SD-NEXT: mov s8, v0.s[1] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt @@ -2200,14 +2200,14 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-SD-NEXT: mov x3, x22 ; CHECK-SD-NEXT: mov x4, x23 ; CHECK-SD-NEXT: mov x5, x24 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x25, x1, lt ; CHECK-SD-NEXT: fcmp s8, s10 ; CHECK-SD-NEXT: mov x0, x19 ; CHECK-SD-NEXT: mov x1, x20 -; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x9, x26, x9, gt ; CHECK-SD-NEXT: csinv x8, x8, xzr, le @@ -2223,10 +2223,10 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-GI-LABEL: test_signed_v4f32_v4i128: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #128 -; CHECK-GI-NEXT: str d12, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d12, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #24] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #40] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #56] // 8-byte Spill ; CHECK-GI-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill @@ -2246,14 +2246,14 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-GI-NEXT: .cfi_offset b10, -96 ; CHECK-GI-NEXT: .cfi_offset b11, -104 ; CHECK-GI-NEXT: .cfi_offset b12, -112 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; 
CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov s9, v0.s[1] ; CHECK-GI-NEXT: mov s10, v0.s[2] ; CHECK-GI-NEXT: mov s8, v0.s[3] ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __fixsfti ; CHECK-GI-NEXT: movi v11.2s, #255, lsl #24 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w8, #2130706431 // =0x7effffff ; CHECK-GI-NEXT: fmov s12, w8 ; CHECK-GI-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000 @@ -2296,14 +2296,14 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-GI-NEXT: mov x3, x22 ; CHECK-GI-NEXT: mov x4, x23 ; CHECK-GI-NEXT: mov x5, x24 -; CHECK-GI-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #56] // 8-byte Reload ; CHECK-GI-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, x25, x1, lt ; CHECK-GI-NEXT: fcmp s8, s12 ; CHECK-GI-NEXT: mov x0, x19 ; CHECK-GI-NEXT: mov x1, x20 -; CHECK-GI-NEXT: ldr d12, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d12, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: csinv x8, x8, xzr, le ; CHECK-GI-NEXT: csel x9, x26, x9, gt @@ -2592,9 +2592,9 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-SD-LABEL: test_signed_v2f64_v2i100: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #80 -; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 @@ -2606,11 +2606,11 @@ define <2 x i100> 
@test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-SD-NEXT: .cfi_offset b8, -48 ; CHECK-SD-NEXT: .cfi_offset b9, -56 ; CHECK-SD-NEXT: .cfi_offset b10, -64 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __fixdfti ; CHECK-SD-NEXT: mov x8, #-4170333254945079296 // =0xc620000000000000 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov x21, #-34359738368 // =0xfffffff800000000 ; CHECK-SD-NEXT: fmov d9, x8 ; CHECK-SD-NEXT: mov x8, #5053038781909696511 // =0x461fffffffffffff @@ -2629,13 +2629,13 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-SD-NEXT: csel x20, xzr, x9, vs ; CHECK-SD-NEXT: bl __fixdfti ; CHECK-SD-NEXT: fcmp d8, d9 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x21, x1, lt ; CHECK-SD-NEXT: fcmp d8, d10 ; CHECK-SD-NEXT: mov x0, x19 ; CHECK-SD-NEXT: mov x1, x20 -; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x9, x22, x9, gt ; CHECK-SD-NEXT: csinv x8, x8, xzr, le @@ -2650,9 +2650,9 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-GI-LABEL: test_signed_v2f64_v2i100: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte 
Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 @@ -2664,12 +2664,12 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-GI-NEXT: .cfi_offset b8, -48 ; CHECK-GI-NEXT: .cfi_offset b9, -56 ; CHECK-GI-NEXT: .cfi_offset b10, -64 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl __fixdfti ; CHECK-GI-NEXT: mov x8, #-4170333254945079296 // =0xc620000000000000 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov x21, #34359738368 // =0x800000000 ; CHECK-GI-NEXT: fmov d9, x8 ; CHECK-GI-NEXT: mov x8, #5053038781909696511 // =0x461fffffffffffff @@ -2687,13 +2687,13 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-GI-NEXT: csel x20, xzr, x9, vs ; CHECK-GI-NEXT: bl __fixdfti ; CHECK-GI-NEXT: fcmp d8, d9 -; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, x21, x1, lt ; CHECK-GI-NEXT: fcmp d8, d10 ; CHECK-GI-NEXT: mov x0, x19 ; CHECK-GI-NEXT: mov x1, x20 -; CHECK-GI-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: csinv x8, x8, xzr, le ; CHECK-GI-NEXT: csel x9, x22, x9, gt @@ -2712,9 +2712,9 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { ; CHECK-SD-LABEL: test_signed_v2f64_v2i128: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #80 -; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; 
CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 @@ -2726,11 +2726,11 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { ; CHECK-SD-NEXT: .cfi_offset b8, -48 ; CHECK-SD-NEXT: .cfi_offset b9, -56 ; CHECK-SD-NEXT: .cfi_offset b10, -64 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __fixdfti ; CHECK-SD-NEXT: mov x8, #-4044232465378705408 // =0xc7e0000000000000 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000 ; CHECK-SD-NEXT: fmov d9, x8 ; CHECK-SD-NEXT: mov x8, #5179139571476070399 // =0x47dfffffffffffff @@ -2749,13 +2749,13 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { ; CHECK-SD-NEXT: csel x20, xzr, x9, vs ; CHECK-SD-NEXT: bl __fixdfti ; CHECK-SD-NEXT: fcmp d8, d9 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x21, x1, lt ; CHECK-SD-NEXT: fcmp d8, d10 ; CHECK-SD-NEXT: mov x0, x19 ; CHECK-SD-NEXT: mov x1, x20 -; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x9, x22, x9, gt ; CHECK-SD-NEXT: csinv x8, x8, xzr, le @@ -2770,9 +2770,9 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { ; CHECK-GI-LABEL: test_signed_v2f64_v2i128: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-GI-NEXT: 
str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 @@ -2784,12 +2784,12 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { ; CHECK-GI-NEXT: .cfi_offset b8, -48 ; CHECK-GI-NEXT: .cfi_offset b9, -56 ; CHECK-GI-NEXT: .cfi_offset b10, -64 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl __fixdfti ; CHECK-GI-NEXT: mov x8, #-4044232465378705408 // =0xc7e0000000000000 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000 ; CHECK-GI-NEXT: fmov d9, x8 ; CHECK-GI-NEXT: mov x8, #5179139571476070399 // =0x47dfffffffffffff @@ -2807,13 +2807,13 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { ; CHECK-GI-NEXT: csel x20, xzr, x9, vs ; CHECK-GI-NEXT: bl __fixdfti ; CHECK-GI-NEXT: fcmp d8, d9 -; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, x21, x1, lt ; CHECK-GI-NEXT: fcmp d8, d10 ; CHECK-GI-NEXT: mov x0, x19 ; CHECK-GI-NEXT: mov x1, x20 -; CHECK-GI-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: csinv x8, x8, xzr, le ; CHECK-GI-NEXT: csel x9, x22, x9, gt @@ -3120,9 +3120,9 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) { ; CHECK-SD-LABEL: test_signed_v4f16_v4i100: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #112 -; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; 
CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-SD-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill @@ -3142,12 +3142,12 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) { ; CHECK-SD-NEXT: .cfi_offset b10, -96 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: movi v9.2s, #241, lsl #24 ; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fmov s10, w8 ; CHECK-SD-NEXT: mov x25, #-34359738368 // =0xfffffff800000000 ; CHECK-SD-NEXT: mov x26, #34359738367 // =0x7ffffffff @@ -3165,7 +3165,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) { ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x25, x1, lt @@ -3179,7 +3179,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) { ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x25, x1, lt @@ -3197,14 +3197,14 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) { ; 
CHECK-SD-NEXT: mov x3, x22 ; CHECK-SD-NEXT: mov x4, x23 ; CHECK-SD-NEXT: mov x5, x24 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x25, x1, lt ; CHECK-SD-NEXT: fcmp s8, s10 ; CHECK-SD-NEXT: mov x0, x19 ; CHECK-SD-NEXT: mov x1, x20 -; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x9, x26, x9, gt ; CHECK-SD-NEXT: csinv x8, x8, xzr, le @@ -3260,9 +3260,9 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) { ; CHECK-SD-LABEL: test_signed_v4f16_v4i128: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #112 -; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str d10, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-SD-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill @@ -3282,12 +3282,12 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) { ; CHECK-SD-NEXT: .cfi_offset b10, -96 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: movi v9.2s, #255, lsl #24 ; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fmov s10, w8 ; CHECK-SD-NEXT: mov x25, #-9223372036854775808 
// =0x8000000000000000 ; CHECK-SD-NEXT: mov x26, #9223372036854775807 // =0x7fffffffffffffff @@ -3305,7 +3305,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) { ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x25, x1, lt @@ -3319,7 +3319,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) { ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x25, x1, lt @@ -3337,14 +3337,14 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) { ; CHECK-SD-NEXT: mov x3, x22 ; CHECK-SD-NEXT: mov x4, x23 ; CHECK-SD-NEXT: mov x5, x24 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x25, x1, lt ; CHECK-SD-NEXT: fcmp s8, s10 ; CHECK-SD-NEXT: mov x0, x19 ; CHECK-SD-NEXT: mov x1, x20 -; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d10, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x9, x26, x9, gt ; CHECK-SD-NEXT: csinv x8, x8, xzr, le @@ -3845,7 +3845,7 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i100: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #192 -; CHECK-NEXT: str d10, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str d10, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: stp 
x29, x30, [sp, #96] // 16-byte Folded Spill ; CHECK-NEXT: stp x28, x27, [sp, #112] // 16-byte Folded Spill @@ -3869,17 +3869,17 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: .cfi_offset b8, -104 ; CHECK-NEXT: .cfi_offset b9, -112 ; CHECK-NEXT: .cfi_offset b10, -128 -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov x19, x8 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: movi v10.2s, #241, lsl #24 ; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: mov x22, #-34359738368 // =0xfffffff800000000 ; CHECK-NEXT: mov x23, #34359738367 // =0x7ffffffff @@ -3893,13 +3893,13 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: str x8, [sp, #72] // 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp, #72] // 8-byte Spill ; CHECK-NEXT: csel x8, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp, #24] // 8-byte Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x22, x1, lt ; CHECK-NEXT: fcmp s8, s9 @@ -3913,7 +3913,7 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte 
Reload ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, x22, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt @@ -3924,11 +3924,11 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x26, xzr, x8, vs ; CHECK-NEXT: csel x8, xzr, x9, vs -; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp, #32] // 8-byte Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: csel x8, x22, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt @@ -3939,11 +3939,11 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x27, xzr, x8, vs ; CHECK-NEXT: csel x8, xzr, x9, vs -; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp] // 8-byte Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, x22, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt @@ -3957,7 +3957,7 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x22, x1, lt ; CHECK-NEXT: fcmp s8, s9 @@ -3970,7 +3970,7 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, x22, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt @@ -3983,7 
+3983,7 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: csel x29, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr x9, [sp] // 8-byte Folded Reload +; CHECK-NEXT: ldr x9, [sp] // 8-byte Reload ; CHECK-NEXT: extr x8, x24, x28, #28 ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: bfi x25, x21, #36, #28 @@ -3995,7 +3995,7 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: str x9, [x19, #16] ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x10, [sp, #32] // 8-byte Reload ; CHECK-NEXT: stp x29, x25, [x19] ; CHECK-NEXT: stur x10, [x19, #50] ; CHECK-NEXT: lsr x10, x24, #28 @@ -4006,12 +4006,12 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: ldp x14, x12, [sp, #8] // 16-byte Folded Reload ; CHECK-NEXT: strb w11, [x19, #24] ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: ldr x13, [sp, #24] // 8-byte Reload ; CHECK-NEXT: csel x9, xzr, x9, vs ; CHECK-NEXT: bfi x8, x28, #36, #28 ; CHECK-NEXT: extr x10, x14, x12, #28 ; CHECK-NEXT: bfi x27, x12, #36, #28 -; CHECK-NEXT: ldr x12, [sp, #72] // 8-byte Folded Reload +; CHECK-NEXT: ldr x12, [sp, #72] // 8-byte Reload ; CHECK-NEXT: bfi x26, x13, #36, #28 ; CHECK-NEXT: stur x9, [x19, #25] ; CHECK-NEXT: lsr x9, x14, #28 @@ -4025,7 +4025,7 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: strb w9, [x19, #99] ; CHECK-NEXT: strb w8, [x19, #74] ; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload -; CHECK-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr d10, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #128] // 16-byte Folded Reload @@ -4042,7 +4042,7 @@ define <8 x i128> 
@test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-LABEL: test_signed_v8f16_v8i128: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #192 -; CHECK-SD-NEXT: str d10, [sp, #64] // 8-byte Folded Spill +; CHECK-SD-NEXT: str d10, [sp, #64] // 8-byte Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x28, x27, [sp, #112] // 16-byte Folded Spill @@ -4066,16 +4066,16 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: .cfi_offset b8, -104 ; CHECK-SD-NEXT: .cfi_offset b9, -112 ; CHECK-SD-NEXT: .cfi_offset b10, -128 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: mov x19, x8 ; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: movi v9.2s, #255, lsl #24 ; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: fmov s10, w8 ; CHECK-SD-NEXT: mov x22, #-9223372036854775808 // =0x8000000000000000 ; CHECK-SD-NEXT: mov x23, #9223372036854775807 // =0x7fffffffffffffff @@ -4089,13 +4089,13 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: fcmp s8, s8 ; CHECK-SD-NEXT: fcvt s8, h0 ; CHECK-SD-NEXT: csel x8, xzr, x8, vs -; CHECK-SD-NEXT: str x8, [sp, #72] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x8, [sp, #72] // 8-byte Spill ; CHECK-SD-NEXT: csel x8, xzr, x9, vs ; CHECK-SD-NEXT: fmov s0, s8 -; CHECK-SD-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x8, [sp, #24] // 8-byte Spill ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte 
Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x22, x1, lt @@ -4110,7 +4110,7 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x22, x1, lt @@ -4120,13 +4120,13 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: fcmp s8, s8 ; CHECK-SD-NEXT: fcvt s8, h0 ; CHECK-SD-NEXT: csel x8, xzr, x8, vs -; CHECK-SD-NEXT: str x8, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x8, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: csel x8, xzr, x9, vs ; CHECK-SD-NEXT: fmov s0, s8 -; CHECK-SD-NEXT: str x8, [sp] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x8, [sp] // 8-byte Spill ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x22, x1, lt ; CHECK-SD-NEXT: fcmp s8, s10 @@ -4139,7 +4139,7 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[1] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x22, x1, lt @@ -4153,7 +4153,7 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; 
CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x22, x1, lt @@ -4167,7 +4167,7 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, x22, x1, lt @@ -4194,20 +4194,20 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: csel x9, xzr, x9, vs ; CHECK-SD-NEXT: csel x8, xzr, x8, vs ; CHECK-SD-NEXT: stp x8, x9, [x19, #48] -; CHECK-SD-NEXT: ldr x8, [sp] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x8, [sp] // 8-byte Reload ; CHECK-SD-NEXT: str x8, [x19, #104] -; CHECK-SD-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x8, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: str x8, [x19, #96] -; CHECK-SD-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x8, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: str x8, [x19, #88] -; CHECK-SD-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x8, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: str x8, [x19, #80] -; CHECK-SD-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x8, [sp, #24] // 8-byte Reload ; CHECK-SD-NEXT: str x8, [x19, #72] -; CHECK-SD-NEXT: ldr x8, [sp, #72] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x8, [sp, #72] // 8-byte Reload ; CHECK-SD-NEXT: str x8, [x19, #64] ; CHECK-SD-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d10, [sp, #64] // 8-byte Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldp x26, x25, [sp, #128] // 16-byte Folded Reload @@ -5085,47 +5085,47 @@ define 
<2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-SD-NEXT: adrp x8, .LCPI86_0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_0] ; CHECK-SD-NEXT: mov v0.16b, v2.16b -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfdi ; CHECK-SD-NEXT: adrp x8, .LCPI86_1 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_1] ; CHECK-SD-NEXT: mov x20, #-9223372036854775808 // =0x8000000000000000 ; CHECK-SD-NEXT: csel x19, x20, x0, mi -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov x21, #9223372036854775807 // =0x7fffffffffffffff ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel x19, x21, x19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: bl __unordtf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x19, ne ; CHECK-SD-NEXT: fmov d0, x8 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixtfdi -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr 
q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel x19, x20, x0, mi ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel x19, x21, x19, gt ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: bl __unordtf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x19, ne ; CHECK-SD-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-SD-NEXT: fmov d0, x8 @@ -5148,13 +5148,13 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-GI-NEXT: .cfi_offset w23, -40 ; CHECK-GI-NEXT: .cfi_offset w30, -48 ; CHECK-GI-NEXT: adrp x8, .LCPI86_1 -; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI86_1] -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b -; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x20, #-4594234569871327232 // =0xc03e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 @@ -5165,7 +5165,7 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI86_0 ; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI86_0] -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: 
mov x22, #-1125899906842624 // =0xfffc000000000000 ; CHECK-GI-NEXT: cmp w0, #0 @@ -5175,18 +5175,18 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x21, x23, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfdi -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: mov x19, x0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel x21, xzr, x19, ne ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] @@ -5200,7 +5200,7 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x20, x23, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfdi -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov x19, x0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll index 8abad4419663c..0ad09d416ce68 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll @@ -120,14 +120,14 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind { ; CHECK-SD-LABEL: test_unsigned_i100_f32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov s8, s0 ; CHECK-SD-NEXT: bl __fixunssfti ; CHECK-SD-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: mov x10, #68719476735 // =0xfffffffff ; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, xzr, x1, lt ; CHECK-SD-NEXT: fcmp s8, s0 @@ -139,14 +139,14 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind { ; CHECK-GI-LABEL: test_unsigned_i100_f32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-GI-NEXT: fmov s8, s0 ; CHECK-GI-NEXT: bl __fixunssfti ; CHECK-GI-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-GI-NEXT: fcmp s8, #0.0 ; CHECK-GI-NEXT: mov x10, #68719476735 // =0xfffffffff ; CHECK-GI-NEXT: fmov s0, w8 -; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, xzr, x1, lt ; CHECK-GI-NEXT: fcmp s8, s0 @@ -162,12 +162,12 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind { ; CHECK-SD-LABEL: test_unsigned_i128_f32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov s8, s0 ; CHECK-SD-NEXT: bl __fixunssfti ; CHECK-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-SD-NEXT: fcmp s8, #0.0 -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: fmov s0, w8 ; CHECK-SD-NEXT: csel x8, xzr, x1, lt ; CHECK-SD-NEXT: csel x9, xzr, x0, lt @@ -180,12 +180,12 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind { ; CHECK-GI-LABEL: test_unsigned_i128_f32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-GI-NEXT: fmov s8, s0 ; CHECK-GI-NEXT: bl __fixunssfti ; CHECK-GI-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-GI-NEXT: fcmp s8, #0.0 -; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, xzr, x1, lt @@ -314,14 +314,14 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind { ; CHECK-SD-LABEL: test_unsigned_i100_f64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov d8, d0 ; CHECK-SD-NEXT: bl __fixunsdfti ; CHECK-SD-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff ; CHECK-SD-NEXT: fcmp d8, #0.0 ; CHECK-SD-NEXT: mov x10, #68719476735 // =0xfffffffff ; CHECK-SD-NEXT: fmov d0, x8 -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, xzr, x1, lt ; CHECK-SD-NEXT: fcmp d8, d0 @@ -333,14 +333,14 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind { ; CHECK-GI-LABEL: test_unsigned_i100_f64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-GI-NEXT: fmov d8, d0 ; CHECK-GI-NEXT: bl __fixunsdfti ; CHECK-GI-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff ; CHECK-GI-NEXT: fcmp d8, #0.0 ; CHECK-GI-NEXT: mov x10, #68719476735 // =0xfffffffff ; CHECK-GI-NEXT: fmov d0, x8 -; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, xzr, x1, lt ; CHECK-GI-NEXT: fcmp d8, d0 @@ -356,12 +356,12 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind { ; CHECK-SD-LABEL: test_unsigned_i128_f64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov d8, d0 ; CHECK-SD-NEXT: bl __fixunsdfti ; CHECK-SD-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff ; CHECK-SD-NEXT: fcmp d8, #0.0 -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: fmov d0, x8 ; CHECK-SD-NEXT: csel x8, xzr, x1, lt ; CHECK-SD-NEXT: csel x9, xzr, x0, lt @@ -374,12 +374,12 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind { ; CHECK-GI-LABEL: test_unsigned_i128_f64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-GI-NEXT: fmov d8, d0 ; CHECK-GI-NEXT: bl __fixunsdfti ; CHECK-GI-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff ; CHECK-GI-NEXT: fcmp d8, #0.0 -; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-GI-NEXT: fmov d0, x8 ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, xzr, x1, lt @@ -686,14 +686,14 @@ define i100 @test_unsigned_i100_f16(half %f) nounwind { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti ; CHECK-SD-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: mov x10, #68719476735 // =0xfffffffff ; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, xzr, x1, lt ; CHECK-SD-NEXT: fcmp s8, s0 @@ -723,12 +723,12 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti ; CHECK-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-SD-NEXT: fcmp s8, #0.0 -; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: fmov s0, w8 ; CHECK-SD-NEXT: csel x8, xzr, x1, lt ; CHECK-SD-NEXT: csel x9, xzr, x0, lt @@ -763,14 +763,14 @@ define i32 @test_unsigned_f128_i32(fp128 %f) { ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: adrp x8, .LCPI30_0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_0] ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfsi ; CHECK-SD-NEXT: adrp x8, .LCPI30_1 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: ldr q1, [x8, 
:lo12:.LCPI30_1] ; CHECK-SD-NEXT: csel w19, wzr, w0, mi @@ -784,17 +784,17 @@ define i32 @test_unsigned_f128_i32(fp128 %f) { ; CHECK-GI-LABEL: test_unsigned_f128_i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w20, -16 ; CHECK-GI-NEXT: .cfi_offset w30, -32 ; CHECK-GI-NEXT: adrp x8, .LCPI30_1 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1] ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt @@ -806,7 +806,7 @@ define i32 @test_unsigned_f128_i32(fp128 %f) { ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_0] ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: csel x8, x19, xzr, mi ; CHECK-GI-NEXT: mov v0.d[0], x8 ; CHECK-GI-NEXT: mov x8, #281474976579584 // =0xfffffffe0000 diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 5a66b68af8e96..cd16988f10313 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -452,14 +452,14 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) { ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: adrp x8, .LCPI14_0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] ; 
CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfsi ; CHECK-SD-NEXT: adrp x8, .LCPI14_1 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] ; CHECK-SD-NEXT: csel w19, wzr, w0, mi @@ -474,17 +474,17 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) { ; CHECK-GI-LABEL: test_unsigned_v1f128_v1i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w20, -16 ; CHECK-GI-NEXT: .cfi_offset w30, -32 ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt @@ -505,7 +505,7 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) { ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov s0, w0 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #48 ; CHECK-GI-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f128.v1i32(<1 x fp128> %f) @@ -516,7 +516,7 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-SD-LABEL: test_unsigned_v2f128_v2i32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub 
sp, sp, #96 -; CHECK-SD-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w19, -8 @@ -527,33 +527,33 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-SD-NEXT: adrp x8, .LCPI15_0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-SD-NEXT: mov v0.16b, v2.16b -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfsi ; CHECK-SD-NEXT: adrp x8, .LCPI15_1 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] ; CHECK-SD-NEXT: csel w19, wzr, w0, mi -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csinv w20, w19, wzr, le ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfsi -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel w19, wzr, w0, mi ; CHECK-SD-NEXT: bl 
__gttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-SD-NEXT: csinv w8, w19, wzr, le ; CHECK-SD-NEXT: fmov s0, w8 ; CHECK-SD-NEXT: mov v0.s[1], w20 @@ -565,7 +565,7 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-LABEL: test_unsigned_v2f128_v2i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #96 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 96 @@ -575,12 +575,12 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-NEXT: .cfi_offset w22, -32 ; CHECK-GI-NEXT: .cfi_offset w30, -48 ; CHECK-GI-NEXT: adrp x8, .LCPI15_1 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_1] ; CHECK-GI-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt @@ -590,7 +590,7 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x21, #281474976579584 // =0xfffffffe0000 @@ -603,9 +603,9 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; 
CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] @@ -622,7 +622,7 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-NEXT: fmov s0, w19 ; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v0.s[1], w0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: add sp, sp, #96 @@ -646,41 +646,41 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 ; CHECK-SD-NEXT: mov v0.16b, v2.16b ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfsi ; CHECK-SD-NEXT: adrp x8, .LCPI16_1 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] ; CHECK-SD-NEXT: csel w19, wzr, w0, mi -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csinv w20, w19, wzr, 
le ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfsi -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel w19, wzr, w0, mi ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csinv w21, w19, wzr, le ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfsi -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel w19, wzr, w0, mi ; CHECK-SD-NEXT: bl __gttf2 @@ -712,7 +712,7 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] ; CHECK-GI-NEXT: stp q1, q2, [sp, #32] // 32-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt @@ -722,7 +722,7 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; 
CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x21, #281474976579584 // =0xfffffffe0000 @@ -732,8 +732,8 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x20, x21, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload @@ -754,9 +754,9 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov w20, w0 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x22, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] @@ -786,7 +786,7 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-SD-LABEL: test_unsigned_v4f128_v4i32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #128 -; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 128 ; CHECK-SD-NEXT: .cfi_offset w19, -8 @@ -795,44 +795,44 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-SD-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov v2.16b, v1.16b ; CHECK-SD-NEXT: adrp x8, .LCPI17_0 -; CHECK-SD-NEXT: str 
q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-SD-NEXT: str q3, [sp, #80] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q3, [sp, #80] // 16-byte Spill ; CHECK-SD-NEXT: mov v0.16b, v2.16b -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfsi ; CHECK-SD-NEXT: adrp x8, .LCPI17_1 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] ; CHECK-SD-NEXT: csel w19, wzr, w0, mi -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: bl __gttf2 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csinv w20, w19, wzr, le ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfsi -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel w19, wzr, w0, mi ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: csinv w8, w19, wzr, le ; 
CHECK-SD-NEXT: fmov s0, w8 ; CHECK-SD-NEXT: mov v0.s[1], w20 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfsi ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload @@ -840,23 +840,23 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-SD-NEXT: csel w19, wzr, w0, mi ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: csinv w8, w19, wzr, le ; CHECK-SD-NEXT: mov v0.s[2], w8 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: ldp q1, q0, [sp, #64] // 32-byte Folded Reload ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfsi -; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel w19, wzr, w0, mi ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-SD-NEXT: csinv w8, w19, wzr, le ; CHECK-SD-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload 
; CHECK-SD-NEXT: mov v0.s[3], w8 @@ -866,7 +866,7 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-LABEL: test_unsigned_v4f128_v4i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #144 -; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-GI-NEXT: stp x24, x23, [sp, #96] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x22, x21, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #128] // 16-byte Folded Spill @@ -881,11 +881,11 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI17_1 ; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: str q3, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-GI-NEXT: str q3, [sp, #32] // 16-byte Spill +; CHECK-GI-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt @@ -895,7 +895,7 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI17_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x22, #281474976579584 // =0xfffffffe0000 @@ -905,13 +905,13 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x20, x22, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte 
Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] @@ -925,13 +925,13 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x21, x22, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov w20, w0 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x21, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] @@ -945,8 +945,8 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: csel x8, x23, x22, mi ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov w21, w0 ; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload @@ -966,7 +966,7 @@ define <4 x i32> 
@test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: fmov s0, w19 ; CHECK-GI-NEXT: ldp x24, x23, [sp, #96] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-GI-NEXT: mov v0.s[1], w20 ; CHECK-GI-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[2], w21 @@ -1317,10 +1317,10 @@ define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) { ; CHECK-SD-NEXT: .cfi_offset b8, -40 ; CHECK-SD-NEXT: .cfi_offset b9, -48 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-SD-NEXT: mov x21, #68719476735 // =0xfffffffff ; CHECK-SD-NEXT: fmov s9, w8 @@ -1361,11 +1361,11 @@ define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) { ; CHECK-GI-NEXT: .cfi_offset b8, -40 ; CHECK-GI-NEXT: .cfi_offset b9, -48 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __fixunssfti -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-GI-NEXT: mov x21, #68719476735 // =0xfffffffff ; CHECK-GI-NEXT: fmov s9, w8 @@ -1399,7 +1399,7 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #64 ; CHECK-SD-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #32] // 
8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w19, -8 @@ -1408,10 +1408,10 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) { ; CHECK-SD-NEXT: .cfi_offset b8, -40 ; CHECK-SD-NEXT: .cfi_offset b9, -48 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-SD-NEXT: fmov s9, w8 ; CHECK-SD-NEXT: mov s8, v0.s[1] @@ -1424,7 +1424,7 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) { ; CHECK-SD-NEXT: csinv x20, x8, xzr, le ; CHECK-SD-NEXT: bl __fixunssfti ; CHECK-SD-NEXT: fcmp s8, #0.0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x1, lt ; CHECK-SD-NEXT: csel x9, xzr, x0, lt ; CHECK-SD-NEXT: fcmp s8, s9 @@ -1441,7 +1441,7 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #64 ; CHECK-GI-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w19, -8 @@ -1450,11 +1450,11 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) { ; CHECK-GI-NEXT: .cfi_offset b8, -40 ; CHECK-GI-NEXT: .cfi_offset b9, -48 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: 
str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __fixunssfti -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-GI-NEXT: fmov s9, w8 ; CHECK-GI-NEXT: fcmp s0, #0.0 @@ -1466,7 +1466,7 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) { ; CHECK-GI-NEXT: csinv x20, x9, xzr, le ; CHECK-GI-NEXT: bl __fixunssfti ; CHECK-GI-NEXT: fcmp s8, #0.0 -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, xzr, x1, lt ; CHECK-GI-NEXT: fcmp s8, s9 @@ -1626,10 +1626,10 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-SD-NEXT: .cfi_offset w30, -64 ; CHECK-SD-NEXT: .cfi_offset b8, -72 ; CHECK-SD-NEXT: .cfi_offset b9, -80 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-SD-NEXT: mov x25, #68719476735 // =0xfffffffff ; CHECK-SD-NEXT: fmov s9, w8 @@ -1643,17 +1643,17 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-SD-NEXT: csinv x20, x8, xzr, le ; CHECK-SD-NEXT: bl __fixunssfti ; CHECK-SD-NEXT: fcmp s8, #0.0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, xzr, x1, lt ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: csel x21, x25, 
x9, gt ; CHECK-SD-NEXT: csinv x22, x8, xzr, le ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s8, v0.s[1] ; CHECK-SD-NEXT: fcmp s0, #0.0 ; CHECK-SD-NEXT: csel x8, xzr, x0, lt @@ -1705,13 +1705,13 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-GI-NEXT: .cfi_offset b9, -80 ; CHECK-GI-NEXT: .cfi_offset b10, -88 ; CHECK-GI-NEXT: .cfi_offset b11, -96 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] ; CHECK-GI-NEXT: mov s10, v0.s[3] ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __fixunssfti -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-GI-NEXT: mov x25, #68719476735 // =0xfffffffff ; CHECK-GI-NEXT: fmov s11, w8 @@ -1768,7 +1768,7 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #96 ; CHECK-SD-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill @@ -1782,10 +1782,10 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-SD-NEXT: .cfi_offset w30, -64 ; CHECK-SD-NEXT: .cfi_offset b8, -72 ; CHECK-SD-NEXT: .cfi_offset b9, -80 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: 
ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-SD-NEXT: fmov s9, w8 ; CHECK-SD-NEXT: mov s8, v0.s[1] @@ -1798,17 +1798,17 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-SD-NEXT: csinv x20, x8, xzr, le ; CHECK-SD-NEXT: bl __fixunssfti ; CHECK-SD-NEXT: fcmp s8, #0.0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: csel x8, xzr, x1, lt ; CHECK-SD-NEXT: csel x9, xzr, x0, lt ; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: csinv x21, x9, xzr, le ; CHECK-SD-NEXT: csinv x22, x8, xzr, le ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s8, v0.s[1] ; CHECK-SD-NEXT: fcmp s0, #0.0 ; CHECK-SD-NEXT: csel x8, xzr, x1, lt @@ -1823,7 +1823,7 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-SD-NEXT: mov x3, x22 ; CHECK-SD-NEXT: mov x4, x23 ; CHECK-SD-NEXT: mov x5, x24 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x8, xzr, x1, lt ; CHECK-SD-NEXT: csel x9, xzr, x0, lt @@ -1843,7 +1843,7 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-GI-NEXT: sub sp, sp, #112 ; CHECK-GI-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill ; 
CHECK-GI-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill @@ -1859,13 +1859,13 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-GI-NEXT: .cfi_offset b9, -80 ; CHECK-GI-NEXT: .cfi_offset b10, -88 ; CHECK-GI-NEXT: .cfi_offset b11, -96 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] ; CHECK-GI-NEXT: mov s10, v0.s[3] ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl __fixunssfti -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-GI-NEXT: fmov s11, w8 ; CHECK-GI-NEXT: fcmp s0, #0.0 @@ -1897,7 +1897,7 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-GI-NEXT: mov x3, x22 ; CHECK-GI-NEXT: mov x4, x23 ; CHECK-GI-NEXT: mov x5, x24 -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, xzr, x1, lt @@ -2145,10 +2145,10 @@ define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) { ; CHECK-SD-NEXT: .cfi_offset w30, -32 ; CHECK-SD-NEXT: .cfi_offset b8, -40 ; CHECK-SD-NEXT: .cfi_offset b9, -48 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __fixunsdfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff ; CHECK-SD-NEXT: mov x21, #68719476735 // =0xfffffffff ; CHECK-SD-NEXT: fmov d9, x8 @@ -2188,11 +2188,11 @@ define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x 
double> %f) { ; CHECK-GI-NEXT: .cfi_offset w30, -32 ; CHECK-GI-NEXT: .cfi_offset b8, -40 ; CHECK-GI-NEXT: .cfi_offset b9, -48 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl __fixunsdfti -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff ; CHECK-GI-NEXT: mov x21, #68719476735 // =0xfffffffff ; CHECK-GI-NEXT: fmov d9, x8 @@ -2226,7 +2226,7 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #64 ; CHECK-SD-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w19, -8 @@ -2234,10 +2234,10 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) { ; CHECK-SD-NEXT: .cfi_offset w30, -32 ; CHECK-SD-NEXT: .cfi_offset b8, -40 ; CHECK-SD-NEXT: .cfi_offset b9, -48 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __fixunsdfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff ; CHECK-SD-NEXT: fmov d9, x8 ; CHECK-SD-NEXT: mov d8, v0.d[1] @@ -2250,7 +2250,7 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) { ; CHECK-SD-NEXT: csinv x20, x8, xzr, le ; CHECK-SD-NEXT: bl __fixunsdfti ; CHECK-SD-NEXT: fcmp d8, #0.0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte 
Reload ; CHECK-SD-NEXT: csel x8, xzr, x1, lt ; CHECK-SD-NEXT: csel x9, xzr, x0, lt ; CHECK-SD-NEXT: fcmp d8, d9 @@ -2267,7 +2267,7 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #64 ; CHECK-GI-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w19, -8 @@ -2275,11 +2275,11 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) { ; CHECK-GI-NEXT: .cfi_offset w30, -32 ; CHECK-GI-NEXT: .cfi_offset b8, -40 ; CHECK-GI-NEXT: .cfi_offset b9, -48 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl __fixunsdfti -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff ; CHECK-GI-NEXT: fmov d9, x8 ; CHECK-GI-NEXT: fcmp d0, #0.0 @@ -2291,7 +2291,7 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) { ; CHECK-GI-NEXT: csinv x20, x9, xzr, le ; CHECK-GI-NEXT: bl __fixunsdfti ; CHECK-GI-NEXT: fcmp d8, #0.0 -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: csel x8, xzr, x0, lt ; CHECK-GI-NEXT: csel x9, xzr, x1, lt ; CHECK-GI-NEXT: fcmp d8, d9 @@ -2556,11 +2556,11 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) { ; CHECK-SD-NEXT: .cfi_offset b9, -80 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s8, h1 ; 
CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: fmov s9, w8 @@ -2575,7 +2575,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) { ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti ; CHECK-SD-NEXT: fcmp s8, #0.0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x0, lt ; CHECK-SD-NEXT: csel x9, xzr, x1, lt ; CHECK-SD-NEXT: fcmp s8, s9 @@ -2584,7 +2584,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) { ; CHECK-SD-NEXT: csinv x22, x8, xzr, le ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: csel x8, xzr, x0, lt @@ -2659,7 +2659,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #96 ; CHECK-SD-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill @@ -2675,10 +2675,10 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) { ; CHECK-SD-NEXT: .cfi_offset b9, -80 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; 
CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: fmov s9, w8 @@ -2691,7 +2691,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) { ; CHECK-SD-NEXT: csinv x20, x9, xzr, le ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: csel x8, xzr, x1, lt @@ -2702,7 +2702,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) { ; CHECK-SD-NEXT: csinv x22, x8, xzr, le ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: csel x8, xzr, x1, lt @@ -2718,7 +2718,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) { ; CHECK-SD-NEXT: mov x3, x22 ; CHECK-SD-NEXT: mov x4, x23 ; CHECK-SD-NEXT: mov x5, x24 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; CHECK-SD-NEXT: csel x8, xzr, x1, lt ; CHECK-SD-NEXT: csel x9, xzr, x0, lt @@ -3184,15 +3184,15 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: .cfi_offset w29, -96 ; CHECK-NEXT: .cfi_offset b8, -104 ; CHECK-NEXT: .cfi_offset b9, -112 -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov x19, x8 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #32] // 
16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: fmov s9, w8 @@ -3208,7 +3208,7 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 @@ -3218,7 +3218,7 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: stp x8, x9, [sp] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -3227,10 +3227,10 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: csel x25, x23, x9, gt -; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp, #32] // 8-byte Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -3241,7 +3241,7 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: csinv x28, x8, xzr, le ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -3253,7 +3253,7 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: fmov s0, s8 ; 
CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 @@ -3262,7 +3262,7 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: csinv x27, x8, xzr, le ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -3280,7 +3280,7 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: extr x9, x29, x20, #28 ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: stur x8, [x19, #41] -; CHECK-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x11, [sp, #32] // 8-byte Reload ; CHECK-NEXT: stp x22, x9, [x19, #8] ; CHECK-NEXT: lsr x9, x29, #28 ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -3348,14 +3348,14 @@ define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: .cfi_offset w29, -96 ; CHECK-SD-NEXT: .cfi_offset b8, -104 ; CHECK-SD-NEXT: .cfi_offset b9, -112 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: mov x19, x8 ; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: fmov s9, w8 @@ -3369,7 +3369,7 @@ define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: stp x8, x10, [sp, #16] // 16-byte 
Folded Spill ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: csel x8, xzr, x1, lt @@ -3381,7 +3381,7 @@ define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: stp x8, x9, [sp] // 16-byte Folded Spill ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: csel x8, xzr, x1, lt @@ -3393,7 +3393,7 @@ define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti ; CHECK-SD-NEXT: fcmp s8, #0.0 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: csel x8, xzr, x1, lt ; CHECK-SD-NEXT: csel x9, xzr, x0, lt ; CHECK-SD-NEXT: fcmp s8, s9 @@ -3402,7 +3402,7 @@ define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: csinv x27, x8, xzr, le ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: mov h0, v0.h[1] ; CHECK-SD-NEXT: csel x8, xzr, x1, lt @@ -3413,7 +3413,7 @@ define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: csinv x29, x8, xzr, le ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: csel x8, xzr, x1, lt @@ -3424,7 +3424,7 @@ define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> 
%f) { ; CHECK-SD-NEXT: csinv x21, x8, xzr, le ; CHECK-SD-NEXT: fmov s0, s8 ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: fcmp s8, #0.0 ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: csel x8, xzr, x1, lt @@ -3447,13 +3447,13 @@ define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-SD-NEXT: csinv x8, x8, xzr, le ; CHECK-SD-NEXT: csinv x9, x9, xzr, le ; CHECK-SD-NEXT: stp x9, x8, [x19, #48] -; CHECK-SD-NEXT: ldr x8, [sp] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x8, [sp] // 8-byte Reload ; CHECK-SD-NEXT: str x8, [x19, #88] -; CHECK-SD-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x8, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: str x8, [x19, #80] -; CHECK-SD-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x8, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: str x8, [x19, #72] -; CHECK-SD-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x8, [sp, #24] // 8-byte Reload ; CHECK-SD-NEXT: str x8, [x19, #64] ; CHECK-SD-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload @@ -4144,35 +4144,35 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-SD-NEXT: adrp x8, .LCPI86_0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_0] ; CHECK-SD-NEXT: mov v0.16b, v2.16b -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfdi ; CHECK-SD-NEXT: adrp x8, .LCPI86_1 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_1] ; 
CHECK-SD-NEXT: csel x19, xzr, x0, mi -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: csinv x8, x19, xzr, le ; CHECK-SD-NEXT: fmov d0, x8 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: bl __getf2 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov w19, w0 ; CHECK-SD-NEXT: bl __fixunstfdi -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: cmp w19, #0 ; CHECK-SD-NEXT: csel x19, xzr, x0, mi ; CHECK-SD-NEXT: bl __gttf2 ; CHECK-SD-NEXT: cmp w0, #0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: csinv x8, x19, xzr, le ; CHECK-SD-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-SD-NEXT: fmov d0, x8 @@ -4194,12 +4194,12 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-GI-NEXT: .cfi_offset w23, -40 ; CHECK-GI-NEXT: .cfi_offset w30, -48 ; CHECK-GI-NEXT: adrp x8, .LCPI86_1 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI86_1] ; CHECK-GI-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; 
CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x19, x8, xzr, gt @@ -4209,7 +4209,7 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI86_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI86_0] -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: mov x21, #-562949953421312 // =0xfffe000000000000 ; CHECK-GI-NEXT: cmp w0, #0 @@ -4222,9 +4222,9 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov x19, x0 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll index 1f84c944d7c16..da19991d56259 100644 --- a/llvm/test/CodeGen/AArch64/fptrunc.ll +++ b/llvm/test/CodeGen/AArch64/fptrunc.ll @@ -92,19 +92,19 @@ define <2 x half> @fptrunc_v2f128_v2f16(<2 x fp128> %a) { ; CHECK-SD-LABEL: fptrunc_v2f128_v2f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: bl __trunctfhf2 ; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded 
Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: bl __trunctfhf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -113,26 +113,26 @@ define <2 x half> @fptrunc_v2f128_v2f16(<2 x fp128> %a) { ; CHECK-GI-LABEL: fptrunc_v2f128_v2f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: mov v2.d[0], x8 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov v2.d[1], x8 -; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: bl __trunctfhf2 ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: bl __trunctfhf2 ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: bl __trunctfhf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: bl __trunctfhf2 
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: add sp, sp, #64 @@ -147,19 +147,19 @@ define <2 x float> @fptrunc_v2f128_v2f32(<2 x fp128> %a) { ; CHECK-SD-LABEL: fptrunc_v2f128_v2f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: bl __trunctfsf2 ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: bl __trunctfsf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -168,18 +168,18 @@ define <2 x float> @fptrunc_v2f128_v2f32(<2 x fp128> %a) { ; CHECK-GI-LABEL: fptrunc_v2f128_v2f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; 
CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __trunctfsf2 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: bl __trunctfsf2 -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #48 @@ -194,19 +194,19 @@ define <2 x double> @fptrunc_v2f128_v2f64(<2 x fp128> %a) { ; CHECK-SD-LABEL: fptrunc_v2f128_v2f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: bl __trunctfdf2 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: bl __trunctfdf2 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ 
-214,18 +214,18 @@ define <2 x double> @fptrunc_v2f128_v2f64(<2 x fp128> %a) { ; CHECK-GI-LABEL: fptrunc_v2f128_v2f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl __trunctfdf2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: bl __trunctfdf2 -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #48 diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir index e970d8339d792..a7bde2a9f1c92 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir @@ -10,7 +10,7 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG - ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill + ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // 
$d8 @ cfa - 8 * VG - 16 ; CHECK-NEXT: addvl sp, sp, #-1 @@ -20,7 +20,7 @@ ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload - ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload + ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll index 179df026e25d6..2d2004e068060 100644 --- a/llvm/test/CodeGen/AArch64/frem-power2.ll +++ b/llvm/test/CodeGen/AArch64/frem-power2.ll @@ -431,9 +431,9 @@ define <4 x float> @frem2_vec(<4 x float> %x) { ; CHECK-GI-LABEL: frem2_vec: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -446,27 +446,27 @@ define <4 x float> @frem2_vec(<4 x float> %x) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, #2.00000000 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, #2.00000000 ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 
killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, #2.00000000 ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -490,9 +490,9 @@ define <4 x float> @frem2_nsz_vec(<4 x float> %x) { ; CHECK-GI-LABEL: frem2_nsz_vec: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -505,27 +505,27 @@ define <4 x float> @frem2_nsz_vec(<4 x float> %x) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, #2.00000000 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; 
CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, #2.00000000 ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, #2.00000000 ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -554,7 +554,7 @@ define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) { ; CHECK-GI-NEXT: sub sp, sp, #96 ; CHECK-GI-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 96 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -571,27 +571,27 @@ define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s11 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed 
$s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s11 ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s11 ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b diff --git a/llvm/test/CodeGen/AArch64/frem.ll b/llvm/test/CodeGen/AArch64/frem.ll index feb13da64cbf8..b72af95adf009 100644 --- a/llvm/test/CodeGen/AArch64/frem.ll +++ b/llvm/test/CodeGen/AArch64/frem.ll @@ -66,7 +66,7 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-SD-LABEL: frem_v2f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill @@ -74,14 +74,14 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-SD-NEXT: mov d1, v1.d[1] ; CHECK-SD-NEXT: bl fmod ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] 
// 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-SD-NEXT: bl fmod -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -90,7 +90,7 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #48 ; CHECK-GI-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -101,14 +101,14 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-GI-NEXT: bl fmod ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d1, d9 ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl fmod -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #48 @@ -124,7 +124,7 @@ define <3 x double> @frem_v3f64(<3 x double> %a, <3 x double> %b) { ; CHECK-SD-NEXT: str d12, 
[sp, #-48]! // 8-byte Folded Spill ; CHECK-SD-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill ; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-SD-NEXT: .cfi_offset w30, -8 ; CHECK-SD-NEXT: .cfi_offset b8, -16 @@ -150,7 +150,7 @@ define <3 x double> @frem_v3f64(<3 x double> %a, <3 x double> %b) { ; CHECK-SD-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload ; CHECK-SD-NEXT: fmov d2, d0 -; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-SD-NEXT: fmov d0, d12 ; CHECK-SD-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload ; CHECK-SD-NEXT: ret @@ -160,7 +160,7 @@ define <3 x double> @frem_v3f64(<3 x double> %a, <3 x double> %b) { ; CHECK-GI-NEXT: str d12, [sp, #-48]! // 8-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -186,7 +186,7 @@ define <3 x double> @frem_v3f64(<3 x double> %a, <3 x double> %b) { ; CHECK-GI-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov d2, d0 -; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-GI-NEXT: fmov d0, d12 ; CHECK-GI-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload ; CHECK-GI-NEXT: ret @@ -199,7 +199,7 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-SD-LABEL: frem_v4f64: ; CHECK-SD: // %bb.0: // %entry ; 
CHECK-SD-NEXT: sub sp, sp, #96 -; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q2, [sp] // 32-byte Folded Spill @@ -208,28 +208,28 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-SD-NEXT: mov d1, v2.d[1] ; CHECK-SD-NEXT: bl fmod ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-SD-NEXT: bl fmod -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: mov d1, v1.d[1] ; CHECK-SD-NEXT: bl fmod ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-SD-NEXT: bl fmod ; CHECK-SD-NEXT: fmov d1, d0 ; CHECK-SD-NEXT: ldp q2, q0, [sp, #16] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.d[1], v2.d[0] ; CHECK-SD-NEXT: add sp, sp, #96 ; CHECK-SD-NEXT: ret @@ -239,7 +239,7 @@ define <4 x double> @frem_v4f64(<4 x double> %a, 
<4 x double> %b) { ; CHECK-GI-NEXT: sub sp, sp, #112 ; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -247,9 +247,9 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-GI-NEXT: .cfi_offset b10, -40 ; CHECK-GI-NEXT: .cfi_offset b11, -48 ; CHECK-GI-NEXT: mov v4.16b, v1.16b -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b -; CHECK-GI-NEXT: str q3, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q3, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: mov d10, v2.d[1] ; CHECK-GI-NEXT: mov d11, v3.d[1] @@ -258,27 +258,27 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-GI-NEXT: mov d9, v4.d[1] ; CHECK-GI-NEXT: bl fmod ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov d1, d10 ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl fmod ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl fmod ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; 
CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov d1, d11 ; CHECK-GI-NEXT: fmov d0, d9 ; CHECK-GI-NEXT: bl fmod ; CHECK-GI-NEXT: ldp q3, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] @@ -294,7 +294,7 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) { ; CHECK-SD-LABEL: frem_v2f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 @@ -304,14 +304,14 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) { ; CHECK-SD-NEXT: mov s1, v1.s[1] ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; 
CHECK-SD-NEXT: add sp, sp, #64 @@ -321,7 +321,7 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #48 ; CHECK-GI-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -334,14 +334,14 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) { ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s9 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #48 @@ -355,7 +355,7 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) { ; CHECK-SD-LABEL: frem_v3f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill @@ -363,22 +363,22 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) { ; CHECK-SD-NEXT: mov s1, v1.s[1] ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded 
Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: mov s1, v1.s[2] ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #64 @@ -389,7 +389,7 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) { ; CHECK-GI-NEXT: sub sp, sp, #80 ; CHECK-GI-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -404,18 +404,18 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) { ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s10 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl fmodf ; 
CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s11 ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] @@ -432,7 +432,7 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-SD-LABEL: frem_v4f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill @@ -440,30 +440,30 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-SD-NEXT: mov s1, v1.s[1] ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: mov s1, 
v1.s[2] ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] ; CHECK-SD-NEXT: mov s1, v1.s[3] ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #64 @@ -475,7 +475,7 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -494,27 +494,27 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s11 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s12 ; 
CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s13 ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] @@ -530,7 +530,7 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-SD-LABEL: frem_v8f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #96 -; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q2, [sp] // 32-byte Folded Spill @@ -539,61 +539,61 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-SD-NEXT: mov s1, v2.s[1] ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; 
CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: mov s1, v1.s[2] ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] ; CHECK-SD-NEXT: mov s1, v1.s[3] ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: mov s1, v1.s[1] ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; 
CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: mov s1, v1.s[2] ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] ; CHECK-SD-NEXT: mov s1, v1.s[3] ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fmov s2, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] ; CHECK-SD-NEXT: add sp, sp, #96 ; CHECK-SD-NEXT: ret @@ -605,7 +605,7 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #144] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #160] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #176] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #192] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #192] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 208 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -631,63 +631,63 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) { ; CHECK-GI-NEXT: mov s15, v1.s[2] ; CHECK-GI-NEXT: mov s13, v1.s[3] ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 -; CHECK-GI-NEXT: str s2, [sp, #48] // 4-byte Folded Spill +; CHECK-GI-NEXT: str s2, [sp, #48] // 4-byte Spill ; CHECK-GI-NEXT: mov s2, v4.s[2] -; CHECK-GI-NEXT: str s2, [sp, #112] 
// 4-byte Folded Spill +; CHECK-GI-NEXT: str s2, [sp, #112] // 4-byte Spill ; CHECK-GI-NEXT: mov s2, v3.s[3] ; CHECK-GI-NEXT: stp s2, s5, [sp, #200] // 8-byte Folded Spill ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s14 ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s15 ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s1, s13 ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fmov s1, s12 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr s0, [sp, #48] // 4-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr s0, [sp, #48] // 4-byte Reload ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fmov s1, s11 ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr s0, [sp, #112] // 4-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-GI-NEXT: ldr s0, [sp, #112] // 4-byte Reload ; CHECK-GI-NEXT: bl fmodf ; 
CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill ; CHECK-GI-NEXT: ldp s1, s0, [sp, #200] // 8-byte Folded Reload ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: ldp q3, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #176] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #192] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #192] // 8-byte Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #160] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] ; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v2.16b, v1.16b @@ -704,7 +704,7 @@ define <7 x half> @frem_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-LABEL: frem_v7f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h3, v0.h[1] 
@@ -714,7 +714,7 @@ define <7 x half> @frem_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h2 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 @@ -724,15 +724,15 @@ define <7 x half> @frem_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: mov h1, v1.h[3] @@ -740,9 +740,9 @@ define <7 x half> @frem_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] ; CHECK-SD-NEXT: mov h1, v1.h[4] @@ -750,9 +750,9 @@ define <7 x half> @frem_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf 
; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] ; CHECK-SD-NEXT: mov h1, v1.h[5] @@ -760,9 +760,9 @@ define <7 x half> @frem_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] ; CHECK-SD-NEXT: mov h1, v1.h[6] @@ -770,9 +770,9 @@ define <7 x half> @frem_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] ; CHECK-SD-NEXT: mov h1, v1.h[7] @@ -780,8 +780,8 @@ define <7 x half> @frem_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, 
#64 ; CHECK-SD-NEXT: ret @@ -793,7 +793,7 @@ define <7 x half> @frem_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -813,71 +813,71 @@ define <7 x half> @frem_v7f16(<7 x half> %a, <7 x half> %b) { ; CHECK-GI-NEXT: mov h15, v1.h[2] ; CHECK-GI-NEXT: mov h8, v1.h[3] ; CHECK-GI-NEXT: mov h13, v1.h[4] -; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v0.h[6] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h2, [sp, #80] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #80] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v1.h[5] -; CHECK-GI-NEXT: str h2, [sp, #172] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #172] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v1.h[6] ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str h2, [sp, #174] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #174] // 2-byte Spill ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h9 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h14 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h10 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h15 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h11 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h8 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte 
Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h12 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h13 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #172] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #172] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] 
-; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -892,7 +892,7 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) { ; CHECK-SD-LABEL: frem_v4f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 @@ -903,38 +903,38 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) { ; CHECK-SD-NEXT: fcvt s0, h3 ; CHECK-SD-NEXT: fcvt s1, h2 ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: fcvt s2, h1 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s1, h0 ; CHECK-SD-NEXT: fmov s0, s2 ; CHECK-SD-NEXT: bl fmodf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h3, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: mov h2, v0.h[2] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] 
// 16-byte Reload ; CHECK-SD-NEXT: mov v3.h[1], v0.h[0] ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: fcvt s1, h2 -; CHECK-SD-NEXT: str q3, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q3, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q3, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q3, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: mov h2, v2.h[3] ; CHECK-SD-NEXT: mov v3.h[2], v0.h[0] ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: fcvt s1, h2 -; CHECK-SD-NEXT: str q3, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q3, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #64 @@ -946,7 +946,7 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -969,27 +969,27 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) { ; CHECK-GI-NEXT: fcvt s2, h8 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h11 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h9 ; CHECK-GI-NEXT: 
fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h12 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h10 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h13 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] @@ -1007,7 +1007,7 @@ define <8 x half> @frem_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-LABEL: frem_v8f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov h3, v0.h[1] @@ -1017,7 +1017,7 @@ define <8 x half> @frem_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h2 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 @@ -1027,15 +1027,15 @@ define <8 x half> @frem_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: mov v0.h[1], 
v2.h[0] ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: mov h1, v1.h[3] @@ -1043,9 +1043,9 @@ define <8 x half> @frem_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] ; CHECK-SD-NEXT: mov h1, v1.h[4] @@ -1053,9 +1053,9 @@ define <8 x half> @frem_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] ; CHECK-SD-NEXT: mov h1, v1.h[5] @@ -1063,9 +1063,9 @@ define <8 x half> @frem_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; 
CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] ; CHECK-SD-NEXT: mov h1, v1.h[6] @@ -1073,9 +1073,9 @@ define <8 x half> @frem_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] ; CHECK-SD-NEXT: mov h1, v1.h[7] @@ -1083,8 +1083,8 @@ define <8 x half> @frem_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -1096,7 +1096,7 @@ define <8 x half> @frem_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-GI-NEXT: stp d13, d12, [sp, #128] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #144] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #160] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #176] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #176] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 192 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: .cfi_offset b8, 
-24 @@ -1116,84 +1116,84 @@ define <8 x half> @frem_v8f16(<8 x half> %a, <8 x half> %b) { ; CHECK-GI-NEXT: mov h9, v1.h[2] ; CHECK-GI-NEXT: mov h10, v1.h[3] ; CHECK-GI-NEXT: mov h15, v1.h[4] -; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v0.h[6] -; CHECK-GI-NEXT: str h2, [sp, #64] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #64] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v0.h[7] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h2, [sp, #96] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #96] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v1.h[5] -; CHECK-GI-NEXT: str h2, [sp, #186] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #186] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v1.h[6] -; CHECK-GI-NEXT: str h2, [sp, #188] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #188] // 2-byte Spill ; CHECK-GI-NEXT: mov h2, v1.h[7] ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str h2, [sp, #190] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h2, [sp, #190] // 2-byte Spill ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h11 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h8 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h12 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h9 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h13 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h10 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h14 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h15 -; CHECK-GI-NEXT: str q0, 
[sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #186] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #186] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #188] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #188] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #190] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #190] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #160] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #144] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #176] // 8-byte Folded Reload +; 
CHECK-GI-NEXT: ldr x30, [sp, #176] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] ; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -1208,7 +1208,7 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-LABEL: frem_v16f16: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #96 -; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill @@ -1219,10 +1219,10 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s0, h3 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 @@ -1230,70 +1230,70 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] ; 
CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[4] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; 
CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[5] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[6] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov h1, v1.h[7] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, 
[sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[1] ; CHECK-SD-NEXT: mov h1, v1.h[1] @@ -1301,7 +1301,7 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: fcvt s1, h1 @@ -1311,15 +1311,15 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] ; CHECK-SD-NEXT: mov h1, v1.h[3] @@ -1327,9 +1327,9 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] ; CHECK-SD-NEXT: mov h1, 
v1.h[4] @@ -1337,9 +1337,9 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] ; CHECK-SD-NEXT: mov h1, v1.h[5] @@ -1347,9 +1347,9 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] ; CHECK-SD-NEXT: mov h1, v1.h[6] @@ -1357,9 +1357,9 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] ; CHECK-SD-NEXT: mov h1, v1.h[7] @@ -1367,7 +1367,7 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-SD-NEXT: fcvt s1, h1 ; CHECK-SD-NEXT: bl fmodf ; CHECK-SD-NEXT: fmov s1, s0 -; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Reload 
; CHECK-SD-NEXT: fcvt h2, s1 ; CHECK-SD-NEXT: ldp q1, q0, [sp, #48] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] @@ -1394,203 +1394,203 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) { ; CHECK-GI-NEXT: .cfi_offset b14, -72 ; CHECK-GI-NEXT: .cfi_offset b15, -80 ; CHECK-GI-NEXT: mov v4.16b, v1.16b -; CHECK-GI-NEXT: str q1, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #112] // 16-byte Spill ; CHECK-GI-NEXT: mov h1, v0.h[4] ; CHECK-GI-NEXT: mov h12, v0.h[1] ; CHECK-GI-NEXT: mov h13, v0.h[2] -; CHECK-GI-NEXT: str q3, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q3, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: mov h14, v0.h[3] ; CHECK-GI-NEXT: mov h15, v2.h[1] ; CHECK-GI-NEXT: mov h8, v2.h[2] ; CHECK-GI-NEXT: mov h9, v2.h[3] ; CHECK-GI-NEXT: mov h10, v2.h[4] ; CHECK-GI-NEXT: mov h11, v2.h[5] -; CHECK-GI-NEXT: str h1, [sp, #272] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #272] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v0.h[5] -; CHECK-GI-NEXT: str h1, [sp, #240] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #240] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v0.h[6] -; CHECK-GI-NEXT: str h1, [sp, #176] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #176] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v0.h[7] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str h1, [sp, #144] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #144] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[1] -; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[2] -; CHECK-GI-NEXT: str h1, [sp, #80] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #80] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[3] -; CHECK-GI-NEXT: str h1, [sp, #128] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #128] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[4] -; CHECK-GI-NEXT: str h1, [sp, #192] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, 
[sp, #192] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[5] -; CHECK-GI-NEXT: str h1, [sp, #256] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #256] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[6] -; CHECK-GI-NEXT: str h1, [sp, #336] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #336] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v4.h[7] -; CHECK-GI-NEXT: str h1, [sp, #352] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #352] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[6] -; CHECK-GI-NEXT: str h1, [sp, #12] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #12] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[7] -; CHECK-GI-NEXT: str h1, [sp, #14] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #14] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[1] -; CHECK-GI-NEXT: str h1, [sp, #44] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #44] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[2] -; CHECK-GI-NEXT: str h1, [sp, #46] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #46] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[3] -; CHECK-GI-NEXT: str h1, [sp, #78] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #78] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[4] -; CHECK-GI-NEXT: str h1, [sp, #110] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #110] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[5] -; CHECK-GI-NEXT: str h1, [sp, #174] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #174] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[6] -; CHECK-GI-NEXT: str h1, [sp, #238] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #238] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v3.h[7] -; CHECK-GI-NEXT: str h1, [sp, #302] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #302] // 2-byte Spill ; CHECK-GI-NEXT: fcvt s1, h2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h12 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h15 -; CHECK-GI-NEXT: str q0, [sp, #304] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, 
[sp, #304] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h13 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h8 -; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf ; CHECK-GI-NEXT: fcvt s2, h14 ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h9 -; CHECK-GI-NEXT: str q0, [sp, #320] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #320] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #272] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #272] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 ; CHECK-GI-NEXT: fcvt s1, h10 -; CHECK-GI-NEXT: str q0, [sp, #272] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #272] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #240] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #240] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 ; CHECK-GI-NEXT: fcvt s1, h11 -; CHECK-GI-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #240] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #176] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #176] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #12] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #12] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #144] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #144] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, 
h1 -; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #14] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr q1, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #44] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #44] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #46] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #46] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #128] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #128] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill 
-; CHECK-GI-NEXT: ldr h0, [sp, #78] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #78] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #192] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #192] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #110] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #110] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #256] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #256] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #256] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #256] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #336] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #336] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #336] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #238] // 2-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #336] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #238] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr h1, [sp, #352] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #352] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s2, h1 -; CHECK-GI-NEXT: str q0, [sp, #352] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr h0, [sp, #302] // 2-byte Folded 
Reload +; CHECK-GI-NEXT: str q0, [sp, #352] // 16-byte Spill +; CHECK-GI-NEXT: ldr h0, [sp, #302] // 2-byte Reload ; CHECK-GI-NEXT: fcvt s1, h0 ; CHECK-GI-NEXT: fmov s0, s2 ; CHECK-GI-NEXT: bl fmodf -; CHECK-GI-NEXT: ldr q3, [sp, #304] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q3, [sp, #304] // 16-byte Reload +; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: ldp x29, x30, [sp, #432] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #416] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #320] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #320] // 16-byte Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #400] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #128] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #128] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #384] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #272] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #272] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #368] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #192] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #192] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] ; CHECK-GI-NEXT: ldp q4, q2, [sp, #240] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[4], v4.h[0] ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] -; 
CHECK-GI-NEXT: ldr q2, [sp, #336] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #336] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: fcvt h2, s0 -; CHECK-GI-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #144] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #352] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] ; CHECK-GI-NEXT: mov v0.16b, v3.16b @@ -1605,36 +1605,36 @@ define <2 x fp128> @frem_v2fp128(<2 x fp128> %a, <2 x fp128> %b) { ; CHECK-SD-LABEL: frem_v2fp128: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: bl fmodl -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: bl fmodl ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: frem_v2fp128: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: 
.cfi_offset w30, -16 ; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b ; CHECK-GI-NEXT: bl fmodl -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: bl fmodl ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #64 ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fsincos.ll b/llvm/test/CodeGen/AArch64/fsincos.ll index 98ba908da409e..a3b90765f186a 100644 --- a/llvm/test/CodeGen/AArch64/fsincos.ll +++ b/llvm/test/CodeGen/AArch64/fsincos.ll @@ -58,18 +58,18 @@ define <2 x double> @sin_v2f64(<2 x double> %a) nounwind { ; CHECK-SD-LABEL: sin_v2f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: bl sin ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl sin -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; 
CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -77,19 +77,19 @@ define <2 x double> @sin_v2f64(<2 x double> %a) nounwind { ; CHECK-GI-LABEL: sin_v2f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: bl sin ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl sin -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #32 @@ -106,7 +106,7 @@ define <3 x double> @sin_v3f64(<3 x double> %a) nounwind { ; CHECK-SD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill ; CHECK-SD-NEXT: fmov d8, d2 ; CHECK-SD-NEXT: fmov d9, d1 -; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-SD-NEXT: bl sin ; CHECK-SD-NEXT: fmov d10, d0 ; CHECK-SD-NEXT: fmov d0, d9 @@ -116,7 +116,7 @@ define <3 x double> @sin_v3f64(<3 x double> %a) nounwind { ; CHECK-SD-NEXT: bl sin ; CHECK-SD-NEXT: fmov d1, d9 ; CHECK-SD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-SD-NEXT: fmov d2, d0 ; 
CHECK-SD-NEXT: fmov d0, d10 ; CHECK-SD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -128,7 +128,7 @@ define <3 x double> @sin_v3f64(<3 x double> %a) nounwind { ; CHECK-GI-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill ; CHECK-GI-NEXT: fmov d8, d1 ; CHECK-GI-NEXT: fmov d9, d2 -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: bl sin ; CHECK-GI-NEXT: fmov d10, d0 ; CHECK-GI-NEXT: fmov d0, d8 @@ -138,7 +138,7 @@ define <3 x double> @sin_v3f64(<3 x double> %a) nounwind { ; CHECK-GI-NEXT: bl sin ; CHECK-GI-NEXT: fmov d1, d8 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-GI-NEXT: fmov d2, d0 ; CHECK-GI-NEXT: fmov d0, d10 ; CHECK-GI-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -152,31 +152,31 @@ define <4 x double> @sin_v4f64(<4 x double> %a) nounwind { ; CHECK-SD-LABEL: sin_v4f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl sin ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl sin -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; 
CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl sin ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl sin ; CHECK-SD-NEXT: fmov d1, d0 ; CHECK-SD-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.d[1], v2.d[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -187,29 +187,29 @@ define <4 x double> @sin_v4f64(<4 x double> %a) nounwind { ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: mov d9, v1.d[1] -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl sin ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl sin ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 
killed $d0 killed $q0 ; CHECK-GI-NEXT: bl sin ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d9 ; CHECK-GI-NEXT: bl sin ; CHECK-GI-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: add sp, sp, #80 @@ -224,18 +224,18 @@ define <2 x float> @sin_v2f32(<2 x float> %a) nounwind { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed 
$q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -245,19 +245,19 @@ define <2 x float> @sin_v2f32(<2 x float> %a) nounwind { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl sinf -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #32 @@ -271,25 +271,25 @@ define <3 x float> @sin_v3f32(<3 x float> %a) nounwind { ; CHECK-SD-LABEL: sin_v3f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; 
CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -301,20 +301,20 @@ define <3 x float> @sin_v3f32(<3 x float> %a) nounwind { ; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; 
CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[2], v0.s[0] @@ -330,32 +330,32 @@ define <4 x float> @sin_v4f32(<4 x float> %a) nounwind { ; CHECK-SD-LABEL: sin_v4f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; 
CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -364,33 +364,33 @@ define <4 x float> @sin_v4f32(<4 x float> %a) nounwind { ; CHECK-GI-LABEL: sin_v4f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: mov s10, v0.s[3] ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov 
v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -407,57 +407,57 @@ define <8 x float> @sin_v8f32(<8 x float> %a) nounwind { ; CHECK-SD-NEXT: sub sp, sp, #64 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte 
Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fmov s2, s0 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; 
CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -474,53 +474,53 @@ define <8 x float> @sin_v8f32(<8 x float> %a) nounwind { ; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] -; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Spill ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s11 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 
16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s12 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s13 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] ; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v2.16b, v1.16b @@ -538,66 +538,66 @@ define <7 x half> @sin_v7f16(<7 x half> %a) nounwind { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, 
[sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, 
#16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -615,51 +615,51 @@ define <7 x half> @sin_v7f16(<7 x half> %a) nounwind { ; CHECK-GI-NEXT: mov h8, v0.h[1] ; CHECK-GI-NEXT: mov h9, v0.h[2] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Spill ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; 
CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[3], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] 
; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -676,35 +676,35 @@ define <4 x half> @sin_v4f16(<4 x half> %a) nounwind { ; CHECK-SD-NEXT: sub sp, sp, #48 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v2.h[1], v1.h[0] -; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl sinf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v2.h[0] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: 
ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -714,35 +714,35 @@ define <4 x half> @sin_v4f16(<4 x half> %a) nounwind { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: mov h10, v0.h[3] ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov h8, v0.h[1] ; CHECK-GI-NEXT: mov h9, v0.h[2] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 
8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] @@ -760,66 +760,66 @@ define <8 x half> @sin_v8f16(<8 x half> %a) nounwind { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; 
CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte 
Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -827,7 +827,7 @@ define <8 x half> @sin_v8f16(<8 x half> %a) nounwind { ; CHECK-GI-LABEL: sin_v8f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #176 -; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Spill ; CHECK-GI-NEXT: mov h14, v0.h[7] ; CHECK-GI-NEXT: stp d13, d12, [sp, #120] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov h12, v0.h[5] @@ -839,51 +839,51 @@ define <8 x half> @sin_v8f16(<8 x half> %a) nounwind { ; CHECK-GI-NEXT: mov h8, v0.h[1] ; CHECK-GI-NEXT: mov h9, v0.h[2] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Spill ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; 
CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: ldp q3, q2, [sp, #64] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #152] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload @@ -907,125 +907,125 @@ define <16 x half> @sin_v16f16(<16 x half> %a) nounwind { ; CHECK-SD-NEXT: sub sp, sp, #64 ; CHECK-SD-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 
16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte 
Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded 
Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str 
q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl sinf ; CHECK-SD-NEXT: fmov s1, s0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: fcvt h2, s1 ; CHECK-SD-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] @@ -1038,7 +1038,7 @@ define <16 x half> @sin_v16f16(<16 x half> %a) nounwind { ; CHECK-GI-NEXT: stp d15, d14, [sp, #240] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov v2.16b, v1.16b ; CHECK-GI-NEXT: mov h14, v1.h[1] -; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: mov h1, v1.h[2] ; CHECK-GI-NEXT: mov h15, v0.h[1] ; CHECK-GI-NEXT: stp d13, d12, [sp, #256] // 16-byte Folded Spill @@ -1050,134 +1050,134 @@ define <16 x half> @sin_v16f16(<16 x half> %a) nounwind { ; CHECK-GI-NEXT: stp d9, d8, [sp, #288] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov h8, v0.h[2] ; CHECK-GI-NEXT: mov h9, v0.h[3] -; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[3] ; CHECK-GI-NEXT: fcvt s0, h0 ; CHECK-GI-NEXT: stp x29, x30, [sp, #304] // 16-byte Folded Spill -; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[4] -; CHECK-GI-NEXT: str h1, [sp, 
#48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[5] -; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[6] -; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[7] -; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Spill ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h15 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, 
#128] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf -; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf -; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf -; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; 
CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf -; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf -; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl sinf -; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Reload +; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] ; CHECK-GI-NEXT: ldp q1, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #288] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #272] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #256] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #240] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 
16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: fcvt h2, s0 -; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] ; CHECK-GI-NEXT: mov v0.16b, v3.16b @@ -1192,15 +1192,15 @@ define <2 x fp128> @sin_v2fp128(<2 x fp128> %a) nounwind { ; CHECK-LABEL: sin_v2fp128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NEXT: bl sinl -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl sinl ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte 
Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret entry: @@ -1264,18 +1264,18 @@ define <2 x double> @cos_v2f64(<2 x double> %a) nounwind { ; CHECK-SD-LABEL: cos_v2f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: bl cos ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl cos -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -1283,19 +1283,19 @@ define <2 x double> @cos_v2f64(<2 x double> %a) nounwind { ; CHECK-GI-LABEL: cos_v2f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: bl cos ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 
16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl cos -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #32 @@ -1312,7 +1312,7 @@ define <3 x double> @cos_v3f64(<3 x double> %a) nounwind { ; CHECK-SD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill ; CHECK-SD-NEXT: fmov d8, d2 ; CHECK-SD-NEXT: fmov d9, d1 -; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-SD-NEXT: bl cos ; CHECK-SD-NEXT: fmov d10, d0 ; CHECK-SD-NEXT: fmov d0, d9 @@ -1322,7 +1322,7 @@ define <3 x double> @cos_v3f64(<3 x double> %a) nounwind { ; CHECK-SD-NEXT: bl cos ; CHECK-SD-NEXT: fmov d1, d9 ; CHECK-SD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-SD-NEXT: fmov d2, d0 ; CHECK-SD-NEXT: fmov d0, d10 ; CHECK-SD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -1334,7 +1334,7 @@ define <3 x double> @cos_v3f64(<3 x double> %a) nounwind { ; CHECK-GI-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill ; CHECK-GI-NEXT: fmov d8, d1 ; CHECK-GI-NEXT: fmov d9, d2 -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: bl cos ; CHECK-GI-NEXT: fmov d10, d0 ; CHECK-GI-NEXT: fmov d0, d8 @@ -1344,7 +1344,7 @@ define <3 x double> @cos_v3f64(<3 x double> %a) nounwind { ; CHECK-GI-NEXT: bl cos ; CHECK-GI-NEXT: fmov d1, d8 ; CHECK-GI-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-GI-NEXT: 
ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-GI-NEXT: fmov d2, d0 ; CHECK-GI-NEXT: fmov d0, d10 ; CHECK-GI-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -1358,31 +1358,31 @@ define <4 x double> @cos_v4f64(<4 x double> %a) nounwind { ; CHECK-SD-LABEL: cos_v4f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl cos ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl cos -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl cos ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl cos ; CHECK-SD-NEXT: fmov d1, 
d0 ; CHECK-SD-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.d[1], v2.d[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -1393,29 +1393,29 @@ define <4 x double> @cos_v4f64(<4 x double> %a) nounwind { ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov d8, v0.d[1] ; CHECK-GI-NEXT: mov d9, v1.d[1] -; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: bl cos ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl cos ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl cos ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov d0, d9 ; CHECK-GI-NEXT: bl cos ; CHECK-GI-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; 
CHECK-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: add sp, sp, #80 @@ -1430,18 +1430,18 @@ define <2 x float> @cos_v2f32(<2 x float> %a) nounwind { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -1451,19 +1451,19 @@ define <2 x float> @cos_v2f32(<2 x float> %a) nounwind { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #32 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] -; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; 
CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl cosf -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: add sp, sp, #32 @@ -1477,25 +1477,25 @@ define <3 x float> @cos_v3f32(<3 x float> %a) nounwind { ; CHECK-SD-LABEL: cos_v3f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: 
def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -1507,20 +1507,20 @@ define <3 x float> @cos_v3f32(<3 x float> %a) nounwind { ; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[2], v0.s[0] @@ -1536,32 +1536,32 @@ define <4 x float> @cos_v4f32(<4 x float> %a) nounwind { ; CHECK-SD-LABEL: cos_v4f32: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 
16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: add sp, sp, #48 @@ -1570,33 +1570,33 @@ define <4 x float> @cos_v4f32(<4 x float> %a) nounwind { ; CHECK-GI-LABEL: cos_v4f32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: mov s10, v0.s[3] ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov s8, 
v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] ; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -1613,57 +1613,57 @@ define <8 x float> @cos_v8f32(<8 x float> %a) nounwind { ; CHECK-SD-NEXT: sub sp, sp, #64 ; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte 
Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // 
kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[3] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fmov s2, s0 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret @@ -1680,53 +1680,53 @@ define <8 x float> @cos_v8f32(<8 x float> %a) nounwind { ; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov s8, v0.s[1] ; CHECK-GI-NEXT: mov s9, v0.s[2] -; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Spill ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: 
// kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s10 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s11 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s12 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s13 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: ldp q2, q1, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] 
// 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] ; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] ; CHECK-GI-NEXT: mov v2.16b, v1.16b @@ -1744,66 +1744,66 @@ define <7 x half> @cos_v7f16(<7 x half> %a) nounwind { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; 
CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr 
q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -1821,51 +1821,51 @@ define <7 x half> @cos_v7f16(<7 x half> %a) nounwind { ; CHECK-GI-NEXT: mov h8, v0.h[1] ; CHECK-GI-NEXT: mov h9, v0.h[2] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Spill ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl 
cosf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[3], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] ; CHECK-GI-NEXT: mov v0.16b, v1.16b @@ -1882,35 +1882,35 @@ define <4 x half> @cos_v4f16(<4 x half> %a) nounwind { ; CHECK-SD-NEXT: sub sp, sp, #48 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; 
CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v2.h[1], v1.h[0] -; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl cosf -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v2.h[0] -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: add sp, sp, #48 @@ -1920,35 +1920,35 @@ define <4 x half> @cos_v4f16(<4 x half> %a) nounwind { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #80 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: mov h10, v0.h[3] ; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov h8, v0.h[1] ; CHECK-GI-NEXT: 
mov h9, v0.h[2] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Spill ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Reload +; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] @@ -1966,66 +1966,66 @@ define <8 x half> @cos_v8f16(<8 x half> %a) nounwind { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #48 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; 
CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; 
CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h1, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret @@ -2033,7 +2033,7 @@ define <8 x half> @cos_v8f16(<8 x half> %a) nounwind { ; CHECK-GI-LABEL: cos_v8f16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #176 -; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Spill ; CHECK-GI-NEXT: mov h14, v0.h[7] ; CHECK-GI-NEXT: stp d13, d12, [sp, #120] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov h12, v0.h[5] @@ -2045,51 +2045,51 @@ define <8 x half> @cos_v8f16(<8 x half> %a) nounwind { ; 
CHECK-GI-NEXT: mov h8, v0.h[1] ; CHECK-GI-NEXT: mov h9, v0.h[2] ; CHECK-GI-NEXT: fcvt s0, h0 -; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Spill ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: ldp q3, q2, [sp, #64] // 32-byte Folded Reload ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #96] // 
16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #152] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Reload ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload @@ -2113,125 +2113,125 @@ define <16 x half> @cos_v16f16(<16 x half> %a) nounwind { ; CHECK-SD-NEXT: sub sp, sp, #64 ; CHECK-SD-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte 
Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded 
Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[1] -; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; 
CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[4] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[5] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[6] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fcvt h0, s0 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov h0, v0.h[7] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fcvt s0, h0 ; CHECK-SD-NEXT: bl cosf ; CHECK-SD-NEXT: fmov s1, s0 
-; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: fcvt h2, s1 ; CHECK-SD-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] @@ -2244,7 +2244,7 @@ define <16 x half> @cos_v16f16(<16 x half> %a) nounwind { ; CHECK-GI-NEXT: stp d15, d14, [sp, #240] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov v2.16b, v1.16b ; CHECK-GI-NEXT: mov h14, v1.h[1] -; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: mov h1, v1.h[2] ; CHECK-GI-NEXT: mov h15, v0.h[1] ; CHECK-GI-NEXT: stp d13, d12, [sp, #256] // 16-byte Folded Spill @@ -2256,134 +2256,134 @@ define <16 x half> @cos_v16f16(<16 x half> %a) nounwind { ; CHECK-GI-NEXT: stp d9, d8, [sp, #288] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov h8, v0.h[2] ; CHECK-GI-NEXT: mov h9, v0.h[3] -; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[3] ; CHECK-GI-NEXT: fcvt s0, h0 ; CHECK-GI-NEXT: stp x29, x30, [sp, #304] // 16-byte Folded Spill -; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[4] -; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[5] -; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[6] -; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Spill ; CHECK-GI-NEXT: mov h1, v2.h[7] -; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Folded Spill +; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Spill ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h15 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; 
CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h8 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h9 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h10 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h11 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h12 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h13 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf ; CHECK-GI-NEXT: fcvt s1, h14 ; CHECK-GI-NEXT: fcvt h0, s0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, 
s1 ; CHECK-GI-NEXT: bl cosf -; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf -; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf -; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf -; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf -; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf -; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Folded Reload +; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Reload ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: fcvt s1, h1 -; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte 
Spill ; CHECK-GI-NEXT: fmov s0, s1 ; CHECK-GI-NEXT: bl cosf -; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Reload +; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Reload ; CHECK-GI-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] ; CHECK-GI-NEXT: ldp q1, q2, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #288] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d11, d10, [sp, #272] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #256] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: ldp d15, d14, [sp, #240] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] ; 
CHECK-GI-NEXT: fcvt h2, s0 -; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-GI-NEXT: mov v3.h[7], v0.h[0] ; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] ; CHECK-GI-NEXT: mov v0.16b, v3.16b @@ -2398,15 +2398,15 @@ define <2 x fp128> @cos_v2fp128(<2 x fp128> %a) nounwind { ; CHECK-LABEL: cos_v2fp128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NEXT: bl cosl -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl cosl ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll index 0f208f8ed9052..50fac819d4afe 100644 --- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll +++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll @@ -41,7 +41,7 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a ; CHECK-NEXT: add x27, x27, LJTI0_0@PAGEOFF ; CHECK-NEXT: mov w28, #1 
; =0x1 ; CHECK-NEXT: ; implicit-def: $w8 -; CHECK-NEXT: str x8, [sp, #40] ; 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp, #40] ; 8-byte Spill ; CHECK-NEXT: b LBB0_2 ; CHECK-NEXT: LBB0_1: ; %bb10 ; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1 @@ -74,7 +74,7 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a ; CHECK-NEXT: LBB0_6: ; %bb13 ; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: mov w8, #1 ; =0x1 -; CHECK-NEXT: str x8, [sp, #40] ; 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp, #40] ; 8-byte Spill ; CHECK-NEXT: tbz w19, #0, LBB0_2 ; CHECK-NEXT: ; %bb.7: ; %bb14 ; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1 @@ -89,7 +89,7 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: b LBB0_8 ; CHECK-NEXT: LBB0_9: ; %bb16 -; CHECK-NEXT: ldr x8, [sp, #40] ; 8-byte Folded Reload +; CHECK-NEXT: ldr x8, [sp, #40] ; 8-byte Reload ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: mov x1, xzr ; CHECK-NEXT: ; kill: def $w8 killed $w8 killed $x8 def $x8 diff --git a/llvm/test/CodeGen/AArch64/insertextract.ll b/llvm/test/CodeGen/AArch64/insertextract.ll index a9167ad6ebb70..9325c8c685560 100644 --- a/llvm/test/CodeGen/AArch64/insertextract.ll +++ b/llvm/test/CodeGen/AArch64/insertextract.ll @@ -1424,33 +1424,33 @@ define <2 x fp128> @insert_v2fp128_0(<2 x fp128> %a, fp128 %b, i32 %c) { ; CHECK-SD-LABEL: insert_v2fp128_0: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #32 -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov v0.16b, v1.16b -; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __addtf3 ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, 
[sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: insert_v2fp128_0: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __addtf3 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __addtf3 ; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #48 ; CHECK-GI-NEXT: ret entry: @@ -1463,33 +1463,33 @@ define <2 x fp128> @insert_v2fp128_1(<2 x fp128> %a, fp128 %b, i32 %c) { ; CHECK-SD-LABEL: insert_v2fp128_1: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: sub sp, sp, #32 -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-SD-NEXT: bl __addtf3 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: insert_v2fp128_1: ; CHECK-GI: // %bb.0: // 
%entry ; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -16 ; CHECK-GI-NEXT: stp q1, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __addtf3 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __addtf3 -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #64 ; CHECK-GI-NEXT: ret entry: @@ -1506,13 +1506,13 @@ define <2 x fp128> @insert_v2fp128_c(<2 x fp128> %a, fp128 %b, i32 %c) { ; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: mov w19, w0 -; CHECK-SD-NEXT: str q2, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp, #32] // 16-byte Spill ; CHECK-SD-NEXT: bl __addtf3 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: bl __addtf3 ; CHECK-SD-NEXT: str q0, [sp, #64] @@ -1530,32 +1530,32 @@ define <2 x fp128> @insert_v2fp128_c(<2 x fp128> %a, fp128 %b, i32 %c) { ; CHECK-GI: // %bb.0: // 
%entry ; CHECK-GI-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill ; CHECK-GI-NEXT: sub x9, sp, #96 -; CHECK-GI-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: mov x29, sp ; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0 ; CHECK-GI-NEXT: .cfi_def_cfa w29, 32 ; CHECK-GI-NEXT: .cfi_offset w19, -16 ; CHECK-GI-NEXT: .cfi_offset w30, -24 ; CHECK-GI-NEXT: .cfi_offset w29, -32 -; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: mov w19, w0 -; CHECK-GI-NEXT: str q2, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q2, [sp, #48] // 16-byte Spill ; CHECK-GI-NEXT: bl __addtf3 -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __addtf3 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: mov w8, w19 ; CHECK-GI-NEXT: add x9, sp, #64 ; CHECK-GI-NEXT: and x8, x8, #0x1 ; CHECK-GI-NEXT: stp q1, q0, [sp, #64] -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: str q0, [x9, x8, lsl #4] ; CHECK-GI-NEXT: ldp q0, q1, [sp, #64] ; CHECK-GI-NEXT: mov sp, x29 -; CHECK-GI-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll b/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll index 46bd414568e05..f4fdd52f117d1 100644 --- a/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll +++ 
b/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll @@ -35,7 +35,7 @@ define @match_nxv16i8_v4i8( %op1, <4 x i8> ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 @@ -54,7 +54,7 @@ define @match_nxv16i8_v4i8( %op1, <4 x i8> ; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b ; CHECK-NEXT: mov p2.b, p3/m, p3.b ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov p1.b, p2/m, p2.b ; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b ; CHECK-NEXT: addvl sp, sp, #1 @@ -235,7 +235,7 @@ define @match_nxv16i8_v32i8( %op1, <32 x i8 ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 @@ -335,7 +335,7 @@ define @match_nxv16i8_v32i8( %op1, <32 x i8 ; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z1.b ; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov p1.b, p2/m, p2.b ; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b ; CHECK-NEXT: addvl sp, sp, #1 @@ -457,7 +457,7 @@ define @match_nxv4xi32_v4i32( %op1, <4 x i32 ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 @@ -472,7 +472,7 @@ define @match_nxv4xi32_v4i32( %op1, <4 x i32 ; CHECK-NEXT: cmpeq p1.s, p1/z, z0.s, z1.s ; CHECK-NEXT: mov p2.b, p3/m, p3.b ; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov p1.b, p2/m, p2.b ; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b ; CHECK-NEXT: addvl sp, sp, #1 diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index caf87a13f283b..fce4f8e69f14d 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -3712,7 +3712,7 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-NOFP16-SD-LABEL: stofp_v2i128_v2f64: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 @@ -3726,12 +3726,12 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-NOFP16-SD-NEXT: mov x0, x20 ; CHECK-NOFP16-SD-NEXT: mov x1, x19 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __floattidf -; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: ldp x20, 
x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret @@ -3739,7 +3739,7 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-FP16-SD-LABEL: stofp_v2i128_v2f64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 @@ -3753,12 +3753,12 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x20 ; CHECK-FP16-SD-NEXT: mov x1, x19 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __floattidf -; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret @@ -3766,7 +3766,7 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-NOFP16-GI-LABEL: stofp_v2i128_v2f64: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; 
CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -3778,12 +3778,12 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 ; CHECK-NOFP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floattidf -; CHECK-NOFP16-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-NOFP16-GI-NEXT: mov v0.16b, v1.16b ; CHECK-NOFP16-GI-NEXT: add sp, sp, #48 @@ -3792,7 +3792,7 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-FP16-GI-LABEL: stofp_v2i128_v2f64: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #48 -; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -3804,12 +3804,12 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floattidf -; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; 
CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-FP16-GI-NEXT: mov v0.16b, v1.16b ; CHECK-FP16-GI-NEXT: add sp, sp, #48 @@ -3823,7 +3823,7 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-NOFP16-SD-LABEL: utofp_v2i128_v2f64: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 @@ -3837,12 +3837,12 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-NOFP16-SD-NEXT: mov x0, x20 ; CHECK-NOFP16-SD-NEXT: mov x1, x19 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __floatuntidf -; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret @@ -3850,7 +3850,7 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-FP16-SD-LABEL: utofp_v2i128_v2f64: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; 
CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 @@ -3864,12 +3864,12 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x20 ; CHECK-FP16-SD-NEXT: mov x1, x19 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __floatuntidf -; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret @@ -3877,7 +3877,7 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-NOFP16-GI-LABEL: utofp_v2i128_v2f64: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -3889,12 +3889,12 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 ; CHECK-NOFP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floatuntidf -; CHECK-NOFP16-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: 
ldr q1, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-NOFP16-GI-NEXT: mov v0.16b, v1.16b ; CHECK-NOFP16-GI-NEXT: add sp, sp, #48 @@ -3903,7 +3903,7 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-FP16-GI-LABEL: utofp_v2i128_v2f64: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #48 -; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -3915,12 +3915,12 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) { ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floatuntidf -; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: mov v1.d[1], v0.d[0] ; CHECK-FP16-GI-NEXT: mov v0.16b, v1.16b ; CHECK-FP16-GI-NEXT: add sp, sp, #48 @@ -5514,7 +5514,7 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-NOFP16-SD-LABEL: stofp_v2i128_v2f32: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 
8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 @@ -5528,12 +5528,12 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-NOFP16-SD-NEXT: mov x0, x20 ; CHECK-NOFP16-SD-NEXT: mov x1, x19 ; CHECK-NOFP16-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __floattisf -; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 @@ -5542,7 +5542,7 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-FP16-SD-LABEL: stofp_v2i128_v2f32: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 @@ -5556,12 +5556,12 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x20 ; CHECK-FP16-SD-NEXT: mov x1, x19 ; CHECK-FP16-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __floattisf -; CHECK-FP16-SD-NEXT: ldr 
q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: add sp, sp, #48 @@ -5570,7 +5570,7 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-NOFP16-GI-LABEL: stofp_v2i128_v2f32: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -5582,12 +5582,12 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 ; CHECK-NOFP16-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floattisf -; CHECK-NOFP16-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-NOFP16-GI-NEXT: fmov d0, d1 ; CHECK-NOFP16-GI-NEXT: add sp, sp, #48 @@ -5596,7 +5596,7 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-FP16-GI-LABEL: stofp_v2i128_v2f32: ; CHECK-FP16-GI: // %bb.0: // 
%entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #48 -; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -5608,12 +5608,12 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 ; CHECK-FP16-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floattisf -; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-FP16-GI-NEXT: fmov d0, d1 ; CHECK-FP16-GI-NEXT: add sp, sp, #48 @@ -5627,7 +5627,7 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-NOFP16-SD-LABEL: utofp_v2i128_v2f32: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 @@ -5641,12 +5641,12 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-NOFP16-SD-NEXT: mov x0, x20 ; CHECK-NOFP16-SD-NEXT: mov x1, x19 ; CHECK-NOFP16-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; 
CHECK-NOFP16-SD-NEXT: bl __floatuntisf -; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 @@ -5655,7 +5655,7 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-FP16-SD-LABEL: utofp_v2i128_v2f32: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 @@ -5669,12 +5669,12 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x20 ; CHECK-FP16-SD-NEXT: mov x1, x19 ; CHECK-FP16-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __floatuntisf -; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.s[1], v1.s[0] ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: add sp, sp, #48 @@ -5683,7 +5683,7 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) { ; 
CHECK-NOFP16-GI-LABEL: utofp_v2i128_v2f32: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -5695,12 +5695,12 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 ; CHECK-NOFP16-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floatuntisf -; CHECK-NOFP16-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NOFP16-GI-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-NOFP16-GI-NEXT: fmov d0, d1 ; CHECK-NOFP16-GI-NEXT: add sp, sp, #48 @@ -5709,7 +5709,7 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-FP16-GI-LABEL: utofp_v2i128_v2f32: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #48 -; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -5721,12 +5721,12 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) { ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 ; CHECK-FP16-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-FP16-GI-NEXT: str q0, 
[sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floatuntisf -; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[0] ; CHECK-FP16-GI-NEXT: fmov d0, d1 ; CHECK-FP16-GI-NEXT: add sp, sp, #48 @@ -7371,7 +7371,7 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-NOFP16-SD-LABEL: stofp_v2i128_v2f16: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 @@ -7385,11 +7385,11 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-NOFP16-SD-NEXT: fcvt h0, s0 ; CHECK-NOFP16-SD-NEXT: mov x0, x20 ; CHECK-NOFP16-SD-NEXT: mov x1, x19 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __floattisf ; CHECK-NOFP16-SD-NEXT: fcvt h0, s0 -; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NOFP16-SD-NEXT: mov v0.h[1], v1.h[0] ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -7399,7 +7399,7 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) { ; 
CHECK-FP16-SD-LABEL: stofp_v2i128_v2f16: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 @@ -7413,12 +7413,12 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x20 ; CHECK-FP16-SD-NEXT: mov x1, x19 ; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __floattihf -; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.h[1], v1.h[0] ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: add sp, sp, #48 @@ -7427,7 +7427,7 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-NOFP16-GI-LABEL: stofp_v2i128_v2f16: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -7439,11 +7439,11 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-NOFP16-GI-NEXT: fcvt h0, s0 ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded 
Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floattisf ; CHECK-NOFP16-GI-NEXT: fcvt h1, s0 -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-NOFP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -7453,7 +7453,7 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-FP16-GI-LABEL: stofp_v2i128_v2f16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #48 -; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -7465,12 +7465,12 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 ; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floattihf -; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: mov v1.h[1], v0.h[0] ; CHECK-FP16-GI-NEXT: fmov d0, d1 ; CHECK-FP16-GI-NEXT: add sp, sp, #48 @@ -7484,7 +7484,7 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-NOFP16-SD-LABEL: 
utofp_v2i128_v2f16: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 @@ -7498,11 +7498,11 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-NOFP16-SD-NEXT: fcvt h0, s0 ; CHECK-NOFP16-SD-NEXT: mov x0, x20 ; CHECK-NOFP16-SD-NEXT: mov x1, x19 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __floatuntisf ; CHECK-NOFP16-SD-NEXT: fcvt h0, s0 -; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NOFP16-SD-NEXT: mov v0.h[1], v1.h[0] ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -7512,7 +7512,7 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-FP16-SD-LABEL: utofp_v2i128_v2f16: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 @@ -7526,12 +7526,12 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-FP16-SD-NEXT: mov x0, x20 ; CHECK-FP16-SD-NEXT: mov x1, x19 ; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; 
CHECK-FP16-SD-NEXT: bl __floatuntihf -; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: mov v0.h[1], v1.h[0] ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-SD-NEXT: add sp, sp, #48 @@ -7540,7 +7540,7 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-NOFP16-GI-LABEL: utofp_v2i128_v2f16: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -7552,11 +7552,11 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-NOFP16-GI-NEXT: fcvt h0, s0 ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floatuntisf ; CHECK-NOFP16-GI-NEXT: fcvt h1, s0 -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NOFP16-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-NOFP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -7566,7 +7566,7 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-FP16-GI-LABEL: utofp_v2i128_v2f16: ; CHECK-FP16-GI: // %bb.0: // 
%entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #48 -; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -7578,12 +7578,12 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) { ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 ; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floatuntihf -; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: mov v1.h[1], v0.h[0] ; CHECK-FP16-GI-NEXT: fmov d0, d1 ; CHECK-FP16-GI-NEXT: add sp, sp, #48 @@ -7597,46 +7597,46 @@ define <2 x fp128> @stofp_v2i64_v2f128(<2 x i64> %a) { ; CHECK-NOFP16-SD-LABEL: stofp_v2i64_v2f128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 ; CHECK-NOFP16-SD-NEXT: fmov x0, d0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __floatditf -; CHECK-NOFP16-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NOFP16-SD-NEXT: ldr 
q0, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: mov x0, v0.d[1] ; CHECK-NOFP16-SD-NEXT: bl __floatditf ; CHECK-NOFP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: stofp_v2i64_v2f128: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 ; CHECK-FP16-SD-NEXT: fmov x0, d0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __floatditf -; CHECK-FP16-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x0, v0.d[1] ; CHECK-FP16-SD-NEXT: bl __floatditf ; CHECK-FP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: stofp_v2i64_v2f128: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: sub sp, sp, #32 -; CHECK-NOFP16-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; 
CHECK-NOFP16-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset b8, -16 @@ -7644,20 +7644,20 @@ define <2 x fp128> @stofp_v2i64_v2f128(<2 x i64> %a) { ; CHECK-NOFP16-GI-NEXT: mov d8, v0.d[1] ; CHECK-NOFP16-GI-NEXT: bl __floatditf ; CHECK-NOFP16-GI-NEXT: fmov x0, d8 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floatditf ; CHECK-NOFP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: add sp, sp, #32 ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f128: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #32 -; CHECK-FP16-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w30, -8 ; CHECK-FP16-GI-NEXT: .cfi_offset b8, -16 @@ -7665,12 +7665,12 @@ define <2 x fp128> @stofp_v2i64_v2f128(<2 x i64> %a) { ; CHECK-FP16-GI-NEXT: mov d8, v0.d[1] ; CHECK-FP16-GI-NEXT: bl __floatditf ; CHECK-FP16-GI-NEXT: fmov x0, d8 -; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floatditf ; CHECK-FP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, 
#24] // 8-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-FP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: add sp, sp, #32 ; CHECK-FP16-GI-NEXT: ret entry: @@ -7682,46 +7682,46 @@ define <2 x fp128> @utofp_v2i64_v2f128(<2 x i64> %a) { ; CHECK-NOFP16-SD-LABEL: utofp_v2i64_v2f128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 ; CHECK-NOFP16-SD-NEXT: fmov x0, d0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __floatunditf -; CHECK-NOFP16-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP16-SD-NEXT: mov x0, v0.d[1] ; CHECK-NOFP16-SD-NEXT: bl __floatunditf ; CHECK-NOFP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: utofp_v2i64_v2f128: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 ; 
CHECK-FP16-SD-NEXT: fmov x0, d0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __floatunditf -; CHECK-FP16-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP16-SD-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP16-SD-NEXT: mov x0, v0.d[1] ; CHECK-FP16-SD-NEXT: bl __floatunditf ; CHECK-FP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: utofp_v2i64_v2f128: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: sub sp, sp, #32 -; CHECK-NOFP16-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset b8, -16 @@ -7729,20 +7729,20 @@ define <2 x fp128> @utofp_v2i64_v2f128(<2 x i64> %a) { ; CHECK-NOFP16-GI-NEXT: mov d8, v0.d[1] ; CHECK-NOFP16-GI-NEXT: bl __floatunditf ; CHECK-NOFP16-GI-NEXT: fmov x0, d8 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floatunditf ; CHECK-NOFP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] 
// 16-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: add sp, sp, #32 ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f128: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #32 -; CHECK-FP16-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w30, -8 ; CHECK-FP16-GI-NEXT: .cfi_offset b8, -16 @@ -7750,12 +7750,12 @@ define <2 x fp128> @utofp_v2i64_v2f128(<2 x i64> %a) { ; CHECK-FP16-GI-NEXT: mov d8, v0.d[1] ; CHECK-FP16-GI-NEXT: bl __floatunditf ; CHECK-FP16-GI-NEXT: fmov x0, d8 -; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floatunditf ; CHECK-FP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-FP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: add sp, sp, #32 ; CHECK-FP16-GI-NEXT: ret entry: @@ -7767,48 +7767,48 @@ define <2 x fp128> @stofp_v2i32_v2f128(<2 x i32> %a) { ; CHECK-NOFP16-SD-LABEL: stofp_v2i32_v2f128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #32 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 
; CHECK-NOFP16-SD-NEXT: fmov w0, s0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __floatsitf -; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: mov w0, v1.s[1] ; CHECK-NOFP16-SD-NEXT: bl __floatsitf ; CHECK-NOFP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #32 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: stofp_v2i32_v2f128: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #32 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-SD-NEXT: fmov w0, s0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __floatsitf -; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: mov w0, v1.s[1] ; CHECK-FP16-SD-NEXT: bl __floatsitf ; CHECK-FP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, 
[sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #32 ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f128: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: sub sp, sp, #32 -; CHECK-NOFP16-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset b8, -16 @@ -7817,20 +7817,20 @@ define <2 x fp128> @stofp_v2i32_v2f128(<2 x i32> %a) { ; CHECK-NOFP16-GI-NEXT: mov s8, v0.s[1] ; CHECK-NOFP16-GI-NEXT: bl __floatsitf ; CHECK-NOFP16-GI-NEXT: fmov w0, s8 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floatsitf ; CHECK-NOFP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: add sp, sp, #32 ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f128: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #32 -; CHECK-FP16-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w30, -8 ; CHECK-FP16-GI-NEXT: .cfi_offset b8, -16 @@ -7839,12 +7839,12 @@ define <2 x fp128> @stofp_v2i32_v2f128(<2 
x i32> %a) { ; CHECK-FP16-GI-NEXT: mov s8, v0.s[1] ; CHECK-FP16-GI-NEXT: bl __floatsitf ; CHECK-FP16-GI-NEXT: fmov w0, s8 -; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floatsitf ; CHECK-FP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-FP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: add sp, sp, #32 ; CHECK-FP16-GI-NEXT: ret entry: @@ -7856,48 +7856,48 @@ define <2 x fp128> @utofp_v2i32_v2f128(<2 x i32> %a) { ; CHECK-NOFP16-SD-LABEL: utofp_v2i32_v2f128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #32 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w30, -16 ; CHECK-NOFP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NOFP16-SD-NEXT: fmov w0, s0 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __floatunsitf -; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: mov w0, v1.s[1] ; CHECK-NOFP16-SD-NEXT: bl __floatunsitf ; CHECK-NOFP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr 
x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #32 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: utofp_v2i32_v2f128: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #32 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-SD-NEXT: .cfi_offset w30, -16 ; CHECK-FP16-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-SD-NEXT: fmov w0, s0 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __floatunsitf -; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: mov w0, v1.s[1] ; CHECK-FP16-SD-NEXT: bl __floatunsitf ; CHECK-FP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #32 ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f128: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: sub sp, sp, #32 -; CHECK-NOFP16-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w30, -8 ; CHECK-NOFP16-GI-NEXT: .cfi_offset b8, -16 @@ -7906,20 +7906,20 @@ define <2 x fp128> @utofp_v2i32_v2f128(<2 x i32> %a) { ; CHECK-NOFP16-GI-NEXT: mov s8, v0.s[1] ; CHECK-NOFP16-GI-NEXT: bl __floatunsitf ; 
CHECK-NOFP16-GI-NEXT: fmov w0, s8 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floatunsitf ; CHECK-NOFP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: add sp, sp, #32 ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f128: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #32 -; CHECK-FP16-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; CHECK-FP16-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str d8, [sp, #16] // 8-byte Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 32 ; CHECK-FP16-GI-NEXT: .cfi_offset w30, -8 ; CHECK-FP16-GI-NEXT: .cfi_offset b8, -16 @@ -7928,12 +7928,12 @@ define <2 x fp128> @utofp_v2i32_v2f128(<2 x i32> %a) { ; CHECK-FP16-GI-NEXT: mov s8, v0.s[1] ; CHECK-FP16-GI-NEXT: bl __floatunsitf ; CHECK-FP16-GI-NEXT: fmov w0, s8 -; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floatunsitf ; CHECK-FP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; CHECK-FP16-GI-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: add sp, sp, #32 ; CHECK-FP16-GI-NEXT: ret entry: @@ -7945,22 
+7945,22 @@ define <2 x fp128> @stofp_v2i16_v2f128(<2 x i16> %a) { ; CHECK-LABEL: stofp_v2i16_v2f128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: sxth w0, w8 ; CHECK-NEXT: bl __floatsitf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: sxth w0, w8 ; CHECK-NEXT: bl __floatsitf ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret entry: @@ -7972,22 +7972,22 @@ define <2 x fp128> @utofp_v2i16_v2f128(<2 x i16> %a) { ; CHECK-LABEL: utofp_v2i16_v2f128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: and w0, w8, #0xffff ; CHECK-NEXT: bl __floatunsitf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: and w0, w8, #0xffff ; 
CHECK-NEXT: bl __floatunsitf ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret entry: @@ -7999,22 +7999,22 @@ define <2 x fp128> @stofp_v2i8_v2f128(<2 x i8> %a) { ; CHECK-LABEL: stofp_v2i8_v2f128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: sxtb w0, w8 ; CHECK-NEXT: bl __floatsitf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: sxtb w0, w8 ; CHECK-NEXT: bl __floatsitf ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret entry: @@ -8026,22 +8026,22 @@ define <2 x fp128> @utofp_v2i8_v2f128(<2 x i8> %a) { ; CHECK-LABEL: utofp_v2i8_v2f128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte 
Spill ; CHECK-NEXT: and w0, w8, #0xff ; CHECK-NEXT: bl __floatunsitf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: and w0, w8, #0xff ; CHECK-NEXT: bl __floatunsitf ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret entry: @@ -8053,7 +8053,7 @@ define <2 x fp128> @stofp_v2i128_v2f128(<2 x i128> %a) { ; CHECK-NOFP16-SD-LABEL: stofp_v2i128_v2f128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 @@ -8064,19 +8064,19 @@ define <2 x fp128> @stofp_v2i128_v2f128(<2 x i128> %a) { ; CHECK-NOFP16-SD-NEXT: bl __floattitf ; CHECK-NOFP16-SD-NEXT: mov x0, x20 ; CHECK-NOFP16-SD-NEXT: mov x1, x19 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __floattitf ; CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NOFP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: stofp_v2i128_v2f128: ; CHECK-FP16-SD: // 
%bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 @@ -8087,19 +8087,19 @@ define <2 x fp128> @stofp_v2i128_v2f128(<2 x i128> %a) { ; CHECK-FP16-SD-NEXT: bl __floattitf ; CHECK-FP16-SD-NEXT: mov x0, x20 ; CHECK-FP16-SD-NEXT: mov x1, x19 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __floattitf ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-FP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: stofp_v2i128_v2f128: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -8110,19 +8110,19 @@ define <2 x fp128> @stofp_v2i128_v2f128(<2 x i128> %a) { ; CHECK-NOFP16-GI-NEXT: bl __floattitf ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floattitf ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NOFP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-GI-NEXT: ldr 
q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: add sp, sp, #48 ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: stofp_v2i128_v2f128: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #48 -; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -8133,12 +8133,12 @@ define <2 x fp128> @stofp_v2i128_v2f128(<2 x i128> %a) { ; CHECK-FP16-GI-NEXT: bl __floattitf ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 -; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floattitf ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-FP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: add sp, sp, #48 ; CHECK-FP16-GI-NEXT: ret entry: @@ -8150,7 +8150,7 @@ define <2 x fp128> @utofp_v2i128_v2f128(<2 x i128> %a) { ; CHECK-NOFP16-SD-LABEL: utofp_v2i128_v2f128: ; CHECK-NOFP16-SD: // %bb.0: // %entry ; CHECK-NOFP16-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-SD-NEXT: .cfi_offset w19, -8 @@ -8161,19 +8161,19 @@ define <2 x fp128> 
@utofp_v2i128_v2f128(<2 x i128> %a) { ; CHECK-NOFP16-SD-NEXT: bl __floatuntitf ; CHECK-NOFP16-SD-NEXT: mov x0, x20 ; CHECK-NOFP16-SD-NEXT: mov x1, x19 -; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-SD-NEXT: bl __floatuntitf ; CHECK-NOFP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NOFP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP16-SD-NEXT: ret ; ; CHECK-FP16-SD-LABEL: utofp_v2i128_v2f128: ; CHECK-FP16-SD: // %bb.0: // %entry ; CHECK-FP16-SD-NEXT: sub sp, sp, #48 -; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-SD-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-SD-NEXT: .cfi_offset w19, -8 @@ -8184,19 +8184,19 @@ define <2 x fp128> @utofp_v2i128_v2f128(<2 x i128> %a) { ; CHECK-FP16-SD-NEXT: bl __floatuntitf ; CHECK-FP16-SD-NEXT: mov x0, x20 ; CHECK-FP16-SD-NEXT: mov x1, x19 -; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-SD-NEXT: bl __floatuntitf ; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-FP16-SD-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-SD-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-SD-NEXT: add sp, sp, #48 ; CHECK-FP16-SD-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: utofp_v2i128_v2f128: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; 
CHECK-NOFP16-GI-NEXT: sub sp, sp, #48 -; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NOFP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NOFP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NOFP16-GI-NEXT: .cfi_offset w19, -8 @@ -8207,19 +8207,19 @@ define <2 x fp128> @utofp_v2i128_v2f128(<2 x i128> %a) { ; CHECK-NOFP16-GI-NEXT: bl __floatuntitf ; CHECK-NOFP16-GI-NEXT: mov x0, x19 ; CHECK-NOFP16-GI-NEXT: mov x1, x20 -; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NOFP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NOFP16-GI-NEXT: bl __floatuntitf ; CHECK-NOFP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NOFP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NOFP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NOFP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NOFP16-GI-NEXT: add sp, sp, #48 ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: utofp_v2i128_v2f128: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: sub sp, sp, #48 -; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-FP16-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-FP16-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-FP16-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-FP16-GI-NEXT: .cfi_offset w19, -8 @@ -8230,12 +8230,12 @@ define <2 x fp128> @utofp_v2i128_v2f128(<2 x i128> %a) { ; CHECK-FP16-GI-NEXT: bl __floatuntitf ; CHECK-FP16-GI-NEXT: mov x0, x19 ; CHECK-FP16-GI-NEXT: mov x1, x20 -; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-FP16-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-FP16-GI-NEXT: bl __floatuntitf ; CHECK-FP16-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-FP16-GI-NEXT: mov v1.16b, v0.16b -; CHECK-FP16-GI-NEXT: ldr q0, 
[sp] // 16-byte Folded Reload -; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-FP16-GI-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-FP16-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-FP16-GI-NEXT: add sp, sp, #48 ; CHECK-FP16-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/ldexp.ll b/llvm/test/CodeGen/AArch64/ldexp.ll index 1e35bd627a199..2de20c294dcb3 100644 --- a/llvm/test/CodeGen/AArch64/ldexp.ll +++ b/llvm/test/CodeGen/AArch64/ldexp.ll @@ -134,7 +134,7 @@ define <2 x float> @test_ldexp_v2f32_v2i32(<2 x float> %Val, <2 x i32> %Exp) { ; GISEL: // %bb.0: ; GISEL-NEXT: sub sp, sp, #48 ; GISEL-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill -; GISEL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; GISEL-NEXT: str x30, [sp, #32] // 8-byte Spill ; GISEL-NEXT: .cfi_def_cfa_offset 48 ; GISEL-NEXT: .cfi_offset w30, -16 ; GISEL-NEXT: .cfi_offset b8, -24 @@ -147,14 +147,14 @@ define <2 x float> @test_ldexp_v2f32_v2i32(<2 x float> %Val, <2 x i32> %Exp) { ; GISEL-NEXT: // kill: def $s0 killed $s0 killed $q0 ; GISEL-NEXT: bl ldexpf ; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0 -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp] // 16-byte Spill ; GISEL-NEXT: fmov w0, s9 ; GISEL-NEXT: fmov s0, s8 ; GISEL-NEXT: bl ldexpf -; GISEL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; GISEL-NEXT: ldr q1, [sp] // 16-byte Reload ; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0 ; GISEL-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; GISEL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; GISEL-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; GISEL-NEXT: mov v1.s[1], v0.s[0] ; GISEL-NEXT: fmov d0, d1 ; GISEL-NEXT: add sp, sp, #48 @@ -165,7 +165,7 @@ define <2 x float> @test_ldexp_v2f32_v2i32(<2 x float> %Val, <2 x i32> %Exp) { ; WINDOWS-NEXT: // %bb.0: ; WINDOWS-NEXT: sub sp, sp, #48 ; WINDOWS-NEXT: .seh_stackalloc 48 -; WINDOWS-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; WINDOWS-NEXT: str x30, 
[sp, #32] // 8-byte Spill ; WINDOWS-NEXT: .seh_save_reg x30, 32 ; WINDOWS-NEXT: .seh_endprologue ; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0 @@ -175,20 +175,20 @@ define <2 x float> @test_ldexp_v2f32_v2i32(<2 x float> %Val, <2 x i32> %Exp) { ; WINDOWS-NEXT: mov w0, v1.s[1] ; WINDOWS-NEXT: fcvt d0, s2 ; WINDOWS-NEXT: bl ldexp -; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: fcvt s0, d0 ; WINDOWS-NEXT: fcvt d1, s1 -; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Spill +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload ; WINDOWS-NEXT: fmov w0, s0 ; WINDOWS-NEXT: fmov d0, d1 ; WINDOWS-NEXT: bl ldexp ; WINDOWS-NEXT: fcvt s0, d0 -; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: mov v0.s[1], v1.s[0] ; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0 ; WINDOWS-NEXT: .seh_startepilogue -; WINDOWS-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; WINDOWS-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; WINDOWS-NEXT: .seh_save_reg x30, 32 ; WINDOWS-NEXT: add sp, sp, #48 ; WINDOWS-NEXT: .seh_stackalloc 48 diff --git a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll index b3be71cc2bbf5..0db9b9d863ca7 100644 --- a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll +++ b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll @@ -13,12 +13,12 @@ define i32 @main() local_unnamed_addr #1 { ; CHECK-LABEL: main: ; CHECK: // %bb.0: // %for.body.lr.ph.i.i.i.i.i.i63 ; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 112 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl _Z5setupv ; CHECK-NEXT: movi v0.4s, #1 -; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: 
mov w9, #1 // =0x1 ; CHECK-NEXT: add x0, sp, #48 ; CHECK-NEXT: mov x1, sp ; CHECK-NEXT: str xzr, [sp, #80] @@ -33,7 +33,7 @@ define i32 @main() local_unnamed_addr #1 { ; CHECK-NEXT: .LBB0_2: // %if.then ; CHECK-NEXT: bl f2 ; CHECK-NEXT: .LBB0_3: // %common.ret -; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/llvm.exp10.ll b/llvm/test/CodeGen/AArch64/llvm.exp10.ll index c1ea891bc86e7..4d9aa9812cd9d 100644 --- a/llvm/test/CodeGen/AArch64/llvm.exp10.ll +++ b/llvm/test/CodeGen/AArch64/llvm.exp10.ll @@ -54,39 +54,39 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) { ; SDAG-LABEL: exp10_v2f16: ; SDAG: // %bb.0: ; SDAG-NEXT: sub sp, sp, #48 -; SDAG-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; SDAG-NEXT: str x30, [sp, #32] // 8-byte Spill ; SDAG-NEXT: .cfi_def_cfa_offset 48 ; SDAG-NEXT: .cfi_offset w30, -16 ; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 ; SDAG-NEXT: mov h1, v0.h[1] -; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; SDAG-NEXT: str q0, [sp, #16] // 16-byte Spill ; SDAG-NEXT: fcvt s0, h1 ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: fcvt h0, s0 ; SDAG-NEXT: fcvt s1, h1 -; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q0, [sp] // 16-byte Spill ; SDAG-NEXT: fmov s0, s1 ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: fcvt h2, s0 ; SDAG-NEXT: mov h1, v1.h[2] ; SDAG-NEXT: fcvt s0, h1 -; SDAG-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp] // 16-byte Reload ; SDAG-NEXT: mov v2.h[1], v1.h[0] -; SDAG-NEXT: str q2, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q2, [sp] // 16-byte Spill ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte 
Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: fcvt h2, s0 ; SDAG-NEXT: mov h1, v1.h[3] ; SDAG-NEXT: fcvt s0, h1 -; SDAG-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp] // 16-byte Reload ; SDAG-NEXT: mov v1.h[2], v2.h[0] -; SDAG-NEXT: str q1, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q1, [sp] // 16-byte Spill ; SDAG-NEXT: bl exp10f ; SDAG-NEXT: fcvt h1, s0 -; SDAG-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; SDAG-NEXT: ldr q0, [sp] // 16-byte Reload +; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; SDAG-NEXT: mov v0.h[3], v1.h[0] ; SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0 ; SDAG-NEXT: add sp, sp, #48 @@ -95,8 +95,8 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) { ; GISEL-LABEL: exp10_v2f16: ; GISEL: // %bb.0: ; GISEL-NEXT: sub sp, sp, #32 -; GISEL-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; GISEL-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; GISEL-NEXT: str d8, [sp, #16] // 8-byte Spill +; GISEL-NEXT: str x30, [sp, #24] // 8-byte Spill ; GISEL-NEXT: .cfi_def_cfa_offset 32 ; GISEL-NEXT: .cfi_offset w30, -8 ; GISEL-NEXT: .cfi_offset b8, -16 @@ -106,13 +106,13 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) { ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: fcvt s1, h8 ; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp] // 16-byte Spill ; GISEL-NEXT: fmov s0, s1 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: fcvt h1, s0 -; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; GISEL-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; GISEL-NEXT: ldr q0, [sp] // 16-byte Reload +; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; GISEL-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; GISEL-NEXT: mov v0.h[1], v1.h[0] ; GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; GISEL-NEXT: add sp, sp, #32 @@ -125,39 +125,39 @@ define <3 x half> 
@exp10_v3f16(<3 x half> %x) { ; SDAG-LABEL: exp10_v3f16: ; SDAG: // %bb.0: ; SDAG-NEXT: sub sp, sp, #48 -; SDAG-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; SDAG-NEXT: str x30, [sp, #32] // 8-byte Spill ; SDAG-NEXT: .cfi_def_cfa_offset 48 ; SDAG-NEXT: .cfi_offset w30, -16 ; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 ; SDAG-NEXT: mov h1, v0.h[1] -; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; SDAG-NEXT: str q0, [sp, #16] // 16-byte Spill ; SDAG-NEXT: fcvt s0, h1 ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: fcvt h0, s0 ; SDAG-NEXT: fcvt s1, h1 -; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q0, [sp] // 16-byte Spill ; SDAG-NEXT: fmov s0, s1 ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: fcvt h2, s0 ; SDAG-NEXT: mov h1, v1.h[2] ; SDAG-NEXT: fcvt s0, h1 -; SDAG-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp] // 16-byte Reload ; SDAG-NEXT: mov v2.h[1], v1.h[0] -; SDAG-NEXT: str q2, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q2, [sp] // 16-byte Spill ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: fcvt h2, s0 ; SDAG-NEXT: mov h1, v1.h[3] ; SDAG-NEXT: fcvt s0, h1 -; SDAG-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp] // 16-byte Reload ; SDAG-NEXT: mov v1.h[2], v2.h[0] -; SDAG-NEXT: str q1, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q1, [sp] // 16-byte Spill ; SDAG-NEXT: bl exp10f ; SDAG-NEXT: fcvt h1, s0 -; SDAG-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; SDAG-NEXT: ldr q0, [sp] // 16-byte Reload +; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; SDAG-NEXT: mov v0.h[3], v1.h[0] ; SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0 ; 
SDAG-NEXT: add sp, sp, #48 @@ -167,7 +167,7 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) { ; GISEL: // %bb.0: ; GISEL-NEXT: sub sp, sp, #64 ; GISEL-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; GISEL-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; GISEL-NEXT: str x30, [sp, #48] // 8-byte Spill ; GISEL-NEXT: .cfi_def_cfa_offset 64 ; GISEL-NEXT: .cfi_offset w30, -16 ; GISEL-NEXT: .cfi_offset b8, -24 @@ -179,18 +179,18 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) { ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: fcvt s1, h8 ; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp, #16] // 16-byte Spill ; GISEL-NEXT: fmov s0, s1 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: fcvt s1, h9 ; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp] // 16-byte Spill ; GISEL-NEXT: fmov s0, s1 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; GISEL-NEXT: fcvt h0, s0 ; GISEL-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload -; GISEL-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; GISEL-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; GISEL-NEXT: mov v1.h[1], v2.h[0] ; GISEL-NEXT: mov v1.h[2], v0.h[0] ; GISEL-NEXT: mov v0.16b, v1.16b @@ -205,39 +205,39 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) { ; SDAG-LABEL: exp10_v4f16: ; SDAG: // %bb.0: ; SDAG-NEXT: sub sp, sp, #48 -; SDAG-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; SDAG-NEXT: str x30, [sp, #32] // 8-byte Spill ; SDAG-NEXT: .cfi_def_cfa_offset 48 ; SDAG-NEXT: .cfi_offset w30, -16 ; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 ; SDAG-NEXT: mov h1, v0.h[1] -; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; SDAG-NEXT: str q0, [sp, #16] // 16-byte Spill ; SDAG-NEXT: fcvt s0, h1 ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: fcvt h0, s0 ; SDAG-NEXT: fcvt s1, h1 -; SDAG-NEXT: str 
q0, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q0, [sp] // 16-byte Spill ; SDAG-NEXT: fmov s0, s1 ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: fcvt h2, s0 ; SDAG-NEXT: mov h1, v1.h[2] ; SDAG-NEXT: fcvt s0, h1 -; SDAG-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp] // 16-byte Reload ; SDAG-NEXT: mov v2.h[1], v1.h[0] -; SDAG-NEXT: str q2, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q2, [sp] // 16-byte Spill ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: fcvt h2, s0 ; SDAG-NEXT: mov h1, v1.h[3] ; SDAG-NEXT: fcvt s0, h1 -; SDAG-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp] // 16-byte Reload ; SDAG-NEXT: mov v1.h[2], v2.h[0] -; SDAG-NEXT: str q1, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q1, [sp] // 16-byte Spill ; SDAG-NEXT: bl exp10f ; SDAG-NEXT: fcvt h1, s0 -; SDAG-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; SDAG-NEXT: ldr q0, [sp] // 16-byte Reload +; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; SDAG-NEXT: mov v0.h[3], v1.h[0] ; SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0 ; SDAG-NEXT: add sp, sp, #48 @@ -246,9 +246,9 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) { ; GISEL-LABEL: exp10_v4f16: ; GISEL: // %bb.0: ; GISEL-NEXT: sub sp, sp, #80 -; GISEL-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; GISEL-NEXT: str d10, [sp, #48] // 8-byte Spill ; GISEL-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; GISEL-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; GISEL-NEXT: str x30, [sp, #72] // 8-byte Spill ; GISEL-NEXT: .cfi_def_cfa_offset 80 ; GISEL-NEXT: .cfi_offset w30, -8 ; GISEL-NEXT: .cfi_offset b8, -16 @@ -262,25 +262,25 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) { ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: fcvt s1, h8 ; GISEL-NEXT: 
fcvt h0, s0 -; GISEL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp, #32] // 16-byte Spill ; GISEL-NEXT: fmov s0, s1 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: fcvt s1, h9 ; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp] // 16-byte Spill ; GISEL-NEXT: fmov s0, s1 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: fcvt s1, h10 ; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp, #16] // 16-byte Spill ; GISEL-NEXT: fmov s0, s1 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload ; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; GISEL-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; GISEL-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; GISEL-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload -; GISEL-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; GISEL-NEXT: ldr x30, [sp, #72] // 8-byte Reload +; GISEL-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; GISEL-NEXT: mov v1.h[1], v3.h[0] ; GISEL-NEXT: mov v1.h[2], v2.h[0] ; GISEL-NEXT: mov v1.h[3], v0.h[0] @@ -331,21 +331,21 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) { ; SDAG-LABEL: exp10_v2f32: ; SDAG: // %bb.0: ; SDAG-NEXT: sub sp, sp, #48 -; SDAG-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; SDAG-NEXT: str x30, [sp, #32] // 8-byte Spill ; SDAG-NEXT: .cfi_def_cfa_offset 48 ; SDAG-NEXT: .cfi_offset w30, -16 ; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 -; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q0, [sp] // 16-byte Spill ; SDAG-NEXT: mov s0, v0.s[1] ; SDAG-NEXT: bl exp10f ; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0 -; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; SDAG-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; SDAG-NEXT: str q0, [sp, #16] // 16-byte Spill +; SDAG-NEXT: ldr q0, [sp] // 16-byte Reload ; SDAG-NEXT: // kill: def $s0 killed $s0 killed $q0 ; SDAG-NEXT: bl exp10f -; 
SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0 -; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; SDAG-NEXT: mov v0.s[1], v1.s[0] ; SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0 ; SDAG-NEXT: add sp, sp, #48 @@ -354,8 +354,8 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) { ; GISEL-LABEL: exp10_v2f32: ; GISEL: // %bb.0: ; GISEL-NEXT: sub sp, sp, #32 -; GISEL-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; GISEL-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; GISEL-NEXT: str d8, [sp, #16] // 8-byte Spill +; GISEL-NEXT: str x30, [sp, #24] // 8-byte Spill ; GISEL-NEXT: .cfi_def_cfa_offset 32 ; GISEL-NEXT: .cfi_offset w30, -8 ; GISEL-NEXT: .cfi_offset b8, -16 @@ -364,13 +364,13 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) { ; GISEL-NEXT: // kill: def $s0 killed $s0 killed $q0 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0 -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp] // 16-byte Spill ; GISEL-NEXT: fmov s0, s8 ; GISEL-NEXT: bl exp10f -; GISEL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; GISEL-NEXT: ldr q1, [sp] // 16-byte Reload ; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0 -; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; GISEL-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; GISEL-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; GISEL-NEXT: mov v1.s[1], v0.s[0] ; GISEL-NEXT: fmov d0, d1 ; GISEL-NEXT: add sp, sp, #32 @@ -383,27 +383,27 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) { ; SDAG-LABEL: exp10_v3f32: ; SDAG: // %bb.0: ; SDAG-NEXT: sub sp, sp, #48 -; SDAG-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; SDAG-NEXT: str x30, [sp, #32] // 8-byte Spill ; SDAG-NEXT: .cfi_def_cfa_offset 48 ; SDAG-NEXT: .cfi_offset w30, -16 -; SDAG-NEXT: str q0, [sp, #16] // 
16-byte Folded Spill +; SDAG-NEXT: str q0, [sp, #16] // 16-byte Spill ; SDAG-NEXT: mov s0, v0.s[1] ; SDAG-NEXT: bl exp10f ; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0 -; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill -; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: str q0, [sp] // 16-byte Spill +; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; SDAG-NEXT: // kill: def $s0 killed $s0 killed $q0 ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp] // 16-byte Reload ; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0 ; SDAG-NEXT: mov v0.s[1], v1.s[0] -; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill -; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: str q0, [sp] // 16-byte Spill +; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; SDAG-NEXT: mov s0, v0.s[2] ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp] // 16-byte Reload ; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0 -; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; SDAG-NEXT: mov v1.s[2], v0.s[0] ; SDAG-NEXT: mov v0.16b, v1.16b ; SDAG-NEXT: add sp, sp, #48 @@ -413,7 +413,7 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) { ; GISEL: // %bb.0: ; GISEL-NEXT: sub sp, sp, #64 ; GISEL-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; GISEL-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; GISEL-NEXT: str x30, [sp, #48] // 8-byte Spill ; GISEL-NEXT: .cfi_def_cfa_offset 64 ; GISEL-NEXT: .cfi_offset w30, -16 ; GISEL-NEXT: .cfi_offset b8, -24 @@ -423,16 +423,16 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) { ; GISEL-NEXT: // kill: def $s0 killed $s0 killed $q0 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0 -; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp, #16] // 16-byte Spill ; GISEL-NEXT: fmov s0, s8 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: // 
kill: def $s0 killed $s0 def $q0 -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp] // 16-byte Spill ; GISEL-NEXT: fmov s0, s9 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0 -; GISEL-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; GISEL-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; GISEL-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; GISEL-NEXT: mov v1.s[1], v2.s[0] ; GISEL-NEXT: mov v1.s[2], v0.s[0] @@ -447,34 +447,34 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) { ; SDAG-LABEL: exp10_v4f32: ; SDAG: // %bb.0: ; SDAG-NEXT: sub sp, sp, #48 -; SDAG-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; SDAG-NEXT: str x30, [sp, #32] // 8-byte Spill ; SDAG-NEXT: .cfi_def_cfa_offset 48 ; SDAG-NEXT: .cfi_offset w30, -16 -; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; SDAG-NEXT: str q0, [sp, #16] // 16-byte Spill ; SDAG-NEXT: mov s0, v0.s[1] ; SDAG-NEXT: bl exp10f ; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0 -; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill -; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: str q0, [sp] // 16-byte Spill +; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; SDAG-NEXT: // kill: def $s0 killed $s0 killed $q0 ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp] // 16-byte Reload ; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0 ; SDAG-NEXT: mov v0.s[1], v1.s[0] -; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill -; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: str q0, [sp] // 16-byte Spill +; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; SDAG-NEXT: mov s0, v0.s[2] ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp] // 16-byte Reload ; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0 ; SDAG-NEXT: mov v1.s[2], v0.s[0] -; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Folded 
Reload +; SDAG-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; SDAG-NEXT: mov s0, v0.s[3] -; SDAG-NEXT: str q1, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q1, [sp] // 16-byte Spill ; SDAG-NEXT: bl exp10f -; SDAG-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp] // 16-byte Reload ; SDAG-NEXT: // kill: def $s0 killed $s0 def $q0 -; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; SDAG-NEXT: mov v1.s[3], v0.s[0] ; SDAG-NEXT: mov v0.16b, v1.16b ; SDAG-NEXT: add sp, sp, #48 @@ -483,9 +483,9 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) { ; GISEL-LABEL: exp10_v4f32: ; GISEL: // %bb.0: ; GISEL-NEXT: sub sp, sp, #80 -; GISEL-NEXT: str d10, [sp, #48] // 8-byte Folded Spill +; GISEL-NEXT: str d10, [sp, #48] // 8-byte Spill ; GISEL-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill -; GISEL-NEXT: str x30, [sp, #72] // 8-byte Folded Spill +; GISEL-NEXT: str x30, [sp, #72] // 8-byte Spill ; GISEL-NEXT: .cfi_def_cfa_offset 80 ; GISEL-NEXT: .cfi_offset w30, -8 ; GISEL-NEXT: .cfi_offset b8, -16 @@ -497,24 +497,24 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) { ; GISEL-NEXT: // kill: def $s0 killed $s0 killed $q0 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0 -; GISEL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp, #32] // 16-byte Spill ; GISEL-NEXT: fmov s0, s8 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0 -; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp, #16] // 16-byte Spill ; GISEL-NEXT: fmov s0, s9 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0 -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp] // 16-byte Spill ; GISEL-NEXT: fmov s0, s10 ; GISEL-NEXT: bl exp10f ; GISEL-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload ; GISEL-NEXT: // kill: def $s0 killed $s0 def $q0 -; GISEL-NEXT: ldr x30, [sp, #72] // 8-byte 
Folded Reload +; GISEL-NEXT: ldr x30, [sp, #72] // 8-byte Reload ; GISEL-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload -; GISEL-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload +; GISEL-NEXT: ldr d10, [sp, #48] // 8-byte Reload ; GISEL-NEXT: mov v1.s[1], v2.s[0] -; GISEL-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; GISEL-NEXT: ldr q2, [sp] // 16-byte Reload ; GISEL-NEXT: mov v1.s[2], v2.s[0] ; GISEL-NEXT: mov v1.s[3], v0.s[0] ; GISEL-NEXT: mov v0.16b, v1.16b @@ -549,20 +549,20 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) { ; SDAG-LABEL: exp10_v2f64: ; SDAG: // %bb.0: ; SDAG-NEXT: sub sp, sp, #48 -; SDAG-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; SDAG-NEXT: str x30, [sp, #32] // 8-byte Spill ; SDAG-NEXT: .cfi_def_cfa_offset 48 ; SDAG-NEXT: .cfi_offset w30, -16 -; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q0, [sp] // 16-byte Spill ; SDAG-NEXT: mov d0, v0.d[1] ; SDAG-NEXT: bl exp10 ; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 -; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; SDAG-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; SDAG-NEXT: str q0, [sp, #16] // 16-byte Spill +; SDAG-NEXT: ldr q0, [sp] // 16-byte Reload ; SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0 ; SDAG-NEXT: bl exp10 -; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 -; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; SDAG-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; SDAG-NEXT: mov v0.d[1], v1.d[0] ; SDAG-NEXT: add sp, sp, #48 ; SDAG-NEXT: ret @@ -570,8 +570,8 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) { ; GISEL-LABEL: exp10_v2f64: ; GISEL: // %bb.0: ; GISEL-NEXT: sub sp, sp, #32 -; GISEL-NEXT: str d8, [sp, #16] // 8-byte Folded Spill -; GISEL-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; GISEL-NEXT: str d8, [sp, #16] // 8-byte Spill +; GISEL-NEXT: str x30, [sp, #24] // 8-byte Spill ; GISEL-NEXT: 
.cfi_def_cfa_offset 32 ; GISEL-NEXT: .cfi_offset w30, -8 ; GISEL-NEXT: .cfi_offset b8, -16 @@ -579,13 +579,13 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) { ; GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; GISEL-NEXT: bl exp10 ; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp] // 16-byte Spill ; GISEL-NEXT: fmov d0, d8 ; GISEL-NEXT: bl exp10 -; GISEL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; GISEL-NEXT: ldr q1, [sp] // 16-byte Reload ; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; GISEL-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload +; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Reload +; GISEL-NEXT: ldr d8, [sp, #16] // 8-byte Reload ; GISEL-NEXT: mov v1.d[1], v0.d[0] ; GISEL-NEXT: mov v0.16b, v1.16b ; GISEL-NEXT: add sp, sp, #32 @@ -599,7 +599,7 @@ define <3 x double> @exp10_v3f64(<3 x double> %x) { ; SDAG: // %bb.0: ; SDAG-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill ; SDAG-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; SDAG-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; SDAG-NEXT: str x30, [sp, #24] // 8-byte Spill ; SDAG-NEXT: .cfi_def_cfa_offset 32 ; SDAG-NEXT: .cfi_offset w30, -8 ; SDAG-NEXT: .cfi_offset b8, -16 @@ -616,7 +616,7 @@ define <3 x double> @exp10_v3f64(<3 x double> %x) { ; SDAG-NEXT: bl exp10 ; SDAG-NEXT: fmov d1, d9 ; SDAG-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; SDAG-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; SDAG-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; SDAG-NEXT: fmov d2, d0 ; SDAG-NEXT: fmov d0, d10 ; SDAG-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -626,7 +626,7 @@ define <3 x double> @exp10_v3f64(<3 x double> %x) { ; GISEL: // %bb.0: ; GISEL-NEXT: str d10, [sp, #-32]! 
// 8-byte Folded Spill ; GISEL-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; GISEL-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; GISEL-NEXT: str x30, [sp, #24] // 8-byte Spill ; GISEL-NEXT: .cfi_def_cfa_offset 32 ; GISEL-NEXT: .cfi_offset w30, -8 ; GISEL-NEXT: .cfi_offset b8, -16 @@ -643,7 +643,7 @@ define <3 x double> @exp10_v3f64(<3 x double> %x) { ; GISEL-NEXT: bl exp10 ; GISEL-NEXT: fmov d1, d8 ; GISEL-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; GISEL-NEXT: fmov d2, d0 ; GISEL-NEXT: fmov d0, d10 ; GISEL-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -656,33 +656,33 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) { ; SDAG-LABEL: exp10_v4f64: ; SDAG: // %bb.0: ; SDAG-NEXT: sub sp, sp, #64 -; SDAG-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; SDAG-NEXT: str x30, [sp, #48] // 8-byte Spill ; SDAG-NEXT: .cfi_def_cfa_offset 64 ; SDAG-NEXT: .cfi_offset w30, -16 -; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill +; SDAG-NEXT: str q0, [sp] // 16-byte Spill ; SDAG-NEXT: mov d0, v0.d[1] -; SDAG-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; SDAG-NEXT: str q1, [sp, #32] // 16-byte Spill ; SDAG-NEXT: bl exp10 ; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 -; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; SDAG-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; SDAG-NEXT: str q0, [sp, #16] // 16-byte Spill +; SDAG-NEXT: ldr q0, [sp] // 16-byte Reload ; SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0 ; SDAG-NEXT: bl exp10 -; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; SDAG-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 ; SDAG-NEXT: mov v0.d[1], v1.d[0] -; SDAG-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; SDAG-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; SDAG-NEXT: str q0, [sp, #16] // 16-byte Spill +; SDAG-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; 
SDAG-NEXT: mov d0, v0.d[1] ; SDAG-NEXT: bl exp10 ; SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 -; SDAG-NEXT: str q0, [sp] // 16-byte Folded Spill -; SDAG-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; SDAG-NEXT: str q0, [sp] // 16-byte Spill +; SDAG-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0 ; SDAG-NEXT: bl exp10 ; SDAG-NEXT: fmov d1, d0 ; SDAG-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload -; SDAG-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; SDAG-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; SDAG-NEXT: mov v1.d[1], v2.d[0] ; SDAG-NEXT: add sp, sp, #64 ; SDAG-NEXT: ret @@ -691,35 +691,35 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) { ; GISEL: // %bb.0: ; GISEL-NEXT: sub sp, sp, #80 ; GISEL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; GISEL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; GISEL-NEXT: str x30, [sp, #64] // 8-byte Spill ; GISEL-NEXT: .cfi_def_cfa_offset 80 ; GISEL-NEXT: .cfi_offset w30, -16 ; GISEL-NEXT: .cfi_offset b8, -24 ; GISEL-NEXT: .cfi_offset b9, -32 -; GISEL-NEXT: str q1, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str q1, [sp] // 16-byte Spill ; GISEL-NEXT: mov d8, v0.d[1] ; GISEL-NEXT: mov d9, v1.d[1] ; GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; GISEL-NEXT: bl exp10 ; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; GISEL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp, #32] // 16-byte Spill ; GISEL-NEXT: fmov d0, d8 ; GISEL-NEXT: bl exp10 ; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; GISEL-NEXT: str q0, [sp, #16] // 16-byte Spill +; GISEL-NEXT: ldr q0, [sp] // 16-byte Reload ; GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; GISEL-NEXT: bl exp10 ; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp] // 16-byte Spill ; 
GISEL-NEXT: fmov d0, d9 ; GISEL-NEXT: bl exp10 ; GISEL-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload ; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; GISEL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; GISEL-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; GISEL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; GISEL-NEXT: mov v2.d[1], v1.d[0] -; GISEL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; GISEL-NEXT: ldr q1, [sp] // 16-byte Reload ; GISEL-NEXT: mov v1.d[1], v0.d[0] ; GISEL-NEXT: mov v0.16b, v2.16b ; GISEL-NEXT: add sp, sp, #80 diff --git a/llvm/test/CodeGen/AArch64/llvm.frexp.ll b/llvm/test/CodeGen/AArch64/llvm.frexp.ll index 4e1876db772ed..bc9f4c54174d4 100644 --- a/llvm/test/CodeGen/AArch64/llvm.frexp.ll +++ b/llvm/test/CodeGen/AArch64/llvm.frexp.ll @@ -84,40 +84,40 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) nounwind ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #36 ; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: add x19, sp, #36 ; CHECK-NEXT: fcvt s0, h1 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: add x0, sp, #32 ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: fmov s0, s1 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: fcvt h2, s0 ; CHECK-NEXT: add x0, sp, #40 ; CHECK-NEXT: mov h1, v1.h[2] ; CHECK-NEXT: fcvt s0, h1 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: mov v2.h[1], v1.h[0] -; CHECK-NEXT: str q2, 
[sp] // 16-byte Folded Spill +; CHECK-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: fcvt h2, s0 ; CHECK-NEXT: add x0, sp, #44 ; CHECK-NEXT: mov h1, v1.h[3] ; CHECK-NEXT: fcvt s0, h1 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: mov v1.h[2], v2.h[0] -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: fcvt h2, s0 ; CHECK-NEXT: ldr s1, [sp, #32] -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ld1 { v1.s }[1], [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 @@ -131,40 +131,40 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) nounwind ; WINDOWS-NEXT: sub sp, sp, #64 ; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0 ; WINDOWS-NEXT: mov h1, v0.h[1] -; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Spill ; WINDOWS-NEXT: add x0, sp, #36 ; WINDOWS-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill ; WINDOWS-NEXT: add x19, sp, #36 ; WINDOWS-NEXT: fcvt d0, h1 ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: fcvt h0, d0 ; WINDOWS-NEXT: add x0, sp, #32 ; WINDOWS-NEXT: fcvt d1, h1 -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: fmov d0, d1 ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: fcvt h2, d0 ; WINDOWS-NEXT: add x0, sp, #40 ; WINDOWS-NEXT: mov h1, v1.h[2] ; WINDOWS-NEXT: fcvt d0, h1 -; 
WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: mov v2.h[1], v1.h[0] -; WINDOWS-NEXT: str q2, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q2, [sp] // 16-byte Spill ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: fcvt h2, d0 ; WINDOWS-NEXT: add x0, sp, #44 ; WINDOWS-NEXT: mov h1, v1.h[3] ; WINDOWS-NEXT: fcvt d0, h1 -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: mov v1.h[2], v2.h[0] -; WINDOWS-NEXT: str q1, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q1, [sp] // 16-byte Spill ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt h2, d0 ; WINDOWS-NEXT: ldr s1, [sp, #32] -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload ; WINDOWS-NEXT: ld1 { v1.s }[1], [x19] ; WINDOWS-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload ; WINDOWS-NEXT: // kill: def $d1 killed $d1 killed $q1 @@ -182,39 +182,39 @@ define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) nounwind ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #44 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: fcvt s0, h1 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: add x0, sp, #40 ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: fmov s0, s1 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; 
CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: fcvt h2, s0 ; CHECK-NEXT: add x0, sp, #56 ; CHECK-NEXT: mov h1, v1.h[2] ; CHECK-NEXT: fcvt s0, h1 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: mov v2.h[1], v1.h[0] -; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: fcvt h2, s0 ; CHECK-NEXT: add x0, sp, #60 ; CHECK-NEXT: mov h1, v1.h[3] ; CHECK-NEXT: fcvt s0, h1 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: mov v1.h[2], v2.h[0] -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: fcvt h1, s0 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: mov v0.h[3], v1.h[0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #64 @@ -225,39 +225,39 @@ define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) nounwind ; WINDOWS-NEXT: sub sp, sp, #64 ; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0 ; WINDOWS-NEXT: mov h1, v0.h[1] -; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Spill ; WINDOWS-NEXT: add x0, sp, #44 -; WINDOWS-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; WINDOWS-NEXT: str x30, [sp, #48] // 8-byte Spill ; WINDOWS-NEXT: fcvt d0, h1 ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: fcvt h0, d0 ; WINDOWS-NEXT: add x0, sp, #40 ; WINDOWS-NEXT: fcvt d1, h1 -; WINDOWS-NEXT: str q0, [sp] 
// 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: fmov d0, d1 ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: fcvt h2, d0 ; WINDOWS-NEXT: add x0, sp, #56 ; WINDOWS-NEXT: mov h1, v1.h[2] ; WINDOWS-NEXT: fcvt d0, h1 -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: mov v2.h[1], v1.h[0] -; WINDOWS-NEXT: str q2, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q2, [sp] // 16-byte Spill ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: fcvt h2, d0 ; WINDOWS-NEXT: add x0, sp, #60 ; WINDOWS-NEXT: mov h1, v1.h[3] ; WINDOWS-NEXT: fcvt d0, h1 -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: mov v1.h[2], v2.h[0] -; WINDOWS-NEXT: str q1, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q1, [sp] // 16-byte Spill ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt h1, d0 -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; WINDOWS-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload +; WINDOWS-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; WINDOWS-NEXT: mov v0.h[3], v1.h[0] ; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0 ; WINDOWS-NEXT: add sp, sp, #64 @@ -272,12 +272,12 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #24 ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: 
ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #28 ; CHECK-NEXT: add x19, sp, #28 ; CHECK-NEXT: mov h0, v0.h[1] @@ -294,12 +294,12 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) nounwind { ; WINDOWS: // %bb.0: ; WINDOWS-NEXT: sub sp, sp, #48 ; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0 -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: add x0, sp, #24 ; WINDOWS-NEXT: fcvt d0, h0 ; WINDOWS-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #28 ; WINDOWS-NEXT: add x19, sp, #28 ; WINDOWS-NEXT: mov h0, v0.h[1] @@ -320,32 +320,32 @@ define { <3 x float>, <3 x i32> } @test_frexp_v3f32_v3i32(<3 x float> %a) nounwi ; CHECK-LABEL: test_frexp_v3f32_v3i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: add x0, sp, #56 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: add x19, sp, #56 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #44 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: add x0, sp, #60 ; CHECK-NEXT: add x20, sp, #60 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 
16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: ldr s1, [sp, #44] -; CHECK-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: ld1 { v1.s }[1], [x19] ; CHECK-NEXT: mov v2.s[2], v0.s[0] ; CHECK-NEXT: ld1 { v1.s }[2], [x20] @@ -358,7 +358,7 @@ define { <3 x float>, <3 x i32> } @test_frexp_v3f32_v3i32(<3 x float> %a) nounwi ; WINDOWS: // %bb.0: ; WINDOWS-NEXT: sub sp, sp, #80 ; WINDOWS-NEXT: mov s1, v0.s[1] -; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Spill ; WINDOWS-NEXT: add x0, sp, #36 ; WINDOWS-NEXT: stp x30, x21, [sp, #48] // 16-byte Folded Spill ; WINDOWS-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill @@ -367,33 +367,33 @@ define { <3 x float>, <3 x i32> } @test_frexp_v3f32_v3i32(<3 x float> %a) nounwi ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s0, d0 ; WINDOWS-NEXT: add x0, sp, #32 -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill -; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill +; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s0, d0 -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #40 ; WINDOWS-NEXT: add x20, sp, #40 ; WINDOWS-NEXT: mov v0.s[1], v1.s[0] -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill -; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill +; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; 
WINDOWS-NEXT: mov s0, v0.s[2] ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s0, d0 -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #44 ; WINDOWS-NEXT: add x21, sp, #44 ; WINDOWS-NEXT: mov v1.s[2], v0.s[0] -; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: mov s0, v0.s[3] -; WINDOWS-NEXT: str q1, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q1, [sp] // 16-byte Spill ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: ldr s1, [sp, #32] ; WINDOWS-NEXT: fcvt s2, d0 -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload ; WINDOWS-NEXT: ld1 { v1.s }[1], [x19] ; WINDOWS-NEXT: mov v0.s[3], v2.s[0] ; WINDOWS-NEXT: ld1 { v1.s }[2], [x20] @@ -506,20 +506,20 @@ define { <2 x float>, <2 x i32> } @test_frexp_v2f32_v2i32(<2 x float> %a) nounwi ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #40 ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: add x19, sp, #40 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #44 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: ldr s1, [sp, #44] -; CHECK-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q2, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: ld1 { v1.s }[1], [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #48] 
// 16-byte Folded Reload @@ -534,22 +534,22 @@ define { <2 x float>, <2 x i32> } @test_frexp_v2f32_v2i32(<2 x float> %a) nounwi ; WINDOWS-NEXT: sub sp, sp, #48 ; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0 ; WINDOWS-NEXT: mov s1, v0.s[1] -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: add x0, sp, #28 ; WINDOWS-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill ; WINDOWS-NEXT: add x19, sp, #28 ; WINDOWS-NEXT: fcvt d0, s1 ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: fcvt s0, d0 ; WINDOWS-NEXT: add x0, sp, #24 ; WINDOWS-NEXT: fcvt d1, s1 -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: fmov d0, d1 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s0, d0 ; WINDOWS-NEXT: ldr s1, [sp, #24] -; WINDOWS-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q2, [sp] // 16-byte Reload ; WINDOWS-NEXT: ld1 { v1.s }[1], [x19] ; WINDOWS-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload ; WINDOWS-NEXT: // kill: def $d1 killed $d1 killed $q1 @@ -566,20 +566,20 @@ define <2 x float> @test_frexp_v2f32_v2i32_only_use_fract(<2 x float> %a) nounwi ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #40 ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #44 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // 
kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #48 @@ -590,21 +590,21 @@ define <2 x float> @test_frexp_v2f32_v2i32_only_use_fract(<2 x float> %a) nounwi ; WINDOWS-NEXT: sub sp, sp, #32 ; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0 ; WINDOWS-NEXT: mov s1, v0.s[1] -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: add x0, sp, #28 -; WINDOWS-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; WINDOWS-NEXT: str x30, [sp, #16] // 8-byte Spill ; WINDOWS-NEXT: fcvt d0, s1 ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: fcvt s0, d0 ; WINDOWS-NEXT: add x0, sp, #24 ; WINDOWS-NEXT: fcvt d1, s1 -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: fmov d0, d1 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s0, d0 -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; WINDOWS-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload +; WINDOWS-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; WINDOWS-NEXT: mov v0.s[1], v1.s[0] ; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0 ; WINDOWS-NEXT: add sp, sp, #32 @@ -621,10 +621,10 @@ define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) nounwind { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: add x0, sp, #28 ; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 
16-byte Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #24 ; CHECK-NEXT: add x19, sp, #24 ; CHECK-NEXT: mov s0, v0.s[1] @@ -640,12 +640,12 @@ define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) nounwind { ; WINDOWS: // %bb.0: ; WINDOWS-NEXT: sub sp, sp, #48 ; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0 -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: add x0, sp, #24 ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #28 ; WINDOWS-NEXT: add x19, sp, #28 ; WINDOWS-NEXT: mov s0, v0.s[1] @@ -666,7 +666,7 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; CHECK-LABEL: test_frexp_v4f32_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: add x0, sp, #40 ; CHECK-NEXT: stp x30, x21, [sp, #48] // 16-byte Folded Spill @@ -674,31 +674,31 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; CHECK-NEXT: add x19, sp, #40 ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #44 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: 
def $s0 killed $s0 def $q0 ; CHECK-NEXT: add x0, sp, #36 ; CHECK-NEXT: add x20, sp, #36 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: add x0, sp, #32 ; CHECK-NEXT: add x21, sp, #32 ; CHECK-NEXT: mov v1.s[2], v0.s[0] -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[3] -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: ldr s1, [sp, #44] -; CHECK-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v2.s[3], v0.s[0] ; CHECK-NEXT: ld1 { v1.s }[1], [x19] @@ -715,7 +715,7 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; WINDOWS: // %bb.0: ; WINDOWS-NEXT: sub sp, sp, #80 ; WINDOWS-NEXT: mov s1, v0.s[1] -; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Spill ; WINDOWS-NEXT: add x0, sp, #36 ; WINDOWS-NEXT: stp x30, x21, [sp, #48] // 16-byte Folded Spill ; WINDOWS-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill @@ -724,33 +724,33 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s0, d0 ; WINDOWS-NEXT: add x0, sp, #32 -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill -; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill +; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Reload 
; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s0, d0 -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #40 ; WINDOWS-NEXT: add x20, sp, #40 ; WINDOWS-NEXT: mov v0.s[1], v1.s[0] -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill -; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill +; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: mov s0, v0.s[2] ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s0, d0 -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #44 ; WINDOWS-NEXT: add x21, sp, #44 ; WINDOWS-NEXT: mov v1.s[2], v0.s[0] -; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: mov s0, v0.s[3] -; WINDOWS-NEXT: str q1, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q1, [sp] // 16-byte Spill ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: ldr s1, [sp, #32] ; WINDOWS-NEXT: fcvt s2, d0 -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload ; WINDOWS-NEXT: ld1 { v1.s }[1], [x19] ; WINDOWS-NEXT: mov v0.s[3], v2.s[0] ; WINDOWS-NEXT: ld1 { v1.s }[2], [x20] @@ -767,36 +767,36 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; CHECK-LABEL: test_frexp_v4f32_v4i32_only_use_fract: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: add x0, sp, #56 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: bl frexpf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded 
Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #60 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: add x0, sp, #44 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: add x0, sp, #40 ; CHECK-NEXT: mov v1.s[2], v0.s[0] -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[3] -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: mov v1.s[3], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #64 @@ -806,38 +806,38 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; WINDOWS: // %bb.0: ; WINDOWS-NEXT: sub sp, sp, #64 ; WINDOWS-NEXT: mov s1, v0.s[1] -; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Spill ; WINDOWS-NEXT: add x0, sp, #44 -; WINDOWS-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; WINDOWS-NEXT: str x30, [sp, #48] // 
8-byte Spill ; WINDOWS-NEXT: fcvt d0, s1 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s0, d0 ; WINDOWS-NEXT: add x0, sp, #40 -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill -; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill +; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s0, d0 -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #56 ; WINDOWS-NEXT: mov v0.s[1], v1.s[0] -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill -; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill +; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: mov s0, v0.s[2] ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s0, d0 -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #60 ; WINDOWS-NEXT: mov v1.s[2], v0.s[0] -; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: mov s0, v0.s[3] -; WINDOWS-NEXT: str q1, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q1, [sp] // 16-byte Spill ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: fcvt s1, d0 -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; WINDOWS-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload +; WINDOWS-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; WINDOWS-NEXT: mov v0.s[3], v1.s[0] ; WINDOWS-NEXT: add sp, sp, #64 ; WINDOWS-NEXT: ret @@ -853,20 +853,20 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; CHECK-NEXT: add x0, sp, #28 ; CHECK-NEXT: stp x30, x21, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str 
q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #24 ; CHECK-NEXT: add x19, sp, #24 ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #20 ; CHECK-NEXT: add x20, sp, #20 ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl frexpf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #16 ; CHECK-NEXT: add x21, sp, #16 ; CHECK-NEXT: mov s0, v0.s[3] @@ -884,25 +884,25 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; WINDOWS-LABEL: test_frexp_v4f32_v4i32_only_use_exp: ; WINDOWS: // %bb.0: ; WINDOWS-NEXT: sub sp, sp, #64 -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: add x0, sp, #16 ; WINDOWS-NEXT: stp x30, x21, [sp, #32] // 16-byte Folded Spill ; WINDOWS-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #20 ; WINDOWS-NEXT: add x19, sp, #20 ; WINDOWS-NEXT: mov s0, v0.s[1] ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #24 ; WINDOWS-NEXT: add x20, sp, #24 ; WINDOWS-NEXT: mov s0, v0.s[2] ; WINDOWS-NEXT: fcvt d0, s0 ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #28 ; WINDOWS-NEXT: add x21, sp, #28 ; WINDOWS-NEXT: mov s0, 
v0.s[3] @@ -991,20 +991,20 @@ define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) noun ; CHECK-LABEL: test_frexp_v2f64_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov d0, v0.d[1] ; CHECK-NEXT: add x0, sp, #40 ; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: add x19, sp, #40 ; CHECK-NEXT: bl frexp ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #44 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl frexp ; CHECK-NEXT: ldr s1, [sp, #44] -; CHECK-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q2, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ld1 { v1.s }[1], [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload @@ -1016,20 +1016,20 @@ define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) noun ; WINDOWS-LABEL: test_frexp_v2f64_v2i32: ; WINDOWS: // %bb.0: ; WINDOWS-NEXT: sub sp, sp, #64 -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: mov d0, v0.d[1] ; WINDOWS-NEXT: add x0, sp, #40 ; WINDOWS-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill ; WINDOWS-NEXT: add x19, sp, #40 ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0 -; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Spill ; WINDOWS-NEXT: add x0, sp, #44 -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload ; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0 ; WINDOWS-NEXT: bl frexp ; 
WINDOWS-NEXT: ldr s1, [sp, #44] -; WINDOWS-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q2, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0 ; WINDOWS-NEXT: ld1 { v1.s }[1], [x19] ; WINDOWS-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload @@ -1045,20 +1045,20 @@ define <2 x double> @test_frexp_v2f64_v2i32_only_use_fract(<2 x double> %a) noun ; CHECK-LABEL: test_frexp_v2f64_v2i32_only_use_fract: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov d0, v0.d[1] ; CHECK-NEXT: add x0, sp, #40 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl frexp ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #44 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl frexp -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret @@ -1066,20 +1066,20 @@ define <2 x double> @test_frexp_v2f64_v2i32_only_use_fract(<2 x double> %a) noun ; WINDOWS-LABEL: test_frexp_v2f64_v2i32_only_use_fract: ; WINDOWS: // %bb.0: ; WINDOWS-NEXT: sub sp, sp, #48 -; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp, #16] // 16-byte Spill ; WINDOWS-NEXT: mov d0, v0.d[1] ; WINDOWS-NEXT: add x0, sp, #40 -; WINDOWS-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; WINDOWS-NEXT: str x30, [sp, #32] // 
8-byte Spill ; WINDOWS-NEXT: bl frexp ; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0 -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: add x0, sp, #44 -; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0 ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q1, [sp] // 16-byte Reload ; WINDOWS-NEXT: // kill: def $d0 killed $d0 def $q0 -; WINDOWS-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; WINDOWS-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; WINDOWS-NEXT: mov v0.d[1], v1.d[0] ; WINDOWS-NEXT: add sp, sp, #48 ; WINDOWS-NEXT: ret @@ -1094,10 +1094,10 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) nounwind ; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: add x0, sp, #28 ; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl frexp -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #24 ; CHECK-NEXT: add x19, sp, #24 ; CHECK-NEXT: mov d0, v0.d[1] @@ -1114,10 +1114,10 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) nounwind ; WINDOWS-NEXT: sub sp, sp, #48 ; WINDOWS-NEXT: add x0, sp, #28 ; WINDOWS-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill -; WINDOWS-NEXT: str q0, [sp] // 16-byte Folded Spill +; WINDOWS-NEXT: str q0, [sp] // 16-byte Spill ; WINDOWS-NEXT: // kill: def $d0 killed $d0 killed $q0 ; WINDOWS-NEXT: bl frexp -; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; WINDOWS-NEXT: ldr q0, [sp] // 16-byte Reload ; WINDOWS-NEXT: add x0, sp, #24 ; WINDOWS-NEXT: add x19, sp, #24 ; WINDOWS-NEXT: mov d0, v0.d[1] diff --git 
a/llvm/test/CodeGen/AArch64/llvm.modf.ll b/llvm/test/CodeGen/AArch64/llvm.modf.ll index 503742fa1c443..e9544375adc09 100644 --- a/llvm/test/CodeGen/AArch64/llvm.modf.ll +++ b/llvm/test/CodeGen/AArch64/llvm.modf.ll @@ -59,44 +59,44 @@ define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) { ; CHECK-SD-LABEL: test_modf_v2f16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #64 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov h1, v0.h[1] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: add x0, sp, #44 ; CHECK-SD-NEXT: fcvt s0, h1 ; CHECK-SD-NEXT: bl modff -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h0, s0 ; CHECK-SD-NEXT: add x0, sp, #40 ; CHECK-SD-NEXT: fcvt s1, h1 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: fmov s0, s1 ; CHECK-SD-NEXT: bl modff -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: add x0, sp, #56 ; CHECK-SD-NEXT: mov h1, v1.h[2] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v2.h[1], v1.h[0] -; CHECK-SD-NEXT: str q2, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q2, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl modff -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: fcvt h2, s0 ; CHECK-SD-NEXT: add x0, sp, #60 ; CHECK-SD-NEXT: mov h1, v1.h[3] ; CHECK-SD-NEXT: fcvt s0, h1 -; CHECK-SD-NEXT: ldr q1, 
[sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov v1.h[2], v2.h[0] -; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: bl modff ; CHECK-SD-NEXT: ldp s2, s1, [sp, #40] ; CHECK-SD-NEXT: fcvt h4, s0 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: fcvt h3, s1 ; CHECK-SD-NEXT: fcvt h1, s2 ; CHECK-SD-NEXT: ldr s2, [sp, #56] @@ -115,8 +115,8 @@ define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) { ; CHECK-GI-LABEL: test_modf_v2f16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str d8, [sp, #48] // 8-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #48] // 8-byte Spill +; CHECK-GI-NEXT: str x30, [sp, #56] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w30, -8 ; CHECK-GI-NEXT: .cfi_offset b8, -16 @@ -128,15 +128,15 @@ define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) { ; CHECK-GI-NEXT: fcvt h0, s0 ; CHECK-GI-NEXT: ldr s1, [sp, #40] ; CHECK-GI-NEXT: add x0, sp, #44 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: fcvt h0, s1 -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fcvt s0, h8 ; CHECK-GI-NEXT: bl modff ; CHECK-GI-NEXT: ldr s1, [sp, #44] ; CHECK-GI-NEXT: fcvt h3, s0 -; CHECK-GI-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #56] // 8-byte Reload +; CHECK-GI-NEXT: ldr d8, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: fcvt h2, s1 ; CHECK-GI-NEXT: 
ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov v0.h[1], v3.h[0] @@ -168,36 +168,36 @@ define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) { ; CHECK-SD-LABEL: test_modf_v3f32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #80 -; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w20, -16 ; CHECK-SD-NEXT: .cfi_offset w30, -32 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: add x0, sp, #56 ; CHECK-SD-NEXT: add x19, sp, #56 ; CHECK-SD-NEXT: bl modff ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: add x0, sp, #44 -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl modff -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: add x0, sp, #60 ; CHECK-SD-NEXT: add x20, sp, #60 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: mov s0, v0.s[2] ; CHECK-SD-NEXT: bl modff ; CHECK-SD-NEXT: ldr s1, [sp, #44] -; CHECK-SD-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q2, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte 
Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-SD-NEXT: ld1 { v1.s }[1], [x19] ; CHECK-SD-NEXT: mov v2.s[2], v0.s[0] ; CHECK-SD-NEXT: ld1 { v1.s }[2], [x20] @@ -228,19 +228,19 @@ define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) { ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl modff ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: add x0, sp, #76 ; CHECK-GI-NEXT: add x19, sp, #76 ; CHECK-GI-NEXT: ldr s0, [sp, #72] -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s9 ; CHECK-GI-NEXT: bl modff ; CHECK-GI-NEXT: ldp q3, q2, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v2.s[1], v1.s[0] -; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] ; CHECK-GI-NEXT: mov v2.s[2], v0.s[0] ; CHECK-GI-NEXT: ld1 { v1.s }[2], [x19] @@ -261,19 +261,19 @@ define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) { ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: add x0, sp, #40 ; CHECK-SD-NEXT: add x19, sp, #40 ; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl modff ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: add x0, sp, #44 -; CHECK-SD-NEXT: ldr q0, [sp] // 
16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl modff ; CHECK-SD-NEXT: ldr s1, [sp, #44] -; CHECK-SD-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q2, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-SD-NEXT: ld1 { v1.s }[1], [x19] ; CHECK-SD-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload @@ -286,7 +286,7 @@ define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) { ; CHECK-GI-LABEL: test_modf_v2f32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str d8, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #32] // 8-byte Spill ; CHECK-GI-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w19, -8 @@ -298,16 +298,16 @@ define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) { ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-GI-NEXT: bl modff ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: add x0, sp, #44 ; CHECK-GI-NEXT: add x19, sp, #44 ; CHECK-GI-NEXT: ldr s0, [sp, #40] -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-GI-NEXT: fmov s0, s8 ; CHECK-GI-NEXT: bl modff ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d8, [sp, #32] // 8-byte Reload ; CHECK-GI-NEXT: mov v2.s[1], v0.s[0] ; CHECK-GI-NEXT: ld1 { v1.s }[1], [x19] ; CHECK-GI-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload @@ -342,19 +342,19 @@ define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) { ; CHECK-SD-NEXT: .cfi_def_cfa_offset 
64 ; CHECK-SD-NEXT: .cfi_offset w19, -8 ; CHECK-SD-NEXT: .cfi_offset w30, -16 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: add x0, sp, #32 ; CHECK-SD-NEXT: add x19, sp, #32 ; CHECK-SD-NEXT: bl modf ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-SD-NEXT: add x0, sp, #40 -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl modf ; CHECK-SD-NEXT: ldr d1, [sp, #40] -; CHECK-SD-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr q2, [sp, #16] // 16-byte Reload ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: ld1 { v1.d }[1], [x19] ; CHECK-SD-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload @@ -365,7 +365,7 @@ define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) { ; CHECK-GI-LABEL: test_modf_v2f64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str d8, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str d8, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w19, -8 @@ -376,16 +376,16 @@ define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) { ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: bl modf ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-GI-NEXT: add x0, sp, #56 ; CHECK-GI-NEXT: add x19, sp, #56 ; CHECK-GI-NEXT: ldr d0, [sp, #40] -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill ; 
CHECK-GI-NEXT: fmov d0, d8 ; CHECK-GI-NEXT: bl modf ; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d8, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: mov v2.d[1], v0.d[0] ; CHECK-GI-NEXT: ld1 { v1.d }[1], [x19] ; CHECK-GI-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload @@ -400,13 +400,13 @@ define { fp128, fp128 } @test_modf_fp128(fp128 %a) { ; CHECK-LABEL: test_modf_fp128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl modfl ; CHECK-NEXT: ldr q1, [sp] -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %result = call { fp128, fp128 } @llvm.modf.fp128(fp128 %a) diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll index 21da8645b9b16..bb549496c7a4f 100644 --- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll +++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll @@ -20,14 +20,14 @@ define { half, half } @test_sincos_f16(half %a) nounwind { ; NO-LIBCALL: // %bb.0: ; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill ; NO-LIBCALL-NEXT: fcvt s8, h0 -; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s8 ; NO-LIBCALL-NEXT: bl sinf ; NO-LIBCALL-NEXT: fcvt h9, s0 ; NO-LIBCALL-NEXT: fmov s0, s8 ; NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: fmov s1, s0 -; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; NO-LIBCALL-NEXT: fmov s0, s9 ; NO-LIBCALL-NEXT: fcvt h1, s1 ; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload @@ -94,31 +94,31 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) nounwind { ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #36 ; CHECK-NEXT: add x1, sp, #32 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: fcvt s0, h1 ; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #28 ; CHECK-NEXT: add x1, sp, #24 ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #44 ; CHECK-NEXT: add x1, sp, #40 ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #60 ; CHECK-NEXT: add x1, sp, #56 ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: bl sincosf ; CHECK-NEXT: ldp s2, s0, [sp, #32] -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: ldp s3, s1, [sp, #24] ; 
CHECK-NEXT: fcvt h4, s0 ; CHECK-NEXT: fcvt h2, s2 @@ -147,66 +147,66 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) nounwind { ; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 ; NO-LIBCALL-NEXT: mov h1, v0.h[1] ; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Spill ; NO-LIBCALL-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Spill ; NO-LIBCALL-NEXT: fcvt s8, h1 ; NO-LIBCALL-NEXT: fmov s0, s8 ; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: fcvt h0, s0 ; NO-LIBCALL-NEXT: fcvt s9, h1 -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s9 ; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: fcvt h0, s0 ; NO-LIBCALL-NEXT: mov h1, v1.h[2] ; NO-LIBCALL-NEXT: fcvt s10, h1 -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0] -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s10 ; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: fcvt h0, s0 ; NO-LIBCALL-NEXT: mov h1, v1.h[3] ; NO-LIBCALL-NEXT: fcvt s11, h1 -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0] ; NO-LIBCALL-NEXT: fmov s0, s11 -; NO-LIBCALL-NEXT: str q1, 
[sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Spill ; NO-LIBCALL-NEXT: bl sinf ; NO-LIBCALL-NEXT: fcvt h0, s0 -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; NO-LIBCALL-NEXT: mov v1.h[3], v0.h[0] ; NO-LIBCALL-NEXT: fmov s0, s8 -; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Spill ; NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: fcvt h0, s0 -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s9 ; NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: fcvt h0, s0 -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0] -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s10 ; NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: fcvt h0, s0 -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0] ; NO-LIBCALL-NEXT: fmov s0, s11 -; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Spill ; NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: fmov s1, s0 ; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; NO-LIBCALL-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; NO-LIBCALL-NEXT: fcvt h2, s1 -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: mov 
v1.h[3], v2.h[0] ; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1 ; NO-LIBCALL-NEXT: add sp, sp, #80 @@ -221,31 +221,31 @@ define { <3 x half>, <3 x half> } @test_sincos_v3f16(<3 x half> %a) nounwind { ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: add x0, sp, #36 ; CHECK-NEXT: add x1, sp, #32 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: fcvt s0, h1 ; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #28 ; CHECK-NEXT: add x1, sp, #24 ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #44 ; CHECK-NEXT: add x1, sp, #40 ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #60 ; CHECK-NEXT: add x1, sp, #56 ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: bl sincosf ; CHECK-NEXT: ldp s2, s0, [sp, #32] -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: ldp s3, s1, [sp, #24] ; CHECK-NEXT: fcvt h4, s0 ; CHECK-NEXT: fcvt h2, s2 @@ -274,66 +274,66 @@ define { <3 x half>, <3 x half> } @test_sincos_v3f16(<3 x half> %a) nounwind { ; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 ; NO-LIBCALL-NEXT: mov h1, v0.h[1] ; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Spill ; NO-LIBCALL-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; 
NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Spill ; NO-LIBCALL-NEXT: fcvt s8, h1 ; NO-LIBCALL-NEXT: fmov s0, s8 ; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: fcvt h0, s0 ; NO-LIBCALL-NEXT: fcvt s9, h1 -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s9 ; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: fcvt h0, s0 ; NO-LIBCALL-NEXT: mov h1, v1.h[2] ; NO-LIBCALL-NEXT: fcvt s10, h1 -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0] -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s10 ; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: fcvt h0, s0 ; NO-LIBCALL-NEXT: mov h1, v1.h[3] ; NO-LIBCALL-NEXT: fcvt s11, h1 -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0] ; NO-LIBCALL-NEXT: fmov s0, s11 -; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Spill ; NO-LIBCALL-NEXT: bl sinf ; NO-LIBCALL-NEXT: fcvt h0, s0 -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; NO-LIBCALL-NEXT: mov v1.h[3], v0.h[0] ; NO-LIBCALL-NEXT: fmov s0, s8 -; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Spill ; NO-LIBCALL-NEXT: bl cosf ; 
NO-LIBCALL-NEXT: fcvt h0, s0 -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s9 ; NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: fcvt h0, s0 -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0] -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s10 ; NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: fcvt h0, s0 -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0] ; NO-LIBCALL-NEXT: fmov s0, s11 -; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Spill ; NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: fmov s1, s0 ; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; NO-LIBCALL-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; NO-LIBCALL-NEXT: fcvt h2, s1 -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: mov v1.h[3], v2.h[0] ; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1 ; NO-LIBCALL-NEXT: add sp, sp, #80 @@ -356,13 +356,13 @@ define { float, float } @test_sincos_f32(float %a) nounwind { ; NO-LIBCALL-LABEL: test_sincos_f32: ; NO-LIBCALL: // %bb.0: ; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Spill ; NO-LIBCALL-NEXT: fmov s8, s0 ; NO-LIBCALL-NEXT: bl sinf ; NO-LIBCALL-NEXT: fmov s9, s0 ; NO-LIBCALL-NEXT: fmov s0, s8 ; NO-LIBCALL-NEXT: bl cosf -; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; NO-LIBCALL-NEXT: fmov s1, s0 ; NO-LIBCALL-NEXT: fmov s0, s9 ; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload @@ -377,20 +377,20 @@ define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) nounwind ; CHECK-NEXT: sub sp, sp, #80 ; CHECK-NEXT: add x0, sp, #20 ; CHECK-NEXT: add x1, sp, #16 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #28 ; CHECK-NEXT: add x1, sp, #24 ; CHECK-NEXT: add x19, sp, #28 ; CHECK-NEXT: add x20, sp, #24 ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #44 ; CHECK-NEXT: add x1, sp, #40 ; CHECK-NEXT: add x21, sp, #44 @@ -398,7 +398,7 @@ define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) nounwind ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl sincosf ; CHECK-NEXT: ldp s1, s0, [sp, #16] -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: ld1 { v0.s }[1], [x19] ; CHECK-NEXT: ld1 { v1.s }[1], [x20] ; CHECK-NEXT: ldp x20, 
x19, [sp, #64] // 16-byte Folded Reload @@ -413,44 +413,44 @@ define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) nounwind ; NO-LIBCALL-NEXT: sub sp, sp, #80 ; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; NO-LIBCALL-NEXT: mov s8, v0.s[1] -; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Spill +; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s8 ; NO-LIBCALL-NEXT: bl sinf ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 ; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 ; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; NO-LIBCALL-NEXT: mov s9, v0.s[2] ; NO-LIBCALL-NEXT: fmov s0, s9 ; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 ; NO-LIBCALL-NEXT: mov v1.s[2], v0.s[0] ; NO-LIBCALL-NEXT: fmov s0, s8 -; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Spill ; NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill -; 
NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 ; NO-LIBCALL-NEXT: bl cosf -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Reload ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 ; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] -; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s9 ; NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: fmov s2, s0 ; NO-LIBCALL-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; NO-LIBCALL-NEXT: mov v1.s[2], v2.s[0] ; NO-LIBCALL-NEXT: add sp, sp, #80 ; NO-LIBCALL-NEXT: ret @@ -465,12 +465,12 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) nounwind ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: add x0, sp, #44 ; CHECK-NEXT: add x1, sp, #40 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #28 ; CHECK-NEXT: add x1, sp, #24 ; CHECK-NEXT: add x19, sp, #28 @@ -478,7 +478,7 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) nounwind ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: bl sincosf ; CHECK-NEXT: ldp s1, s0, [sp, #40] -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded 
Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: ld1 { v0.s }[1], [x19] ; CHECK-NEXT: ld1 { v1.s }[1], [x20] ; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload @@ -490,33 +490,33 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) nounwind ; NO-LIBCALL-LABEL: test_sincos_v2f32: ; NO-LIBCALL: // %bb.0: ; NO-LIBCALL-NEXT: sub sp, sp, #64 -; NO-LIBCALL-NEXT: str d8, [sp, #48] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str d8, [sp, #48] // 8-byte Spill ; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 ; NO-LIBCALL-NEXT: mov s8, v0.s[1] -; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Spill +; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s8 ; NO-LIBCALL-NEXT: bl sinf ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 ; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 ; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Spill ; NO-LIBCALL-NEXT: fmov s0, s8 ; NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 ; 
NO-LIBCALL-NEXT: bl cosf ; NO-LIBCALL-NEXT: fmov s1, s0 ; NO-LIBCALL-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload -; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload -; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Reload +; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Reload ; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; NO-LIBCALL-NEXT: mov v1.s[1], v2.s[0] ; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1 @@ -532,24 +532,24 @@ define { double, double } @test_sincos_f64(double %a) nounwind { ; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: add x0, sp, #24 ; CHECK-NEXT: add x1, sp, #8 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: bl sincos ; CHECK-NEXT: ldr d0, [sp, #24] ; CHECK-NEXT: ldr d1, [sp, #8] -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret ; ; NO-LIBCALL-LABEL: test_sincos_f64: ; NO-LIBCALL: // %bb.0: ; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill -; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Spill ; NO-LIBCALL-NEXT: fmov d8, d0 ; NO-LIBCALL-NEXT: bl sin ; NO-LIBCALL-NEXT: fmov d9, d0 ; NO-LIBCALL-NEXT: fmov d0, d8 ; NO-LIBCALL-NEXT: bl cos -; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; NO-LIBCALL-NEXT: fmov d1, d0 ; NO-LIBCALL-NEXT: fmov d0, d9 ; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload @@ -564,12 +564,12 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) nounwi ; CHECK-NEXT: sub sp, sp, #80 ; CHECK-NEXT: add x0, sp, #56 ; CHECK-NEXT: add x1, sp, #40 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl sincos -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: add x0, sp, #32 ; CHECK-NEXT: add x1, sp, #24 ; CHECK-NEXT: add x19, sp, #32 @@ -578,7 +578,7 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) nounwi ; CHECK-NEXT: bl sincos ; CHECK-NEXT: ldr d0, [sp, #56] ; CHECK-NEXT: ldr d1, [sp, #40] -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: ld1 { v0.d }[1], [x19] ; CHECK-NEXT: ld1 { v1.d }[1], [x20] ; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload @@ -588,32 +588,32 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) nounwi ; NO-LIBCALL-LABEL: test_sincos_v2f64: ; NO-LIBCALL: // %bb.0: ; NO-LIBCALL-NEXT: sub sp, sp, #64 -; NO-LIBCALL-NEXT: str d8, [sp, #48] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str d8, [sp, #48] // 
8-byte Spill ; NO-LIBCALL-NEXT: mov d8, v0.d[1] -; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Spill +; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Spill ; NO-LIBCALL-NEXT: fmov d0, d8 ; NO-LIBCALL-NEXT: bl sin ; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; NO-LIBCALL-NEXT: bl sin -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 ; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0] -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Spill ; NO-LIBCALL-NEXT: fmov d0, d8 ; NO-LIBCALL-NEXT: bl cos ; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; NO-LIBCALL-NEXT: bl cos ; NO-LIBCALL-NEXT: fmov d1, d0 ; NO-LIBCALL-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload -; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload -; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Reload +; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Reload ; NO-LIBCALL-NEXT: mov v1.d[1], v2.d[0] ; NO-LIBCALL-NEXT: add sp, sp, #64 ; NO-LIBCALL-NEXT: ret @@ -628,7 +628,7 @@ define { <3 x double>, <3 x double> } @test_sincos_v3f64(<3 x double> %a) nounwi ; 
CHECK-NEXT: add x0, sp, #16 ; CHECK-NEXT: add x1, sp, #8 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: fmov d8, d2 ; CHECK-NEXT: fmov d9, d1 ; CHECK-NEXT: bl sincos @@ -644,7 +644,7 @@ define { <3 x double>, <3 x double> } @test_sincos_v3f64(<3 x double> %a) nounwi ; CHECK-NEXT: ldr d2, [sp, #72] ; CHECK-NEXT: ldp d4, d1, [sp, #24] ; CHECK-NEXT: ldr d5, [sp, #40] -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret @@ -657,7 +657,7 @@ define { <3 x double>, <3 x double> } @test_sincos_v3f64(<3 x double> %a) nounwi ; NO-LIBCALL-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill ; NO-LIBCALL-NEXT: fmov d8, d2 ; NO-LIBCALL-NEXT: fmov d9, d1 -; NO-LIBCALL-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #48] // 8-byte Spill ; NO-LIBCALL-NEXT: bl sin ; NO-LIBCALL-NEXT: fmov d11, d0 ; NO-LIBCALL-NEXT: fmov d0, d9 @@ -676,7 +676,7 @@ define { <3 x double>, <3 x double> } @test_sincos_v3f64(<3 x double> %a) nounwi ; NO-LIBCALL-NEXT: bl cos ; NO-LIBCALL-NEXT: fmov d5, d0 ; NO-LIBCALL-NEXT: fmov d0, d11 -; NO-LIBCALL-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; NO-LIBCALL-NEXT: fmov d3, d10 ; NO-LIBCALL-NEXT: fmov d4, d9 ; NO-LIBCALL-NEXT: fmov d1, d12 diff --git a/llvm/test/CodeGen/AArch64/llvm.sincospi.ll b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll index b386df077c09d..6d2288b06acce 100644 --- a/llvm/test/CodeGen/AArch64/llvm.sincospi.ll +++ b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll @@ -64,24 +64,24 @@ define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) #0 { ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov h1, v0[1] 
-; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] ; 16-byte Spill ; CHECK-NEXT: add x0, sp, #28 ; CHECK-NEXT: add x1, sp, #24 ; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill ; CHECK-NEXT: fcvt s0, h1 ; CHECK-NEXT: bl ___sincospif -; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] ; 16-byte Reload ; CHECK-NEXT: add x0, sp, #20 ; CHECK-NEXT: add x1, sp, #16 ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: bl ___sincospif -; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] ; 16-byte Reload ; CHECK-NEXT: add x0, sp, #36 ; CHECK-NEXT: add x1, sp, #32 ; CHECK-NEXT: mov h0, v0[2] ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: bl ___sincospif -; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] ; 16-byte Reload ; CHECK-NEXT: add x0, sp, #44 ; CHECK-NEXT: add x1, sp, #40 ; CHECK-NEXT: mov h0, v0[3] @@ -139,17 +139,17 @@ define { <3 x float>, <3 x float> } @test_sincospi_v3f32(<3 x float> %a) #0 { ; CHECK-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] ; 16-byte Spill ; CHECK-NEXT: ; kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl ___sincospif -; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] ; 16-byte Reload ; CHECK-NEXT: add x0, sp, #36 ; CHECK-NEXT: add x1, sp, #32 ; CHECK-NEXT: add x19, sp, #36 ; CHECK-NEXT: add x20, sp, #32 ; CHECK-NEXT: mov s0, v0[1] ; CHECK-NEXT: bl ___sincospif -; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] ; 16-byte Reload ; CHECK-NEXT: add x0, sp, #44 ; CHECK-NEXT: add x1, sp, #40 ; CHECK-NEXT: add x21, sp, #44 @@ -179,10 +179,10 @@ define { <2 x float>, <2 x float> } @test_sincospi_v2f32(<2 x float> %a) #0 { ; CHECK-NEXT: add x1, sp, #24 ; CHECK-NEXT: stp x20, x19, [sp, 
#32] ; 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] ; 16-byte Spill ; CHECK-NEXT: ; kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl ___sincospif -; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] ; 16-byte Reload ; CHECK-NEXT: add x0, sp, #20 ; CHECK-NEXT: add x1, sp, #16 ; CHECK-NEXT: add x19, sp, #20 @@ -226,10 +226,10 @@ define { <2 x double>, <2 x double> } @test_sincospi_v2f64(<2 x double> %a) #0 { ; CHECK-NEXT: add x1, sp, #32 ; CHECK-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] ; 16-byte Spill ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl ___sincospi -; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] ; 16-byte Reload ; CHECK-NEXT: add x0, sp, #24 ; CHECK-NEXT: add x1, sp, #16 ; CHECK-NEXT: add x19, sp, #24 diff --git a/llvm/test/CodeGen/AArch64/luti-with-sme2.ll b/llvm/test/CodeGen/AArch64/luti-with-sme2.ll index 59e1cba8317bd..ae8ed58bcf8f8 100644 --- a/llvm/test/CodeGen/AArch64/luti-with-sme2.ll +++ b/llvm/test/CodeGen/AArch64/luti-with-sme2.ll @@ -6,7 +6,7 @@ define { , } @test_luti4_lane_i16_x2_tuple( ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z12, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z11, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -21,7 +21,7 @@ define { , } @test_luti4_lane_i16_x2_tuple( ; CHECK-NEXT: luti4 z1.h, { z11.h, z12.h }, z0[0] ; CHECK-NEXT: ldr z12, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z0.d, z2.d ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -47,7 +47,7 @@ define { , } @test_luti4_lane_f16_x2_tupl ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z12, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z11, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -62,7 +62,7 @@ define { , } @test_luti4_lane_f16_x2_tupl ; CHECK-NEXT: luti4 z1.h, { z11.h, z12.h }, z0[0] ; CHECK-NEXT: ldr z12, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z0.d, z2.d ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -88,7 +88,7 @@ define { , } @test_luti4_lane_bf16_x2 ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z12, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z11, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -103,7 +103,7 @@ define { , } @test_luti4_lane_bf16_x2 ; CHECK-NEXT: luti4 z1.h, { z11.h, z12.h }, z0[0] ; CHECK-NEXT: ldr z12, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z0.d, z2.d ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll index 65afd9276c787..a52eebbe4bfd1 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll @@ -873,7 +873,7 @@ define double @reassociate_adds_from_calls() { ; CHECK: // %bb.0: ; CHECK-NEXT: str d10, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset b8, -16 @@ -888,7 +888,7 @@ define double @reassociate_adds_from_calls() { ; CHECK-NEXT: bl bar ; CHECK-NEXT: fadd d1, d8, d9 ; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-NEXT: fadd d1, d1, d10 ; CHECK-NEXT: fadd d0, d1, d0 ; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -908,7 +908,7 @@ define double @reassociate_adds_from_calls_reassoc() { ; CHECK: // %bb.0: ; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset b8, -16 @@ -923,7 +923,7 @@ define double @reassociate_adds_from_calls_reassoc() { ; CHECK-NEXT: bl bar ; CHECK-NEXT: fadd d1, d8, d9 ; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-NEXT: fadd d0, d10, d0 ; CHECK-NEXT: fadd d0, d1, d0 ; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -943,7 +943,7 @@ define double @already_reassociated() { ; CHECK: // %bb.0: ; CHECK-NEXT: str d10, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset b8, -16 @@ -958,7 +958,7 @@ define double @already_reassociated() { ; CHECK-NEXT: bl bar ; CHECK-NEXT: fadd d1, d8, d9 ; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-NEXT: fadd d0, d10, d0 ; CHECK-NEXT: fadd d0, d1, d0 ; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -978,7 +978,7 @@ define double @already_reassociated_reassoc() { ; CHECK: // %bb.0: ; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset b8, -16 @@ -993,7 +993,7 @@ define double @already_reassociated_reassoc() { ; CHECK-NEXT: bl bar ; CHECK-NEXT: fadd d1, d8, d9 ; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-NEXT: fadd d0, d10, d0 ; CHECK-NEXT: fadd d0, d1, d0 ; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll index 1e7224683c6c8..0a6dbed9a9eaf 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll @@ -11,14 +11,14 @@ define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V8A-NEXT: hint #27 ; V8A-NEXT: 
.cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 -; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; V8A-NEXT: str x30, [sp, #16] // 8-byte Spill ; V8A-NEXT: .cfi_def_cfa_offset 32 ; V8A-NEXT: .cfi_offset w30, -16 ; V8A-NEXT: bl OUTLINED_FUNCTION_0 ; V8A-NEXT: //APP ; V8A-NEXT: mov x30, x0 ; V8A-NEXT: //NO_APP -; V8A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V8A-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; V8A-NEXT: add sp, sp, #32 ; V8A-NEXT: hint #31 ; V8A-NEXT: ret @@ -29,14 +29,14 @@ define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V83A-NEXT: pacibsp ; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 -; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; V83A-NEXT: str x30, [sp, #16] // 8-byte Spill ; V83A-NEXT: .cfi_def_cfa_offset 32 ; V83A-NEXT: .cfi_offset w30, -16 ; V83A-NEXT: bl OUTLINED_FUNCTION_0 ; V83A-NEXT: //APP ; V83A-NEXT: mov x30, x0 ; V83A-NEXT: //NO_APP -; V83A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V83A-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; V83A-NEXT: add sp, sp, #32 ; V83A-NEXT: retab %1 = alloca i32, align 4 @@ -62,14 +62,14 @@ define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V8A-NEXT: hint #27 ; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 -; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; V8A-NEXT: str x30, [sp, #16] // 8-byte Spill ; V8A-NEXT: .cfi_def_cfa_offset 32 ; V8A-NEXT: .cfi_offset w30, -16 ; V8A-NEXT: bl OUTLINED_FUNCTION_0 ; V8A-NEXT: //APP ; V8A-NEXT: mov x30, x0 ; V8A-NEXT: //NO_APP -; V8A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V8A-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; V8A-NEXT: add sp, sp, #32 ; V8A-NEXT: hint #31 ; V8A-NEXT: ret @@ -80,14 +80,14 @@ define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V83A-NEXT: pacibsp ; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 -; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; 
V83A-NEXT: str x30, [sp, #16] // 8-byte Spill ; V83A-NEXT: .cfi_def_cfa_offset 32 ; V83A-NEXT: .cfi_offset w30, -16 ; V83A-NEXT: bl OUTLINED_FUNCTION_0 ; V83A-NEXT: //APP ; V83A-NEXT: mov x30, x0 ; V83A-NEXT: //NO_APP -; V83A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V83A-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; V83A-NEXT: add sp, sp, #32 ; V83A-NEXT: retab %1 = alloca i32, align 4 @@ -113,14 +113,14 @@ define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V8A-NEXT: hint #27 ; V8A-NEXT: .cfi_negate_ra_state ; V8A-NEXT: sub sp, sp, #32 -; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; V8A-NEXT: str x30, [sp, #16] // 8-byte Spill ; V8A-NEXT: .cfi_def_cfa_offset 32 ; V8A-NEXT: .cfi_offset w30, -16 ; V8A-NEXT: bl OUTLINED_FUNCTION_0 ; V8A-NEXT: //APP ; V8A-NEXT: mov x30, x0 ; V8A-NEXT: //NO_APP -; V8A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V8A-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; V8A-NEXT: add sp, sp, #32 ; V8A-NEXT: hint #31 ; V8A-NEXT: ret @@ -131,14 +131,14 @@ define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; V83A-NEXT: pacibsp ; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: sub sp, sp, #32 -; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; V83A-NEXT: str x30, [sp, #16] // 8-byte Spill ; V83A-NEXT: .cfi_def_cfa_offset 32 ; V83A-NEXT: .cfi_offset w30, -16 ; V83A-NEXT: bl OUTLINED_FUNCTION_0 ; V83A-NEXT: //APP ; V83A-NEXT: mov x30, x0 ; V83A-NEXT: //NO_APP -; V83A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V83A-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; V83A-NEXT: add sp, sp, #32 ; V83A-NEXT: retab %1 = alloca i32, align 4 @@ -164,3 +164,5 @@ define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" ; CHECK-NOT: hint #2{{[5,7]}} ; CHECK-NOT: .cfi_negate_ra_state ; CHECK-NOT: auti{{[a,b]}}sp +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/mingw-refptr.ll b/llvm/test/CodeGen/AArch64/mingw-refptr.ll index cc9fac0506ff5..02c81440dd753 100644 --- a/llvm/test/CodeGen/AArch64/mingw-refptr.ll +++ b/llvm/test/CodeGen/AArch64/mingw-refptr.ll @@ -82,7 +82,7 @@ define dso_local void @sspFunc() #0 { ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: .seh_stackalloc 32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 16 ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: adrp x8, .refptr.__stack_chk_guard @@ -99,7 +99,7 @@ define dso_local void @sspFunc() #0 { ; CHECK-NEXT: b.ne .LBB6_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 16 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: .seh_stackalloc 32 diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll index cff7759c72c94..7c535d4d63a6c 100644 --- a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll @@ -134,9 +134,9 @@ define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 { ; CHECK-FASTISEL-LABEL: reverse_v8i32: ; CHECK-FASTISEL: // %bb.0: ; CHECK-FASTISEL-NEXT: sub sp, sp, #16 -; CHECK-FASTISEL-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-FASTISEL-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-FASTISEL-NEXT: mov v1.16b, v0.16b -; CHECK-FASTISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FASTISEL-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FASTISEL-NEXT: rev64 v0.4s, v0.4s ; CHECK-FASTISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-FASTISEL-NEXT: rev64 v1.4s, v1.4s @@ -165,12 +165,12 @@ define <16 x float> @reverse_v16f32(<16 x 
float> %a) #0 { ; CHECK-FASTISEL-LABEL: reverse_v16f32: ; CHECK-FASTISEL: // %bb.0: ; CHECK-FASTISEL-NEXT: sub sp, sp, #32 -; CHECK-FASTISEL-NEXT: str q3, [sp, #16] // 16-byte Folded Spill -; CHECK-FASTISEL-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-FASTISEL-NEXT: str q3, [sp, #16] // 16-byte Spill +; CHECK-FASTISEL-NEXT: str q2, [sp] // 16-byte Spill ; CHECK-FASTISEL-NEXT: mov v2.16b, v1.16b -; CHECK-FASTISEL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-FASTISEL-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-FASTISEL-NEXT: mov v3.16b, v0.16b -; CHECK-FASTISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-FASTISEL-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-FASTISEL-NEXT: rev64 v0.4s, v0.4s ; CHECK-FASTISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-FASTISEL-NEXT: rev64 v1.4s, v1.4s diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll index a84e6e7bcae82..669a49330856d 100644 --- a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll @@ -61,9 +61,9 @@ define @reverse_nxv32i1( %a) #0 { ; CHECK-FASTISEL: // %bb.0: ; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1 -; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl] // 2-byte Spill ; CHECK-FASTISEL-NEXT: mov p1.b, p0.b -; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Reload ; CHECK-FASTISEL-NEXT: rev p0.b, p0.b ; CHECK-FASTISEL-NEXT: rev p1.b, p1.b ; CHECK-FASTISEL-NEXT: addvl sp, sp, #1 diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll index fb2a1fa697c26..4b0d110632959 100644 --- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll +++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll @@ -2401,7 +2401,7 @@ define i32 @test_udot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: .cfi_offset w29, -96 ; CHECK-GI-NEXT: ldp q2, q1, [x1] ; CHECK-GI-NEXT: movi d0, #0000000000000000 -; CHECK-GI-NEXT: str w2, [sp, #12] // 4-byte Folded Spill +; CHECK-GI-NEXT: str w2, [sp, #12] // 4-byte Spill ; CHECK-GI-NEXT: mov b6, v2.b[3] ; CHECK-GI-NEXT: mov b7, v2.b[4] ; CHECK-GI-NEXT: mov b16, v2.b[5] @@ -2548,7 +2548,7 @@ define i32 @test_udot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: mov v16.h[3], w10 ; CHECK-GI-NEXT: uxtb w10, w3 ; CHECK-GI-NEXT: mov v6.h[4], w18 -; CHECK-GI-NEXT: ldr w18, [sp, #4] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w18, [sp, #4] // 4-byte Reload ; CHECK-GI-NEXT: mov v7.h[3], w9 ; CHECK-GI-NEXT: uxtb w9, w16 ; CHECK-GI-NEXT: uxtb w16, w22 @@ -2560,7 +2560,7 @@ define i32 @test_udot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v16.h[4], w16 ; CHECK-GI-NEXT: mov v7.h[4], w13 -; CHECK-GI-NEXT: ldr w13, [sp, #8] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w13, [sp, #8] // 4-byte Reload ; CHECK-GI-NEXT: mov v6.h[5], w9 ; CHECK-GI-NEXT: uxtb w9, w1 ; 
CHECK-GI-NEXT: mov v3.h[5], w11 @@ -2654,7 +2654,7 @@ define i32 @test_udot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: mov v17.s[2], wzr ; CHECK-GI-NEXT: mov v3.s[3], w8 ; CHECK-GI-NEXT: mov v4.s[3], w9 -; CHECK-GI-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w9, [sp, #12] // 4-byte Reload ; CHECK-GI-NEXT: mov v5.s[3], w15 ; CHECK-GI-NEXT: mov v6.s[3], w17 ; CHECK-GI-NEXT: mov v7.s[3], w18 @@ -2831,7 +2831,7 @@ define i32 @test_sdot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: .cfi_offset w29, -96 ; CHECK-GI-NEXT: ldp q2, q1, [x1] ; CHECK-GI-NEXT: movi d0, #0000000000000000 -; CHECK-GI-NEXT: str w2, [sp, #12] // 4-byte Folded Spill +; CHECK-GI-NEXT: str w2, [sp, #12] // 4-byte Spill ; CHECK-GI-NEXT: mov b5, v2.b[2] ; CHECK-GI-NEXT: mov b6, v2.b[3] ; CHECK-GI-NEXT: mov b7, v2.b[4] @@ -2982,7 +2982,7 @@ define i32 @test_sdot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: sxtb w8, w16 ; CHECK-GI-NEXT: sxtb w16, w3 ; CHECK-GI-NEXT: mov v6.h[4], w11 -; CHECK-GI-NEXT: ldr w11, [sp, #4] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w11, [sp, #4] // 4-byte Reload ; CHECK-GI-NEXT: mov v3.h[5], w9 ; CHECK-GI-NEXT: sxtb w9, w15 ; CHECK-GI-NEXT: sxtb w15, w27 @@ -2997,7 +2997,7 @@ define i32 @test_sdot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: sxtb w10, w26 ; CHECK-GI-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v6.h[5], w8 -; CHECK-GI-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w8, [sp, #8] // 4-byte Reload ; CHECK-GI-NEXT: mov v7.h[5], w10 ; CHECK-GI-NEXT: sxtb w10, w12 ; CHECK-GI-NEXT: sxtb w12, w18 @@ -3085,7 +3085,7 @@ define i32 @test_sdot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: mov v17.s[2], wzr ; CHECK-GI-NEXT: mov v3.s[3], w8 ; CHECK-GI-NEXT: mov v4.s[3], w9 -; CHECK-GI-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; 
CHECK-GI-NEXT: ldr w9, [sp, #12] // 4-byte Reload ; CHECK-GI-NEXT: mov v5.s[3], w16 ; CHECK-GI-NEXT: mov v6.s[3], w17 ; CHECK-GI-NEXT: mov v7.s[3], w18 @@ -4563,7 +4563,7 @@ define i32 @test_udot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: .cfi_offset w29, -96 ; CHECK-GI-NEXT: ldp q7, q16, [x1] ; CHECK-GI-NEXT: movi d5, #0000000000000000 -; CHECK-GI-NEXT: str w2, [sp, #12] // 4-byte Folded Spill +; CHECK-GI-NEXT: str w2, [sp, #12] // 4-byte Spill ; CHECK-GI-NEXT: movi d6, #0000000000000000 ; CHECK-GI-NEXT: movi d0, #0000000000000000 ; CHECK-GI-NEXT: movi d1, #0000000000000000 @@ -4623,7 +4623,7 @@ define i32 @test_udot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: uxtb w10, w21 ; CHECK-GI-NEXT: mov v7.h[2], w9 ; CHECK-GI-NEXT: uxtb w9, w13 -; CHECK-GI-NEXT: str s20, [sp] // 4-byte Folded Spill +; CHECK-GI-NEXT: str s20, [sp] // 4-byte Spill ; CHECK-GI-NEXT: mov b25, v16.b[10] ; CHECK-GI-NEXT: fmov w25, s18 ; CHECK-GI-NEXT: uxtb w22, w22 @@ -4791,11 +4791,11 @@ define i32 @test_udot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: uxtb w11, w7 ; CHECK-GI-NEXT: fmov w8, s26 ; CHECK-GI-NEXT: mov v19.h[4], w15 -; CHECK-GI-NEXT: ldr w15, [sp] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w15, [sp] // 4-byte Reload ; CHECK-GI-NEXT: mov v21.h[7], w17 ; CHECK-GI-NEXT: uxtb w17, w6 ; CHECK-GI-NEXT: mov v22.h[4], w11 -; CHECK-GI-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w11, [sp, #8] // 4-byte Reload ; CHECK-GI-NEXT: uxtb w8, w8 ; CHECK-GI-NEXT: uxtb w15, w15 ; CHECK-GI-NEXT: fmov w13, s30 @@ -4812,7 +4812,7 @@ define i32 @test_udot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: fmov w12, s31 ; CHECK-GI-NEXT: mov v17.h[7], w11 ; CHECK-GI-NEXT: uxtb w11, w13 -; CHECK-GI-NEXT: ldr w13, [sp, #4] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w13, [sp, #4] // 4-byte Reload ; CHECK-GI-NEXT: mov v20.h[5], w17 ; CHECK-GI-NEXT: mov 
v23.h[7], w8 ; CHECK-GI-NEXT: fmov w9, s27 @@ -4917,7 +4917,7 @@ define i32 @test_udot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: mov v1.s[3], wzr ; CHECK-GI-NEXT: mov v3.s[3], wzr ; CHECK-GI-NEXT: mov v21.s[3], w9 -; CHECK-GI-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w9, [sp, #12] // 4-byte Reload ; CHECK-GI-NEXT: mov v2.s[3], wzr ; CHECK-GI-NEXT: mov v23.s[2], w11 ; CHECK-GI-NEXT: umov w11, v19.h[7] @@ -5165,7 +5165,7 @@ define i32 @test_sdot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: .cfi_offset w29, -96 ; CHECK-GI-NEXT: ldp q7, q16, [x1] ; CHECK-GI-NEXT: movi d1, #0000000000000000 -; CHECK-GI-NEXT: str w2, [sp, #12] // 4-byte Folded Spill +; CHECK-GI-NEXT: str w2, [sp, #12] // 4-byte Spill ; CHECK-GI-NEXT: movi d3, #0000000000000000 ; CHECK-GI-NEXT: movi d2, #0000000000000000 ; CHECK-GI-NEXT: movi d5, #0000000000000000 @@ -5386,7 +5386,7 @@ define i32 @test_sdot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: sxtb w13, w6 ; CHECK-GI-NEXT: mov v20.h[7], w17 ; CHECK-GI-NEXT: mov v21.h[4], w9 -; CHECK-GI-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w9, [sp, #8] // 4-byte Reload ; CHECK-GI-NEXT: mov v22.h[5], w18 ; CHECK-GI-NEXT: mov b25, v17.b[14] ; CHECK-GI-NEXT: fmov w26, s27 @@ -5397,7 +5397,7 @@ define i32 @test_sdot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: sxtb w10, w11 ; CHECK-GI-NEXT: sxtb w11, w16 ; CHECK-GI-NEXT: mov v21.h[5], w8 -; CHECK-GI-NEXT: ldr w8, [sp, #4] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w8, [sp, #4] // 4-byte Reload ; CHECK-GI-NEXT: sxtb w15, w26 ; CHECK-GI-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: sxtb w8, w8 @@ -5507,7 +5507,7 @@ define i32 @test_sdot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK-GI-NEXT: smov w14, v18.h[3] ; CHECK-GI-NEXT: smov w15, v18.h[7] ; CHECK-GI-NEXT: fmov s18, w9 -; 
CHECK-GI-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; CHECK-GI-NEXT: ldr w9, [sp, #12] // 4-byte Reload ; CHECK-GI-NEXT: mov v17.s[2], w13 ; CHECK-GI-NEXT: smov w12, v24.h[3] ; CHECK-GI-NEXT: smov w13, v24.h[7] @@ -5848,7 +5848,7 @@ define i32 @test_sdot_v33i8_double(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33 ; CHECK-GI-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 ; CHECK-GI-NEXT: .cfi_offset w29, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -6599,7 +6599,7 @@ define i32 @test_sdot_v33i8_double(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33 ; CHECK-GI-NEXT: add v4.4s, v5.4s, v7.4s ; CHECK-GI-NEXT: add v5.4s, v16.4s, v17.4s ; CHECK-GI-NEXT: add v0.4s, v0.4s, v2.4s -; CHECK-GI-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: add v2.4s, v3.4s, v6.4s ; CHECK-GI-NEXT: add v1.4s, v23.4s, v1.4s @@ -6793,7 +6793,7 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> % ; CHECK-GI-NEXT: stp d13, d12, [sp, #-64]! 
// 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x29, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x29, [sp, #48] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 ; CHECK-GI-NEXT: .cfi_offset w29, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -7077,7 +7077,7 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> % ; CHECK-GI-NEXT: add v7.4s, v7.4s, v17.4s ; CHECK-GI-NEXT: add v16.4s, v20.4s, v21.4s ; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s -; CHECK-GI-NEXT: ldr x29, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x29, [sp, #48] // 8-byte Reload ; CHECK-GI-NEXT: add v0.4s, v18.4s, v0.4s ; CHECK-GI-NEXT: add v2.4s, v3.4s, v4.4s ; CHECK-GI-NEXT: add v3.4s, v5.4s, v6.4s @@ -8901,7 +8901,7 @@ define i32 @test_usdot_v64i8_double(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <6 ; CHECK-GI-NEXT: stp d13, d12, [sp, #176] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d11, d10, [sp, #192] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp d9, d8, [sp, #208] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x29, [sp, #224] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x29, [sp, #224] // 8-byte Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 240 ; CHECK-GI-NEXT: .cfi_offset w29, -16 ; CHECK-GI-NEXT: .cfi_offset b8, -24 @@ -8914,7 +8914,7 @@ define i32 @test_usdot_v64i8_double(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <6 ; CHECK-GI-NEXT: .cfi_offset b15, -80 ; CHECK-GI-NEXT: ushll v31.8h, v0.8b, #0 ; CHECK-GI-NEXT: ushll2 v8.8h, v0.16b, #0 -; CHECK-GI-NEXT: ldr x29, [sp, #224] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x29, [sp, #224] // 8-byte Reload ; CHECK-GI-NEXT: sshll v11.8h, v4.8b, #0 ; CHECK-GI-NEXT: sshll2 v12.8h, v4.16b, #0 ; CHECK-GI-NEXT: ushll v9.8h, v1.8b, #0 @@ -8944,7 +8944,7 @@ define i32 @test_usdot_v64i8_double(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <6 ; CHECK-GI-NEXT: smlal2 v27.4s, v10.8h, v14.8h ; 
CHECK-GI-NEXT: smull v26.4s, v0.4h, v4.4h ; CHECK-GI-NEXT: ushll v31.8h, v25.8b, #0 -; CHECK-GI-NEXT: str q19, [sp, #144] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q19, [sp, #144] // 16-byte Spill ; CHECK-GI-NEXT: ushll2 v25.8h, v25.16b, #0 ; CHECK-GI-NEXT: ushll v8.8h, v22.8b, #0 ; CHECK-GI-NEXT: stp q2, q1, [sp] // 32-byte Folded Spill @@ -8956,9 +8956,9 @@ define i32 @test_usdot_v64i8_double(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <6 ; CHECK-GI-NEXT: stp q4, q20, [sp, #112] // 32-byte Folded Spill ; CHECK-GI-NEXT: ushll2 v20.8h, v18.16b, #0 ; CHECK-GI-NEXT: sshll v11.8h, v17.8b, #0 -; CHECK-GI-NEXT: str q27, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q28, [sp, #112] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q27, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: str q27, [sp, #32] // 16-byte Spill +; CHECK-GI-NEXT: ldr q28, [sp, #112] // 16-byte Reload +; CHECK-GI-NEXT: ldr q27, [sp, #48] // 16-byte Reload ; CHECK-GI-NEXT: sshll2 v19.8h, v17.16b, #0 ; CHECK-GI-NEXT: sshll v12.8h, v16.8b, #0 ; CHECK-GI-NEXT: sshll2 v18.8h, v16.16b, #0 @@ -8978,19 +8978,19 @@ define i32 @test_usdot_v64i8_double(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <6 ; CHECK-GI-NEXT: smull v1.4s, v10.4h, v14.4h ; CHECK-GI-NEXT: smull v0.4s, v20.4h, v29.4h ; CHECK-GI-NEXT: smlal2 v26.4s, v27.8h, v28.8h -; CHECK-GI-NEXT: ldr q28, [sp, #80] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q27, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q28, [sp, #80] // 16-byte Reload +; CHECK-GI-NEXT: ldr q27, [sp, #16] // 16-byte Reload ; CHECK-GI-NEXT: smlal2 v15.4s, v3.8h, v7.8h ; CHECK-GI-NEXT: ldp q7, q3, [sp, #128] // 32-byte Folded Reload ; CHECK-GI-NEXT: smlal2 v23.4s, v27.8h, v28.8h -; CHECK-GI-NEXT: ldr q28, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q27, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q28, [sp, #64] // 16-byte Reload +; CHECK-GI-NEXT: ldr q27, [sp] // 16-byte Reload ; CHECK-GI-NEXT: smlal2 v17.4s, v31.8h, v11.8h ; CHECK-GI-NEXT: 
smlal2 v6.4s, v25.8h, v19.8h ; CHECK-GI-NEXT: smlal2 v16.4s, v8.8h, v12.8h ; CHECK-GI-NEXT: smlal2 v24.4s, v27.8h, v28.8h ; CHECK-GI-NEXT: smlal2 v4.4s, v22.8h, v18.8h -; CHECK-GI-NEXT: ldr q18, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q18, [sp, #32] // 16-byte Reload ; CHECK-GI-NEXT: smlal2 v5.4s, v9.8h, v13.8h ; CHECK-GI-NEXT: ldp d9, d8, [sp, #208] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldp d13, d12, [sp, #176] // 16-byte Folded Reload @@ -8999,7 +8999,7 @@ define i32 @test_usdot_v64i8_double(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <6 ; CHECK-GI-NEXT: ldp d11, d10, [sp, #192] // 16-byte Folded Reload ; CHECK-GI-NEXT: smlal2 v0.4s, v20.8h, v29.8h ; CHECK-GI-NEXT: add v3.4s, v3.4s, v7.4s -; CHECK-GI-NEXT: ldr q7, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q7, [sp, #96] // 16-byte Reload ; CHECK-GI-NEXT: add v19.4s, v24.4s, v15.4s ; CHECK-GI-NEXT: ldp d15, d14, [sp, #160] // 16-byte Folded Reload ; CHECK-GI-NEXT: add v7.4s, v7.4s, v18.4s diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll index f7a87ae340a73..61746e673ea1d 100644 --- a/llvm/test/CodeGen/AArch64/nontemporal.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal.ll @@ -502,10 +502,10 @@ define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { ; CHECK-LE-NEXT: sub sp, sp, #32 ; CHECK-LE-NEXT: mov d1, v0.d[1] ; CHECK-LE-NEXT: mov x0, sp -; CHECK-LE-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-LE-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-LE-NEXT: stnp d0, d1, [sp] ; CHECK-LE-NEXT: bl dummy -; CHECK-LE-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-LE-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-LE-NEXT: add sp, sp, #32 ; CHECK-LE-NEXT: ret ; @@ -513,10 +513,10 @@ define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: sub sp, sp, #32 ; CHECK-BE-NEXT: mov x0, sp -; CHECK-BE-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-BE-NEXT: str 
x30, [sp, #16] // 8-byte Spill ; CHECK-BE-NEXT: str q0, [sp] ; CHECK-BE-NEXT: bl dummy -; CHECK-BE-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-BE-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-BE-NEXT: add sp, sp, #32 ; CHECK-BE-NEXT: ret %tmp0 = alloca <4 x float> @@ -531,10 +531,10 @@ define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 { ; CHECK-LE-NEXT: sub sp, sp, #48 ; CHECK-LE-NEXT: mov d1, v0.d[1] ; CHECK-LE-NEXT: mov x0, sp -; CHECK-LE-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-LE-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-LE-NEXT: stnp d0, d1, [sp, #16] ; CHECK-LE-NEXT: bl dummy -; CHECK-LE-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-LE-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-LE-NEXT: add sp, sp, #48 ; CHECK-LE-NEXT: ret ; @@ -542,10 +542,10 @@ define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 { ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: sub sp, sp, #48 ; CHECK-BE-NEXT: mov x0, sp -; CHECK-BE-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-BE-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-BE-NEXT: str q0, [sp, #16] ; CHECK-BE-NEXT: bl dummy -; CHECK-BE-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-BE-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-BE-NEXT: add sp, sp, #48 ; CHECK-BE-NEXT: ret %tmp0 = alloca <4 x float>, i32 2 diff --git a/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll b/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll index 22774ebf1a662..5b64d25fbe65f 100644 --- a/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll +++ b/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll @@ -10,12 +10,12 @@ define void @streaming_mode_change1() #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded 
Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload @@ -34,12 +34,12 @@ define void @streaming_mode_change2() #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload @@ -58,12 +58,12 @@ define void @streaming_mode_change3() #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; 
CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/perm-tb-with-sme2.ll b/llvm/test/CodeGen/AArch64/perm-tb-with-sme2.ll index 1ceb25b89a364..ede66fd09925d 100644 --- a/llvm/test/CodeGen/AArch64/perm-tb-with-sme2.ll +++ b/llvm/test/CodeGen/AArch64/perm-tb-with-sme2.ll @@ -10,7 +10,7 @@ define { , } @tbl2_b_tuple(i64 %stride, ptr ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z12, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z11, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -20,7 +20,7 @@ define { , } @tbl2_b_tuple(i64 %stride, ptr ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: ld1b { z3.b, z11.b }, pn8/z, [x1] ; CHECK-NEXT: ld1b { z4.b, z12.b }, pn8/z, [x1, x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: tbl z2.b, { z3.b, z4.b }, z0.b ; CHECK-NEXT: tbl z1.b, { z11.b, z12.b }, z0.b ; CHECK-NEXT: ldr z12, [sp, #1, mul vl] // 16-byte Folded Reload @@ -50,7 +50,7 @@ define { , } @tbl2_h_tuple(i64 %stride, ptr ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z12, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z11, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -61,7 +61,7 @@ define { , } @tbl2_h_tuple(i64 %stride, ptr ; CHECK-NEXT: add x8, x1, x0 ; CHECK-NEXT: ld1h { z3.h, z11.h }, pn8/z, [x1] ; CHECK-NEXT: ld1h { z4.h, z12.h }, pn8/z, [x8] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: tbl z2.h, { z3.h, z4.h }, z0.h ; CHECK-NEXT: tbl z1.h, { z11.h, z12.h }, z0.h ; CHECK-NEXT: ldr z12, [sp, #1, mul vl] // 16-byte Folded Reload @@ -91,7 +91,7 @@ define { , } @tbl2_s_tuple(i64 %stride, ptr ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z12, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z11, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -102,7 +102,7 @@ define { , } @tbl2_s_tuple(i64 %stride, ptr ; CHECK-NEXT: add x8, x1, x0 ; CHECK-NEXT: ld1w { z3.s, z11.s }, pn8/z, [x1] ; CHECK-NEXT: ld1w { z4.s, z12.s }, pn8/z, [x8] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: tbl z2.s, { z3.s, z4.s }, z0.s ; CHECK-NEXT: tbl z1.s, { z11.s, z12.s }, z0.s ; CHECK-NEXT: ldr z12, [sp, #1, mul vl] // 16-byte Folded Reload @@ -132,7 +132,7 @@ define { , } @tbl2_d_tuple(i64 %stride, ptr ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, 
#-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z12, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z11, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -143,7 +143,7 @@ define { , } @tbl2_d_tuple(i64 %stride, ptr ; CHECK-NEXT: add x8, x1, x0 ; CHECK-NEXT: ld1d { z3.d, z11.d }, pn8/z, [x1] ; CHECK-NEXT: ld1d { z4.d, z12.d }, pn8/z, [x8] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: tbl z2.d, { z3.d, z4.d }, z0.d ; CHECK-NEXT: tbl z1.d, { z11.d, z12.d }, z0.d ; CHECK-NEXT: ldr z12, [sp, #1, mul vl] // 16-byte Folded Reload @@ -173,7 +173,7 @@ define { , } @tbl2_bf16_tuple(i64 %st ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z12, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z11, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -184,7 +184,7 @@ define { , } @tbl2_bf16_tuple(i64 %st ; CHECK-NEXT: add x8, x1, x0 ; CHECK-NEXT: ld1h { z3.h, z11.h }, pn8/z, [x1] ; CHECK-NEXT: ld1h { z4.h, z12.h }, pn8/z, [x8] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: tbl z2.h, { z3.h, z4.h }, z0.h ; CHECK-NEXT: tbl z1.h, { z11.h, z12.h }, z0.h ; CHECK-NEXT: ldr z12, [sp, #1, mul vl] // 16-byte Folded Reload @@ -214,7 +214,7 @@ define { , } @tbl2_f32_tuple(i64 %strid ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z12, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z11, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -225,7 +225,7 @@ define { , } @tbl2_f32_tuple(i64 %strid ; CHECK-NEXT: add x8, x1, x0 ; CHECK-NEXT: ld1w { z3.s, z11.s }, pn8/z, [x1] ; CHECK-NEXT: ld1w { z4.s, z12.s }, pn8/z, [x8] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: tbl z2.s, { z3.s, z4.s }, z0.s ; CHECK-NEXT: tbl z1.s, { z11.s, z12.s }, z0.s ; CHECK-NEXT: ldr z12, [sp, #1, mul vl] // 16-byte Folded Reload @@ -255,7 +255,7 @@ define { , } @tbl2_f64_tuple(i64 %str ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z12, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z11, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -266,7 +266,7 @@ define { , } @tbl2_f64_tuple(i64 %str ; CHECK-NEXT: add x8, x1, x0 ; CHECK-NEXT: ld1d { z3.d, z11.d }, pn8/z, [x1] ; CHECK-NEXT: ld1d { z4.d, z12.d }, pn8/z, [x8] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: tbl z2.d, { z3.d, z4.d }, z0.d ; CHECK-NEXT: tbl z1.d, { z11.d, z12.d }, z0.d ; CHECK-NEXT: ldr z12, [sp, #1, mul vl] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll index 495541e9fd949..5b27ba8bf8452 100644 --- 
a/llvm/test/CodeGen/AArch64/pow.ll +++ b/llvm/test/CodeGen/AArch64/pow.ll @@ -69,36 +69,36 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind ; CHECK-LABEL: pow_v4f32_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: fmov s1, #0.25000000 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl powf ; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl powf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: fmov s1, #0.25000000 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl powf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[2], v0.s[0] -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[3] -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: bl powf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; 
CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[3], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -111,20 +111,20 @@ define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwi ; CHECK-LABEL: pow_v2f64_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov d0, v0.d[1] ; CHECK-NEXT: fmov d1, #0.25000000 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl pow ; CHECK-NEXT: fmov d1, #0.25000000 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl pow -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/pr135821.ll b/llvm/test/CodeGen/AArch64/pr135821.ll index cfd6cd086e130..1f9b68aeae0c7 100644 --- a/llvm/test/CodeGen/AArch64/pr135821.ll +++ b/llvm/test/CodeGen/AArch64/pr135821.ll @@ -5,16 +5,16 @@ define <4 x float> @f(ptr %0) { ; CHECK-LABEL: f: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: ldr 
q1, [x0, #56]! ; CHECK-NEXT: ldr d0, [x0, #16] ; CHECK-NEXT: mov v1.d[1], v0.d[0] -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEXT: bl use -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %2 = getelementptr inbounds nuw i8, ptr %0, i64 56 diff --git a/llvm/test/CodeGen/AArch64/pr142314.ll b/llvm/test/CodeGen/AArch64/pr142314.ll index f707ed26026f2..5c4f6021063fd 100644 --- a/llvm/test/CodeGen/AArch64/pr142314.ll +++ b/llvm/test/CodeGen/AArch64/pr142314.ll @@ -10,14 +10,14 @@ define <2 x ptr addrspace(1)> @widget() nounwind gc "statepoint-example" { ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: mov x1, xzr -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: str q0, [sp] ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: blr xzr ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: orr x8, x8, #0x8 ; CHECK-NEXT: ld1 { v0.d }[1], [x8] ; CHECK-NEXT: add sp, sp, #32 diff --git a/llvm/test/CodeGen/AArch64/pr164181.ll b/llvm/test/CodeGen/AArch64/pr164181.ll index 4ec63ecb2eeb4..18732ae5ae300 100644 --- a/llvm/test/CodeGen/AArch64/pr164181.ll +++ b/llvm/test/CodeGen/AArch64/pr164181.ll @@ -19,16 +19,16 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-LABEL: f: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #240 -; CHECK-NEXT: str x30, [sp, #144] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #144] // 8-byte Spill ; CHECK-NEXT: stp x28, x27, [sp, #160] // 16-byte Folded Spill ; CHECK-NEXT: 
stp x26, x25, [sp, #176] // 16-byte Folded Spill ; CHECK-NEXT: stp x24, x23, [sp, #192] // 16-byte Folded Spill ; CHECK-NEXT: stp x22, x21, [sp, #208] // 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #224] // 16-byte Folded Spill -; CHECK-NEXT: str w6, [sp, #20] // 4-byte Folded Spill -; CHECK-NEXT: str w4, [sp, #72] // 4-byte Folded Spill -; CHECK-NEXT: str w3, [sp, #112] // 4-byte Folded Spill -; CHECK-NEXT: str w5, [sp, #36] // 4-byte Folded Spill +; CHECK-NEXT: str w6, [sp, #20] // 4-byte Spill +; CHECK-NEXT: str w4, [sp, #72] // 4-byte Spill +; CHECK-NEXT: str w3, [sp, #112] // 4-byte Spill +; CHECK-NEXT: str w5, [sp, #36] // 4-byte Spill ; CHECK-NEXT: tbz w5, #0, .LBB0_43 ; CHECK-NEXT: // %bb.1: // %for.body41.lr.ph ; CHECK-NEXT: ldr x4, [sp, #312] @@ -55,33 +55,33 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: ldrb w19, [sp, #240] ; CHECK-NEXT: mov w25, wzr ; CHECK-NEXT: mov x24, xzr -; CHECK-NEXT: str w8, [sp, #108] // 4-byte Folded Spill +; CHECK-NEXT: str w8, [sp, #108] // 4-byte Spill ; CHECK-NEXT: mov x3, x26 ; CHECK-NEXT: ldp x9, x8, [sp, #344] -; CHECK-NEXT: str w12, [sp, #92] // 4-byte Folded Spill +; CHECK-NEXT: str w12, [sp, #92] // 4-byte Spill ; CHECK-NEXT: mov w12, #1 // =0x1 ; CHECK-NEXT: bic w12, w12, w0 -; CHECK-NEXT: str w12, [sp, #76] // 4-byte Folded Spill +; CHECK-NEXT: str w12, [sp, #76] // 4-byte Spill ; CHECK-NEXT: mov w12, #48 // =0x30 -; CHECK-NEXT: str x9, [sp, #136] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #136] // 8-byte Spill ; CHECK-NEXT: ldp x9, x15, [sp, #328] ; CHECK-NEXT: madd x8, x8, x12, x9 -; CHECK-NEXT: str x8, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp, #64] // 8-byte Spill ; CHECK-NEXT: add x8, x26, w26, uxtw #1 ; CHECK-NEXT: ldr x20, [x20, :got_lo12:var_50] -; CHECK-NEXT: str x26, [sp, #96] // 8-byte Folded Spill -; CHECK-NEXT: str x14, [sp, #152] // 8-byte Folded Spill +; CHECK-NEXT: str x26, [sp, #96] // 8-byte Spill +; CHECK-NEXT: 
str x14, [sp, #152] // 8-byte Spill ; CHECK-NEXT: lsl x6, x8, #3 ; CHECK-NEXT: add x8, x14, #120 -; CHECK-NEXT: str x4, [sp, #24] // 8-byte Folded Spill -; CHECK-NEXT: str w19, [sp, #16] // 4-byte Folded Spill -; CHECK-NEXT: str x8, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x4, [sp, #24] // 8-byte Spill +; CHECK-NEXT: str w19, [sp, #16] // 4-byte Spill +; CHECK-NEXT: str x8, [sp, #80] // 8-byte Spill ; CHECK-NEXT: b .LBB0_4 ; CHECK-NEXT: .p2align 5, , 16 ; CHECK-NEXT: .LBB0_3: // in Loop: Header=BB0_4 Depth=1 -; CHECK-NEXT: ldr w19, [sp, #16] // 4-byte Folded Reload -; CHECK-NEXT: ldr x24, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: ldr x14, [sp, #152] // 8-byte Folded Reload +; CHECK-NEXT: ldr w19, [sp, #16] // 4-byte Reload +; CHECK-NEXT: ldr x24, [sp, #40] // 8-byte Reload +; CHECK-NEXT: ldr x14, [sp, #152] // 8-byte Reload ; CHECK-NEXT: mov w23, #1 // =0x1 ; CHECK-NEXT: mov w30, #1 // =0x1 ; CHECK-NEXT: mov w25, w19 @@ -93,9 +93,9 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: // Child Loop BB0_11 Depth 5 ; CHECK-NEXT: // Child Loop BB0_28 Depth 5 ; CHECK-NEXT: // Child Loop BB0_39 Depth 5 -; CHECK-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload +; CHECK-NEXT: ldr w8, [sp, #20] // 4-byte Reload ; CHECK-NEXT: mov x12, x24 -; CHECK-NEXT: str x24, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x24, [sp, #48] // 8-byte Spill ; CHECK-NEXT: str w8, [x14] ; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: strb w19, [x14] @@ -103,8 +103,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: .p2align 5, , 16 ; CHECK-NEXT: .LBB0_5: // %for.cond.cleanup93.us ; CHECK-NEXT: // in Loop: Header=BB0_6 Depth=2 -; CHECK-NEXT: ldr w9, [sp, #36] // 4-byte Folded Reload -; CHECK-NEXT: ldr x4, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: ldr w9, [sp, #36] // 4-byte Reload +; CHECK-NEXT: ldr x4, [sp, #24] // 8-byte Reload ; CHECK-NEXT: ldp x24, x12, [sp, #48] // 16-byte 
Folded Reload ; CHECK-NEXT: mov x22, xzr ; CHECK-NEXT: mov w25, wzr @@ -118,20 +118,20 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: // Child Loop BB0_11 Depth 5 ; CHECK-NEXT: // Child Loop BB0_28 Depth 5 ; CHECK-NEXT: // Child Loop BB0_39 Depth 5 -; CHECK-NEXT: str x12, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: str x12, [sp, #40] // 8-byte Spill ; CHECK-NEXT: cmn x24, #30 ; CHECK-NEXT: mov x12, #-30 // =0xffffffffffffffe2 ; CHECK-NEXT: add x19, x4, w8, sxtw #2 ; CHECK-NEXT: mov x9, xzr ; CHECK-NEXT: csel x12, x24, x12, lo ; CHECK-NEXT: mov w4, w30 -; CHECK-NEXT: str x12, [sp, #56] // 8-byte Folded Spill +; CHECK-NEXT: str x12, [sp, #56] // 8-byte Spill ; CHECK-NEXT: b .LBB0_8 ; CHECK-NEXT: .p2align 5, , 16 ; CHECK-NEXT: .LBB0_7: // %for.cond.cleanup98.us ; CHECK-NEXT: // in Loop: Header=BB0_8 Depth=3 -; CHECK-NEXT: ldr w4, [sp, #72] // 4-byte Folded Reload -; CHECK-NEXT: ldr w23, [sp, #128] // 4-byte Folded Reload +; CHECK-NEXT: ldr w4, [sp, #72] // 4-byte Reload +; CHECK-NEXT: ldr w23, [sp, #128] // 4-byte Reload ; CHECK-NEXT: mov w9, #1 // =0x1 ; CHECK-NEXT: mov x22, xzr ; CHECK-NEXT: tbnz w0, #0, .LBB0_5 @@ -143,31 +143,31 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: // Child Loop BB0_11 Depth 5 ; CHECK-NEXT: // Child Loop BB0_28 Depth 5 ; CHECK-NEXT: // Child Loop BB0_39 Depth 5 -; CHECK-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x8, [sp, #64] // 8-byte Reload ; CHECK-NEXT: mov w14, #1152 // =0x480 ; CHECK-NEXT: mov w24, #1 // =0x1 ; CHECK-NEXT: mov w12, wzr -; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill +; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Spill ; CHECK-NEXT: mov w30, w4 ; CHECK-NEXT: madd x8, x9, x14, x8 ; CHECK-NEXT: mov w14, #1 // =0x1 -; CHECK-NEXT: str x8, [sp, #120] // 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp, #120] // 8-byte Spill ; CHECK-NEXT: add x8, x9, x9, lsl #1 ; CHECK-NEXT: lsl x26, x8, 
#4 ; CHECK-NEXT: sxtb w8, w23 ; CHECK-NEXT: mov w23, w25 -; CHECK-NEXT: str w8, [sp, #116] // 4-byte Folded Spill +; CHECK-NEXT: str w8, [sp, #116] // 4-byte Spill ; CHECK-NEXT: b .LBB0_10 ; CHECK-NEXT: .p2align 5, , 16 ; CHECK-NEXT: .LBB0_9: // %for.cond510.preheader.us ; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4 -; CHECK-NEXT: ldr w23, [sp, #92] // 4-byte Folded Reload +; CHECK-NEXT: ldr w23, [sp, #92] // 4-byte Reload ; CHECK-NEXT: mov x22, x8 -; CHECK-NEXT: ldr x3, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x27, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x3, [sp, #96] // 8-byte Reload +; CHECK-NEXT: ldr x27, [sp, #80] // 8-byte Reload ; CHECK-NEXT: mov x28, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov x14, xzr -; CHECK-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload +; CHECK-NEXT: ldr w8, [sp, #76] // 4-byte Reload ; CHECK-NEXT: tbz w8, #31, .LBB0_7 ; CHECK-NEXT: .LBB0_10: // %for.body99.us ; CHECK-NEXT: // Parent Loop BB0_4 Depth=1 @@ -177,9 +177,9 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: // Child Loop BB0_11 Depth 5 ; CHECK-NEXT: // Child Loop BB0_28 Depth 5 ; CHECK-NEXT: // Child Loop BB0_39 Depth 5 -; CHECK-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload +; CHECK-NEXT: ldr w8, [sp, #116] // 4-byte Reload ; CHECK-NEXT: and w8, w8, w8, asr #31 -; CHECK-NEXT: str w8, [sp, #128] // 4-byte Folded Spill +; CHECK-NEXT: str w8, [sp, #128] // 4-byte Spill ; CHECK-NEXT: .p2align 5, , 16 ; CHECK-NEXT: .LBB0_11: // %for.body113.us ; CHECK-NEXT: // Parent Loop BB0_4 Depth=1 @@ -190,10 +190,10 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: tbnz w0, #0, .LBB0_11 ; CHECK-NEXT: // %bb.12: // %for.cond131.preheader.us ; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4 -; CHECK-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload +; CHECK-NEXT: ldr w8, [sp, #112] // 4-byte Reload ; CHECK-NEXT: mov w4, #1 // =0x1 ; CHECK-NEXT: strb w8, [x18] -; 
CHECK-NEXT: ldr x8, [sp, #120] // 8-byte Folded Reload +; CHECK-NEXT: ldr x8, [sp, #120] // 8-byte Reload ; CHECK-NEXT: ldrh w8, [x8] ; CHECK-NEXT: cbnz w4, .LBB0_14 ; CHECK-NEXT: // %bb.13: // %cond.true146.us @@ -212,11 +212,11 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: cbz w8, .LBB0_17 ; CHECK-NEXT: // %bb.16: // %if.then.us ; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4 -; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill +; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Spill ; CHECK-NEXT: str wzr, [x18] ; CHECK-NEXT: .LBB0_17: // %if.end.us ; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4 -; CHECK-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload +; CHECK-NEXT: ldr w8, [sp, #108] // 4-byte Reload ; CHECK-NEXT: mov w4, #18984 // =0x4a28 ; CHECK-NEXT: mov w25, w23 ; CHECK-NEXT: strb w8, [x18] @@ -227,16 +227,16 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: cbz w8, .LBB0_19 ; CHECK-NEXT: // %bb.18: // %if.then.us.2 ; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4 -; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill +; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Spill ; CHECK-NEXT: strb wzr, [x18] ; CHECK-NEXT: .LBB0_19: // %if.then.us.5 ; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4 -; CHECK-NEXT: ldr w23, [sp, #132] // 4-byte Folded Reload +; CHECK-NEXT: ldr w23, [sp, #132] // 4-byte Reload ; CHECK-NEXT: mov w8, #29625 // =0x73b9 ; CHECK-NEXT: movk w8, #21515, lsl #16 ; CHECK-NEXT: cmp w23, w8 ; CHECK-NEXT: csel w23, w23, w8, lt -; CHECK-NEXT: str w23, [sp, #132] // 4-byte Folded Spill +; CHECK-NEXT: str w23, [sp, #132] // 4-byte Spill ; CHECK-NEXT: tbz w0, #0, .LBB0_21 ; CHECK-NEXT: // %bb.20: // in Loop: Header=BB0_10 Depth=4 ; CHECK-NEXT: mov w8, wzr @@ -253,8 +253,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: cbz w8, .LBB0_24 ; CHECK-NEXT: // %bb.23: // %if.then.us.7 ; CHECK-NEXT: // in Loop: 
Header=BB0_10 Depth=4 -; CHECK-NEXT: ldr x8, [sp, #152] // 8-byte Folded Reload -; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Folded Spill +; CHECK-NEXT: ldr x8, [sp, #152] // 8-byte Reload +; CHECK-NEXT: str wzr, [sp, #132] // 4-byte Spill ; CHECK-NEXT: str wzr, [x8] ; CHECK-NEXT: .LBB0_24: // %if.end.us.7 ; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4 @@ -348,8 +348,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: .p2align 5, , 16 ; CHECK-NEXT: .LBB0_37: // %if.then466.us ; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5 -; CHECK-NEXT: ldr x28, [sp, #152] // 8-byte Folded Reload -; CHECK-NEXT: ldr x3, [sp, #136] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #152] // 8-byte Reload +; CHECK-NEXT: ldr x3, [sp, #136] // 8-byte Reload ; CHECK-NEXT: sxtb w4, w4 ; CHECK-NEXT: bic w4, w4, w4, asr #31 ; CHECK-NEXT: str x3, [x28] @@ -390,7 +390,7 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var ; CHECK-NEXT: ldp x24, x23, [sp, #192] // 16-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #176] // 16-byte Folded Reload ; CHECK-NEXT: ldp x28, x27, [sp, #160] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #144] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #240 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/pr48188.ll b/llvm/test/CodeGen/AArch64/pr48188.ll index d01069696572e..836690c1e7f0d 100644 --- a/llvm/test/CodeGen/AArch64/pr48188.ll +++ b/llvm/test/CodeGen/AArch64/pr48188.ll @@ -9,15 +9,15 @@ define void @test() nounwind { ; GISEL-NEXT: sub sp, sp, #16 ; GISEL-NEXT: mov x8, xzr ; GISEL-NEXT: mov x9, x8 -; GISEL-NEXT: str x9, [sp] // 8-byte Folded Spill -; GISEL-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; GISEL-NEXT: str x9, [sp] // 8-byte Spill +; GISEL-NEXT: str x8, [sp, #8] // 8-byte Spill ; GISEL-NEXT: b .LBB0_1 ; GISEL-NEXT: .LBB0_1: // %loop ; GISEL-NEXT: // =>This Inner Loop Header: 
Depth=1 -; GISEL-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload -; GISEL-NEXT: ldr x9, [sp] // 8-byte Folded Reload -; GISEL-NEXT: str x9, [sp] // 8-byte Folded Spill -; GISEL-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; GISEL-NEXT: ldr x8, [sp, #8] // 8-byte Reload +; GISEL-NEXT: ldr x9, [sp] // 8-byte Reload +; GISEL-NEXT: str x9, [sp] // 8-byte Spill +; GISEL-NEXT: str x8, [sp, #8] // 8-byte Spill ; GISEL-NEXT: b .LBB0_1 ; ; SDAG-LABEL: test: @@ -25,15 +25,15 @@ define void @test() nounwind { ; SDAG-NEXT: sub sp, sp, #16 ; SDAG-NEXT: mov x1, xzr ; SDAG-NEXT: mov x0, x1 -; SDAG-NEXT: str x1, [sp] // 8-byte Folded Spill -; SDAG-NEXT: str x0, [sp, #8] // 8-byte Folded Spill +; SDAG-NEXT: str x1, [sp] // 8-byte Spill +; SDAG-NEXT: str x0, [sp, #8] // 8-byte Spill ; SDAG-NEXT: b .LBB0_1 ; SDAG-NEXT: .LBB0_1: // %loop ; SDAG-NEXT: // =>This Inner Loop Header: Depth=1 -; SDAG-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload -; SDAG-NEXT: ldr x1, [sp] // 8-byte Folded Reload -; SDAG-NEXT: str x1, [sp] // 8-byte Folded Spill -; SDAG-NEXT: str x0, [sp, #8] // 8-byte Folded Spill +; SDAG-NEXT: ldr x0, [sp, #8] // 8-byte Reload +; SDAG-NEXT: ldr x1, [sp] // 8-byte Reload +; SDAG-NEXT: str x1, [sp] // 8-byte Spill +; SDAG-NEXT: str x0, [sp, #8] // 8-byte Spill ; SDAG-NEXT: b .LBB0_1 entry: br label %loop diff --git a/llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll b/llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll index 0418720231288..1b085ca85be4d 100644 --- a/llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll +++ b/llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll @@ -5,15 +5,15 @@ define void @test() nounwind { ; CHECK-LABEL: test: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x1, xzr -; CHECK-NEXT: str x1, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x1, [sp, #8] // 8-byte Spill ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: bl returns_arg 
-; CHECK-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x1, [sp, #8] // 8-byte Reload ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: bl accepts_arg -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %x = call i128 @returns_arg(i128 0) diff --git a/llvm/test/CodeGen/AArch64/pr58516.ll b/llvm/test/CodeGen/AArch64/pr58516.ll index d1775a2e707b6..5554826a6ac76 100644 --- a/llvm/test/CodeGen/AArch64/pr58516.ll +++ b/llvm/test/CodeGen/AArch64/pr58516.ll @@ -14,7 +14,7 @@ define void @osfx(ptr %this) comdat personality ptr @__CxxFrameHandler3 { ; CHECK-NEXT: // %bb.0: // %invoke.cont ; CHECK-NEXT: stp x19, x20, [sp, #-64]! // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_regp_x x19, 64 -; CHECK-NEXT: str x21, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x21, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x21, 16 ; CHECK-NEXT: stp x29, x30, [sp, #24] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_fplr 24 @@ -48,7 +48,7 @@ define void @osfx(ptr %this) comdat personality ptr @__CxxFrameHandler3 { ; CHECK-NEXT: .seh_add_fp 24 ; CHECK-NEXT: ldp x29, x30, [sp, #24] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_fplr 24 -; CHECK-NEXT: ldr x21, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x21, [sp, #16] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x21, 16 ; CHECK-NEXT: ldp x19, x20, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_regp_x x19, 64 @@ -70,7 +70,7 @@ define void @osfx(ptr %this) comdat personality ptr @__CxxFrameHandler3 { ; CHECK-NEXT: .LBB0_3: // %catch ; CHECK-NEXT: stp x19, x20, [sp, #-48]! 
// 16-byte Folded Spill ; CHECK-NEXT: .seh_save_regp_x x19, 48 -; CHECK-NEXT: str x21, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x21, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x21, 16 ; CHECK-NEXT: stp x29, x30, [sp, #24] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_fplr 24 @@ -80,7 +80,7 @@ define void @osfx(ptr %this) comdat personality ptr @__CxxFrameHandler3 { ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldp x29, x30, [sp, #24] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_fplr 24 -; CHECK-NEXT: ldr x21, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x21, [sp, #16] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x21, 16 ; CHECK-NEXT: ldp x19, x20, [sp], #48 // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_regp_x x19, 48 diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll index 9b9717c19321e..ca0139f5382dc 100644 --- a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll +++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll @@ -18,7 +18,7 @@ define void @caller1(ptr %a) { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: stp x26, x25, [sp, #96] // 16-byte Folded Spill ; CHECK-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill @@ -47,7 +47,7 @@ define void @caller1(ptr %a) { ; CHECK-NEXT: mov x20, x0 ; CHECK-NEXT: bl callee ; CHECK-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #112] // 16-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, 
#96] // 16-byte Folded Reload @@ -118,7 +118,7 @@ define void @caller1(ptr %a) { ; WIN-NEXT: .seh_save_regp x25, 48 ; WIN-NEXT: stp x27, x28, [sp, #64] // 16-byte Folded Spill ; WIN-NEXT: .seh_save_regp x27, 64 -; WIN-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; WIN-NEXT: str x30, [sp, #80] // 8-byte Spill ; WIN-NEXT: .seh_save_reg x30, 80 ; WIN-NEXT: stp d8, d9, [sp, #88] // 16-byte Folded Spill ; WIN-NEXT: .seh_save_fregp d8, 88 @@ -140,7 +140,7 @@ define void @caller1(ptr %a) { ; WIN-NEXT: .seh_save_fregp d10, 104 ; WIN-NEXT: ldp d8, d9, [sp, #88] // 16-byte Folded Reload ; WIN-NEXT: .seh_save_fregp d8, 88 -; WIN-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; WIN-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; WIN-NEXT: .seh_save_reg x30, 80 ; WIN-NEXT: ldp x27, x28, [sp, #64] // 16-byte Folded Reload ; WIN-NEXT: .seh_save_regp x27, 64 @@ -256,7 +256,7 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6 ; WIN-NEXT: // %bb.0: ; WIN-NEXT: sub sp, sp, #32 ; WIN-NEXT: .seh_stackalloc 32 -; WIN-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; WIN-NEXT: str x30, [sp, #16] // 8-byte Spill ; WIN-NEXT: .seh_save_reg x30, 16 ; WIN-NEXT: .seh_endprologue ; WIN-NEXT: ldr x8, [sp, #32] @@ -287,7 +287,7 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6 ; WIN-NEXT: str x15, [sp] ; WIN-NEXT: bl callee_with_many_param2 ; WIN-NEXT: .seh_startepilogue -; WIN-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; WIN-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; WIN-NEXT: .seh_save_reg x30, 16 ; WIN-NEXT: add sp, sp, #32 ; WIN-NEXT: .seh_stackalloc 32 @@ -306,7 +306,7 @@ define i64 @caller3() { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x28, x27, [sp, 
#80] // 16-byte Folded Spill ; CHECK-NEXT: stp x26, x25, [sp, #96] // 16-byte Folded Spill ; CHECK-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill @@ -358,7 +358,7 @@ define i64 @caller3() { ; CHECK-NEXT: mov w15, #24 // =0x18 ; CHECK-NEXT: bl callee_with_many_param ; CHECK-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #112] // 16-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #96] // 16-byte Folded Reload @@ -454,7 +454,7 @@ define i64 @caller3() { ; WIN-NEXT: .seh_save_regp x25, 64 ; WIN-NEXT: stp x27, x28, [sp, #80] // 16-byte Folded Spill ; WIN-NEXT: .seh_save_regp x27, 80 -; WIN-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; WIN-NEXT: str x30, [sp, #96] // 8-byte Spill ; WIN-NEXT: .seh_save_reg x30, 96 ; WIN-NEXT: stp d8, d9, [sp, #104] // 16-byte Folded Spill ; WIN-NEXT: .seh_save_fregp d8, 104 @@ -500,7 +500,7 @@ define i64 @caller3() { ; WIN-NEXT: .seh_save_fregp d10, 120 ; WIN-NEXT: ldp d8, d9, [sp, #104] // 16-byte Folded Reload ; WIN-NEXT: .seh_save_fregp d8, 104 -; WIN-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; WIN-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; WIN-NEXT: .seh_save_reg x30, 96 ; WIN-NEXT: ldp x27, x28, [sp, #80] // 16-byte Folded Reload ; WIN-NEXT: .seh_save_regp x27, 80 diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll index 48898719f40ce..a359343eacf79 100644 --- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll +++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll @@ -96,7 +96,7 @@ define i32 @caller() nounwind ssp { ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill 
-; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill ; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill ; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill @@ -106,7 +106,7 @@ define i32 @caller() nounwind ssp { ; CHECK-NEXT: str w9, [sp] ; CHECK-NEXT: bl callee ; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll index 83dd240a6540f..784d5ed4b3c6f 100644 --- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll +++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll @@ -42,7 +42,7 @@ define i32 @caller() nounwind ssp { ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill ; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill ; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill @@ -52,7 +52,7 @@ define i32 @caller() nounwind ssp { ; CHECK-NEXT: str w9, [sp] ; CHECK-NEXT: bl callee ; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte 
Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll index 5b501762418ef..31f004e8d72b7 100644 --- a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll @@ -46,10 +46,10 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly ; CHECK-NEXT: stp x9, x0, [sp, #32] ; 16-byte Folded Spill ; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: mov w1, #32768 ; =0x8000 -; CHECK-NEXT: str x10, [sp, #8] ; 8-byte Folded Spill -; CHECK-NEXT: str x11, [sp, #24] ; 8-byte Folded Spill -; CHECK-NEXT: str w12, [sp, #4] ; 4-byte Folded Spill -; CHECK-NEXT: str w13, [sp, #20] ; 4-byte Folded Spill +; CHECK-NEXT: str x10, [sp, #8] ; 8-byte Spill +; CHECK-NEXT: str x11, [sp, #24] ; 8-byte Spill +; CHECK-NEXT: str w12, [sp, #4] ; 4-byte Spill +; CHECK-NEXT: str w13, [sp, #20] ; 4-byte Spill ; CHECK-NEXT: bl ___maskrune ; CHECK-NEXT: Lloh2: ; CHECK-NEXT: adrp x14, __DefaultRuneLocale@GOTPAGE @@ -57,10 +57,10 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly ; CHECK-NEXT: Lloh3: ; CHECK-NEXT: ldr x14, [x14, __DefaultRuneLocale@GOTPAGEOFF] ; CHECK-NEXT: ldp x11, x9, [sp, #24] ; 16-byte Folded Reload -; CHECK-NEXT: ldr w13, [sp, #20] ; 4-byte Folded Reload -; CHECK-NEXT: ldr w12, [sp, #4] ; 4-byte Folded Reload -; CHECK-NEXT: ldr x10, [sp, #8] ; 8-byte Folded Reload -; CHECK-NEXT: ldr x0, [sp, #40] ; 8-byte Folded Reload +; CHECK-NEXT: ldr w13, [sp, #20] ; 4-byte Reload +; CHECK-NEXT: ldr w12, [sp, #4] ; 4-byte Reload +; CHECK-NEXT: ldr x10, [sp, #8] ; 8-byte Reload +; CHECK-NEXT: ldr x0, [sp, #40] ; 8-byte Reload ; CHECK-NEXT: cbz w8, LBB0_4 ; CHECK-NEXT: b LBB0_6 ; CHECK-NEXT: LBB0_3: ; %cond.true.i.i @@ -90,10 +90,10 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly ; CHECK-NEXT: stp x9, x0, [sp, #32] ; 16-byte Folded Spill ; CHECK-NEXT: mov w0, w8 
; CHECK-NEXT: mov w1, #32768 ; =0x8000 -; CHECK-NEXT: str x10, [sp, #8] ; 8-byte Folded Spill -; CHECK-NEXT: str x11, [sp, #24] ; 8-byte Folded Spill -; CHECK-NEXT: str w12, [sp, #4] ; 4-byte Folded Spill -; CHECK-NEXT: str w13, [sp, #20] ; 4-byte Folded Spill +; CHECK-NEXT: str x10, [sp, #8] ; 8-byte Spill +; CHECK-NEXT: str x11, [sp, #24] ; 8-byte Spill +; CHECK-NEXT: str w12, [sp, #4] ; 4-byte Spill +; CHECK-NEXT: str w13, [sp, #20] ; 4-byte Spill ; CHECK-NEXT: bl ___maskrune ; CHECK-NEXT: Lloh4: ; CHECK-NEXT: adrp x14, __DefaultRuneLocale@GOTPAGE @@ -101,10 +101,10 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly ; CHECK-NEXT: Lloh5: ; CHECK-NEXT: ldr x14, [x14, __DefaultRuneLocale@GOTPAGEOFF] ; CHECK-NEXT: ldp x11, x9, [sp, #24] ; 16-byte Folded Reload -; CHECK-NEXT: ldr w13, [sp, #20] ; 4-byte Folded Reload -; CHECK-NEXT: ldr w12, [sp, #4] ; 4-byte Folded Reload -; CHECK-NEXT: ldr x10, [sp, #8] ; 8-byte Folded Reload -; CHECK-NEXT: ldr x0, [sp, #40] ; 8-byte Folded Reload +; CHECK-NEXT: ldr w13, [sp, #20] ; 4-byte Reload +; CHECK-NEXT: ldr w12, [sp, #4] ; 4-byte Reload +; CHECK-NEXT: ldr x10, [sp, #8] ; 8-byte Reload +; CHECK-NEXT: ldr x0, [sp, #40] ; 8-byte Reload ; CHECK-NEXT: cbnz w8, LBB0_6 ; CHECK-NEXT: LBB0_9: ; %while.end ; CHECK-NEXT: orr w8, w13, w12 diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll index e3c623371448b..ae71cd00b9aa4 100644 --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -81,7 +81,7 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: stp q7, q23, [sp, #32] // 32-byte Folded Spill ; CHECK-NEXT: ldr x14, [x14, #8] ; CHECK-NEXT: mul x0, x17, x17 -; CHECK-NEXT: ldr q23, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr q23, [sp, #80] // 16-byte Reload ; CHECK-NEXT: mov v9.16b, v30.16b ; CHECK-NEXT: mov v30.16b, 
v25.16b ; CHECK-NEXT: mov v25.16b, v20.16b @@ -114,7 +114,7 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: fmov d5, x20 ; CHECK-NEXT: mul x6, x15, x15 ; CHECK-NEXT: add v23.2d, v23.2d, v0.2d -; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-NEXT: mov v1.d[1], x3 ; CHECK-NEXT: mul x7, x15, x5 ; CHECK-NEXT: add v0.2d, v0.2d, v15.2d @@ -123,19 +123,19 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: fmov d4, x6 ; CHECK-NEXT: mul x19, x16, x5 ; CHECK-NEXT: stp q0, q23, [sp, #64] // 32-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-NEXT: fmov d3, x7 -; CHECK-NEXT: ldr q23, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q23, [sp, #48] // 16-byte Reload ; CHECK-NEXT: mul x17, x2, x15 ; CHECK-NEXT: add v0.2d, v0.2d, v15.2d -; CHECK-NEXT: ldr q15, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q15, [sp] // 16-byte Reload ; CHECK-NEXT: mov v5.d[1], x0 ; CHECK-NEXT: mov v4.d[1], x6 ; CHECK-NEXT: mul x16, x16, x15 ; CHECK-NEXT: mov v3.d[1], x7 ; CHECK-NEXT: add v15.2d, v15.2d, v1.2d ; CHECK-NEXT: mov v2.d[1], x19 -; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-NEXT: mov v1.16b, v6.16b ; CHECK-NEXT: mul x14, x14, x15 ; CHECK-NEXT: mov v6.16b, v20.16b @@ -188,7 +188,7 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload ; CHECK-NEXT: stp q10, q13, [x8, #64] ; CHECK-NEXT: stp q28, q18, [x8] -; CHECK-NEXT: ldr q18, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldr q18, [sp, #96] // 16-byte Reload ; CHECK-NEXT: stp q29, q12, [x8, #96] ; CHECK-NEXT: ldp d13, d12, [sp, #128] // 16-byte Folded Reload ; CHECK-NEXT: stp q18, q15, [x8, #32] diff --git a/llvm/test/CodeGen/AArch64/rem.ll 
b/llvm/test/CodeGen/AArch64/rem.ll index 7477d33f9aa46..b557ba34107b7 100644 --- a/llvm/test/CodeGen/AArch64/rem.ll +++ b/llvm/test/CodeGen/AArch64/rem.ll @@ -630,51 +630,51 @@ define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: smov w12, v3.b[2] ; CHECK-SD-NEXT: smov w17, v3.b[3] ; CHECK-SD-NEXT: smov w16, v1.b[3] -; CHECK-SD-NEXT: str w8, [sp, #80] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #80] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[0] -; CHECK-SD-NEXT: str w9, [sp, #88] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #88] // 4-byte Spill ; CHECK-SD-NEXT: smov w9, v0.b[0] -; CHECK-SD-NEXT: ldr w30, [sp, #80] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w30, [sp, #80] // 4-byte Reload ; CHECK-SD-NEXT: smov w15, v3.b[4] ; CHECK-SD-NEXT: smov w14, v1.b[4] ; CHECK-SD-NEXT: smov w4, v3.b[5] ; CHECK-SD-NEXT: smov w1, v1.b[5] ; CHECK-SD-NEXT: smov w2, v3.b[6] ; CHECK-SD-NEXT: smov w18, v1.b[6] -; CHECK-SD-NEXT: str w8, [sp, #32] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #32] // 4-byte Spill ; CHECK-SD-NEXT: smov w21, v3.b[9] ; CHECK-SD-NEXT: smov w20, v1.b[9] -; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill -; CHECK-SD-NEXT: ldr w29, [sp, #32] // 4-byte Folded Reload +; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Spill +; CHECK-SD-NEXT: ldr w29, [sp, #32] // 4-byte Reload ; CHECK-SD-NEXT: sdiv w11, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[2] ; CHECK-SD-NEXT: smov w9, v0.b[2] -; CHECK-SD-NEXT: str w10, [sp, #96] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #96] // 4-byte Spill ; CHECK-SD-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[3] ; CHECK-SD-NEXT: smov w9, v0.b[3] ; CHECK-SD-NEXT: stp w11, w8, [sp, #48] // 8-byte Folded Spill -; CHECK-SD-NEXT: str w10, [sp, #24] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #24] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: 
smov w8, v2.b[4] -; CHECK-SD-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #28] // 4-byte Spill ; CHECK-SD-NEXT: stp w9, w10, [sp, #56] // 8-byte Folded Spill ; CHECK-SD-NEXT: smov w9, v0.b[4] ; CHECK-SD-NEXT: sdiv w27, w0, w5 -; CHECK-SD-NEXT: str w9, [sp, #36] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #36] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[5] ; CHECK-SD-NEXT: smov w9, v0.b[5] -; CHECK-SD-NEXT: str w8, [sp, #76] // 4-byte Folded Spill -; CHECK-SD-NEXT: str w9, [sp, #84] // 4-byte Folded Spill -; CHECK-SD-NEXT: str w10, [sp, #44] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #76] // 4-byte Spill +; CHECK-SD-NEXT: str w9, [sp, #84] // 4-byte Spill +; CHECK-SD-NEXT: str w10, [sp, #44] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[6] ; CHECK-SD-NEXT: smov w9, v0.b[6] ; CHECK-SD-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill -; CHECK-SD-NEXT: str w10, [sp, #92] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #92] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[7] ; CHECK-SD-NEXT: smov w9, v0.b[7] @@ -682,18 +682,18 @@ define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: sdiv w11, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[8] ; CHECK-SD-NEXT: smov w9, v0.b[8] -; CHECK-SD-NEXT: str w10, [sp, #72] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #72] // 4-byte Spill ; CHECK-SD-NEXT: stp w8, w9, [sp, #100] // 8-byte Folded Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[9] ; CHECK-SD-NEXT: smov w9, v0.b[9] ; CHECK-SD-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill -; CHECK-SD-NEXT: str w10, [sp, #108] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #108] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[10] ; CHECK-SD-NEXT: smov w9, v0.b[10] ; CHECK-SD-NEXT: stp w11, w8, [sp, #120] // 8-byte Folded Spill -; 
CHECK-SD-NEXT: str w10, [sp, #144] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #144] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[11] ; CHECK-SD-NEXT: stp w9, w10, [sp, #128] // 8-byte Folded Spill @@ -703,45 +703,45 @@ define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: sdiv w11, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[12] ; CHECK-SD-NEXT: smov w9, v0.b[12] -; CHECK-SD-NEXT: str w8, [sp, #152] // 4-byte Folded Spill -; CHECK-SD-NEXT: str w9, [sp, #160] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #152] // 4-byte Spill +; CHECK-SD-NEXT: str w9, [sp, #160] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[13] ; CHECK-SD-NEXT: smov w9, v0.b[13] ; CHECK-SD-NEXT: stp w8, w9, [sp, #196] // 8-byte Folded Spill -; CHECK-SD-NEXT: str w10, [sp, #168] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #168] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[14] ; CHECK-SD-NEXT: smov w9, v0.b[14] ; CHECK-SD-NEXT: stp w11, w8, [sp, #180] // 8-byte Folded Spill ; CHECK-SD-NEXT: smov w11, v1.b[2] -; CHECK-SD-NEXT: str w10, [sp, #204] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #204] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.b[15] -; CHECK-SD-NEXT: str w8, [sp, #148] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #148] // 4-byte Spill ; CHECK-SD-NEXT: stp w9, w10, [sp, #188] // 8-byte Folded Spill ; CHECK-SD-NEXT: smov w9, v0.b[15] ; CHECK-SD-NEXT: sdiv w22, w11, w12 -; CHECK-SD-NEXT: str w9, [sp, #156] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #156] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w10, w9, w8 -; CHECK-SD-NEXT: str w10, [sp, #164] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #164] // 4-byte Spill ; CHECK-SD-NEXT: smov w10, v1.b[0] ; CHECK-SD-NEXT: sdiv w9, w7, w19 ; CHECK-SD-NEXT: sdiv w8, w3, w6 ; CHECK-SD-NEXT: sdiv w23, w10, w13 ; CHECK-SD-NEXT: stp w8, w9, 
[sp, #8] // 8-byte Folded Spill -; CHECK-SD-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w9, [sp, #88] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w8, [sp, #96] // 4-byte Reload +; CHECK-SD-NEXT: ldr w9, [sp, #88] // 4-byte Reload ; CHECK-SD-NEXT: msub w9, w8, w30, w9 -; CHECK-SD-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w30, [sp, #40] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w8, [sp, #48] // 4-byte Reload +; CHECK-SD-NEXT: ldr w30, [sp, #40] // 4-byte Reload ; CHECK-SD-NEXT: msub w8, w8, w29, w30 ; CHECK-SD-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload ; CHECK-SD-NEXT: fmov s0, w8 ; CHECK-SD-NEXT: msub w10, w23, w13, w10 ; CHECK-SD-NEXT: sdiv w24, w14, w15 ; CHECK-SD-NEXT: msub w13, w27, w5, w0 -; CHECK-SD-NEXT: ldr w5, [sp, #16] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w5, [sp, #16] // 4-byte Reload ; CHECK-SD-NEXT: mov v0.b[1], w9 ; CHECK-SD-NEXT: msub w9, w22, w12, w11 ; CHECK-SD-NEXT: smov w11, v1.b[10] @@ -749,34 +749,34 @@ define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: ldp w10, w8, [sp, #20] // 8-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[1], w13 ; CHECK-SD-NEXT: msub w8, w8, w5, w10 -; CHECK-SD-NEXT: ldr w5, [sp, #52] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w5, [sp, #52] // 4-byte Reload ; CHECK-SD-NEXT: smov w10, v3.b[10] ; CHECK-SD-NEXT: sdiv w28, w1, w4 ; CHECK-SD-NEXT: ldp w13, w12, [sp, #56] // 8-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[2], w9 ; CHECK-SD-NEXT: mov v0.b[2], w8 ; CHECK-SD-NEXT: msub w8, w25, w17, w16 -; CHECK-SD-NEXT: ldr w17, [sp, #28] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w16, [sp, #36] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w17, [sp, #28] // 4-byte Reload +; CHECK-SD-NEXT: ldr w16, [sp, #36] // 4-byte Reload ; CHECK-SD-NEXT: msub w12, w12, w5, w13 -; CHECK-SD-NEXT: ldr w13, [sp, #44] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w5, [sp, #136] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w13, [sp, #44] // 
4-byte Reload +; CHECK-SD-NEXT: ldr w5, [sp, #136] // 4-byte Reload ; CHECK-SD-NEXT: mov v2.b[3], w8 ; CHECK-SD-NEXT: msub w8, w24, w15, w14 -; CHECK-SD-NEXT: ldr w15, [sp, #92] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w15, [sp, #92] // 4-byte Reload ; CHECK-SD-NEXT: mov v0.b[3], w12 ; CHECK-SD-NEXT: msub w13, w13, w17, w16 -; CHECK-SD-NEXT: ldr w17, [sp, #76] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w17, [sp, #76] // 4-byte Reload ; CHECK-SD-NEXT: sdiv w26, w18, w2 -; CHECK-SD-NEXT: ldr w16, [sp, #84] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w16, [sp, #84] // 4-byte Reload ; CHECK-SD-NEXT: smov w12, v3.b[11] ; CHECK-SD-NEXT: msub w15, w15, w17, w16 ; CHECK-SD-NEXT: smov w14, v1.b[11] ; CHECK-SD-NEXT: mov v2.b[4], w8 ; CHECK-SD-NEXT: msub w8, w28, w4, w1 -; CHECK-SD-NEXT: ldr w1, [sp, #64] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w1, [sp, #64] // 4-byte Reload ; CHECK-SD-NEXT: mov v0.b[4], w13 -; CHECK-SD-NEXT: ldr w4, [sp, #100] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w4, [sp, #100] // 4-byte Reload ; CHECK-SD-NEXT: ldp w17, w16, [sp, #68] // 8-byte Folded Reload ; CHECK-SD-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[5], w8 @@ -785,13 +785,13 @@ define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: msub w16, w16, w1, w17 ; CHECK-SD-NEXT: smov w15, v3.b[12] ; CHECK-SD-NEXT: msub w8, w26, w2, w18 -; CHECK-SD-NEXT: ldr w2, [sp, #112] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w2, [sp, #112] // 4-byte Reload ; CHECK-SD-NEXT: sdiv w0, w20, w21 ; CHECK-SD-NEXT: ldp w1, w18, [sp, #116] // 8-byte Folded Reload ; CHECK-SD-NEXT: smov w17, v1.b[12] ; CHECK-SD-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[6], w8 -; CHECK-SD-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w8, [sp, #12] // 4-byte Reload ; CHECK-SD-NEXT: mov v0.b[6], w16 ; CHECK-SD-NEXT: msub w18, w18, w2, w1 ; CHECK-SD-NEXT: msub w8, w8, w19, w7 @@ -799,7 +799,7 @@ 
define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: mov v0.b[7], w18 ; CHECK-SD-NEXT: smov w18, v3.b[13] ; CHECK-SD-NEXT: mov v2.b[7], w8 -; CHECK-SD-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w8, [sp, #8] // 4-byte Reload ; CHECK-SD-NEXT: sdiv w9, w11, w10 ; CHECK-SD-NEXT: msub w1, w1, w4, w2 ; CHECK-SD-NEXT: smov w2, v1.b[13] @@ -809,7 +809,7 @@ define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: mov v2.b[8], w8 ; CHECK-SD-NEXT: msub w8, w0, w21, w20 ; CHECK-SD-NEXT: msub w3, w3, w5, w4 -; CHECK-SD-NEXT: ldr w5, [sp, #124] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w5, [sp, #124] // 4-byte Reload ; CHECK-SD-NEXT: ldp w4, w1, [sp, #128] // 8-byte Folded Reload ; CHECK-SD-NEXT: sdiv w13, w14, w12 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload @@ -817,19 +817,19 @@ define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: mov v0.b[9], w3 ; CHECK-SD-NEXT: msub w8, w9, w10, w11 ; CHECK-SD-NEXT: msub w1, w1, w5, w4 -; CHECK-SD-NEXT: ldr w4, [sp, #172] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w4, [sp, #172] // 4-byte Reload ; CHECK-SD-NEXT: smov w9, v3.b[14] ; CHECK-SD-NEXT: ldp w3, w11, [sp, #176] // 8-byte Folded Reload ; CHECK-SD-NEXT: smov w10, v1.b[14] ; CHECK-SD-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[10], w8 ; CHECK-SD-NEXT: mov v0.b[10], w1 -; CHECK-SD-NEXT: ldr w1, [sp, #152] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w1, [sp, #152] // 4-byte Reload ; CHECK-SD-NEXT: msub w11, w11, w4, w3 ; CHECK-SD-NEXT: sdiv w16, w17, w15 ; CHECK-SD-NEXT: msub w8, w13, w12, w14 -; CHECK-SD-NEXT: ldr w13, [sp, #168] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w14, [sp, #160] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w13, [sp, #168] // 4-byte Reload +; CHECK-SD-NEXT: ldr w14, [sp, #160] // 4-byte Reload ; CHECK-SD-NEXT: mov v0.b[11], w11 ; CHECK-SD-NEXT: smov w11, v3.b[15] ; CHECK-SD-NEXT: msub w13, w13, w1, w14 @@ -838,7 
+838,7 @@ define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: mov v0.b[12], w13 ; CHECK-SD-NEXT: sdiv w0, w2, w18 ; CHECK-SD-NEXT: msub w8, w16, w15, w17 -; CHECK-SD-NEXT: ldr w17, [sp, #196] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w17, [sp, #196] // 4-byte Reload ; CHECK-SD-NEXT: ldp w16, w15, [sp, #200] // 8-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[12], w8 ; CHECK-SD-NEXT: msub w15, w15, w17, w16 @@ -846,15 +846,15 @@ define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: mov v0.b[13], w15 ; CHECK-SD-NEXT: sdiv w12, w10, w9 ; CHECK-SD-NEXT: msub w8, w0, w18, w2 -; CHECK-SD-NEXT: ldr w18, [sp, #184] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w18, [sp, #184] // 4-byte Reload ; CHECK-SD-NEXT: msub w16, w16, w18, w17 ; CHECK-SD-NEXT: mov v2.b[13], w8 ; CHECK-SD-NEXT: mov v0.b[14], w16 ; CHECK-SD-NEXT: sdiv w13, w14, w11 ; CHECK-SD-NEXT: msub w8, w12, w9, w10 -; CHECK-SD-NEXT: ldr w9, [sp, #164] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w12, [sp, #148] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w10, [sp, #156] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w9, [sp, #164] // 4-byte Reload +; CHECK-SD-NEXT: ldr w12, [sp, #148] // 4-byte Reload +; CHECK-SD-NEXT: ldr w10, [sp, #156] // 4-byte Reload ; CHECK-SD-NEXT: mov v2.b[14], w8 ; CHECK-SD-NEXT: msub w9, w9, w12, w10 ; CHECK-SD-NEXT: mov v0.b[15], w9 @@ -927,8 +927,8 @@ define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-GI-NEXT: mov v20.s[2], w9 ; CHECK-GI-NEXT: sdiv w13, w12, w13 ; CHECK-GI-NEXT: mov w12, v4.s[1] -; CHECK-GI-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; CHECK-GI-NEXT: ldr w11, [sp, #12] // 4-byte Folded Reload +; CHECK-GI-NEXT: str w8, [sp, #12] // 4-byte Spill +; CHECK-GI-NEXT: ldr w11, [sp, #12] // 4-byte Reload ; CHECK-GI-NEXT: mov v20.s[3], w11 ; CHECK-GI-NEXT: sdiv w15, w12, w14 ; CHECK-GI-NEXT: mov w12, v4.s[2] @@ -1552,51 +1552,51 @@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: umov w12, v3.b[2] 
; CHECK-SD-NEXT: umov w17, v3.b[3] ; CHECK-SD-NEXT: umov w16, v1.b[3] -; CHECK-SD-NEXT: str w8, [sp, #80] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #80] // 4-byte Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[0] -; CHECK-SD-NEXT: str w9, [sp, #88] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #88] // 4-byte Spill ; CHECK-SD-NEXT: umov w9, v0.b[0] -; CHECK-SD-NEXT: ldr w30, [sp, #80] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w30, [sp, #80] // 4-byte Reload ; CHECK-SD-NEXT: umov w15, v3.b[4] ; CHECK-SD-NEXT: umov w14, v1.b[4] ; CHECK-SD-NEXT: umov w4, v3.b[5] ; CHECK-SD-NEXT: umov w1, v1.b[5] ; CHECK-SD-NEXT: umov w2, v3.b[6] ; CHECK-SD-NEXT: umov w18, v1.b[6] -; CHECK-SD-NEXT: str w8, [sp, #32] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #32] // 4-byte Spill ; CHECK-SD-NEXT: umov w21, v3.b[9] ; CHECK-SD-NEXT: umov w20, v1.b[9] -; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill -; CHECK-SD-NEXT: ldr w29, [sp, #32] // 4-byte Folded Reload +; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Spill +; CHECK-SD-NEXT: ldr w29, [sp, #32] // 4-byte Reload ; CHECK-SD-NEXT: udiv w11, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[2] ; CHECK-SD-NEXT: umov w9, v0.b[2] -; CHECK-SD-NEXT: str w10, [sp, #96] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #96] // 4-byte Spill ; CHECK-SD-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[3] ; CHECK-SD-NEXT: umov w9, v0.b[3] ; CHECK-SD-NEXT: stp w11, w8, [sp, #48] // 8-byte Folded Spill -; CHECK-SD-NEXT: str w10, [sp, #24] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #24] // 4-byte Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[4] -; CHECK-SD-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #28] // 4-byte Spill ; CHECK-SD-NEXT: stp w9, w10, [sp, #56] // 8-byte Folded Spill ; CHECK-SD-NEXT: umov w9, v0.b[4] ; CHECK-SD-NEXT: udiv w27, w0, w5 -; 
CHECK-SD-NEXT: str w9, [sp, #36] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #36] // 4-byte Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[5] ; CHECK-SD-NEXT: umov w9, v0.b[5] -; CHECK-SD-NEXT: str w8, [sp, #76] // 4-byte Folded Spill -; CHECK-SD-NEXT: str w9, [sp, #84] // 4-byte Folded Spill -; CHECK-SD-NEXT: str w10, [sp, #44] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #76] // 4-byte Spill +; CHECK-SD-NEXT: str w9, [sp, #84] // 4-byte Spill +; CHECK-SD-NEXT: str w10, [sp, #44] // 4-byte Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[6] ; CHECK-SD-NEXT: umov w9, v0.b[6] ; CHECK-SD-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill -; CHECK-SD-NEXT: str w10, [sp, #92] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #92] // 4-byte Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[7] ; CHECK-SD-NEXT: umov w9, v0.b[7] @@ -1604,18 +1604,18 @@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: udiv w11, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[8] ; CHECK-SD-NEXT: umov w9, v0.b[8] -; CHECK-SD-NEXT: str w10, [sp, #72] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #72] // 4-byte Spill ; CHECK-SD-NEXT: stp w8, w9, [sp, #100] // 8-byte Folded Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[9] ; CHECK-SD-NEXT: umov w9, v0.b[9] ; CHECK-SD-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill -; CHECK-SD-NEXT: str w10, [sp, #108] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #108] // 4-byte Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[10] ; CHECK-SD-NEXT: umov w9, v0.b[10] ; CHECK-SD-NEXT: stp w11, w8, [sp, #120] // 8-byte Folded Spill -; CHECK-SD-NEXT: str w10, [sp, #144] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #144] // 4-byte Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[11] ; CHECK-SD-NEXT: stp w9, w10, [sp, #128] // 8-byte Folded Spill @@ -1625,45 +1625,45 
@@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: udiv w11, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[12] ; CHECK-SD-NEXT: umov w9, v0.b[12] -; CHECK-SD-NEXT: str w8, [sp, #152] // 4-byte Folded Spill -; CHECK-SD-NEXT: str w9, [sp, #160] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #152] // 4-byte Spill +; CHECK-SD-NEXT: str w9, [sp, #160] // 4-byte Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[13] ; CHECK-SD-NEXT: umov w9, v0.b[13] ; CHECK-SD-NEXT: stp w8, w9, [sp, #196] // 8-byte Folded Spill -; CHECK-SD-NEXT: str w10, [sp, #168] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #168] // 4-byte Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[14] ; CHECK-SD-NEXT: umov w9, v0.b[14] ; CHECK-SD-NEXT: stp w11, w8, [sp, #180] // 8-byte Folded Spill ; CHECK-SD-NEXT: umov w11, v1.b[2] -; CHECK-SD-NEXT: str w10, [sp, #204] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #204] // 4-byte Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.b[15] -; CHECK-SD-NEXT: str w8, [sp, #148] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #148] // 4-byte Spill ; CHECK-SD-NEXT: stp w9, w10, [sp, #188] // 8-byte Folded Spill ; CHECK-SD-NEXT: umov w9, v0.b[15] ; CHECK-SD-NEXT: udiv w22, w11, w12 -; CHECK-SD-NEXT: str w9, [sp, #156] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #156] // 4-byte Spill ; CHECK-SD-NEXT: udiv w10, w9, w8 -; CHECK-SD-NEXT: str w10, [sp, #164] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w10, [sp, #164] // 4-byte Spill ; CHECK-SD-NEXT: umov w10, v1.b[0] ; CHECK-SD-NEXT: udiv w9, w7, w19 ; CHECK-SD-NEXT: udiv w8, w3, w6 ; CHECK-SD-NEXT: udiv w23, w10, w13 ; CHECK-SD-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill -; CHECK-SD-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w9, [sp, #88] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w8, [sp, #96] // 4-byte Reload +; CHECK-SD-NEXT: ldr w9, [sp, #88] // 4-byte Reload ; 
CHECK-SD-NEXT: msub w9, w8, w30, w9 -; CHECK-SD-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w30, [sp, #40] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w8, [sp, #48] // 4-byte Reload +; CHECK-SD-NEXT: ldr w30, [sp, #40] // 4-byte Reload ; CHECK-SD-NEXT: msub w8, w8, w29, w30 ; CHECK-SD-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload ; CHECK-SD-NEXT: fmov s0, w8 ; CHECK-SD-NEXT: msub w10, w23, w13, w10 ; CHECK-SD-NEXT: udiv w24, w14, w15 ; CHECK-SD-NEXT: msub w13, w27, w5, w0 -; CHECK-SD-NEXT: ldr w5, [sp, #16] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w5, [sp, #16] // 4-byte Reload ; CHECK-SD-NEXT: mov v0.b[1], w9 ; CHECK-SD-NEXT: msub w9, w22, w12, w11 ; CHECK-SD-NEXT: umov w11, v1.b[10] @@ -1671,34 +1671,34 @@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: ldp w10, w8, [sp, #20] // 8-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[1], w13 ; CHECK-SD-NEXT: msub w8, w8, w5, w10 -; CHECK-SD-NEXT: ldr w5, [sp, #52] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w5, [sp, #52] // 4-byte Reload ; CHECK-SD-NEXT: umov w10, v3.b[10] ; CHECK-SD-NEXT: udiv w28, w1, w4 ; CHECK-SD-NEXT: ldp w13, w12, [sp, #56] // 8-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[2], w9 ; CHECK-SD-NEXT: mov v0.b[2], w8 ; CHECK-SD-NEXT: msub w8, w25, w17, w16 -; CHECK-SD-NEXT: ldr w17, [sp, #28] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w16, [sp, #36] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w17, [sp, #28] // 4-byte Reload +; CHECK-SD-NEXT: ldr w16, [sp, #36] // 4-byte Reload ; CHECK-SD-NEXT: msub w12, w12, w5, w13 -; CHECK-SD-NEXT: ldr w13, [sp, #44] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w5, [sp, #136] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w13, [sp, #44] // 4-byte Reload +; CHECK-SD-NEXT: ldr w5, [sp, #136] // 4-byte Reload ; CHECK-SD-NEXT: mov v2.b[3], w8 ; CHECK-SD-NEXT: msub w8, w24, w15, w14 -; CHECK-SD-NEXT: ldr w15, [sp, #92] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w15, [sp, #92] // 4-byte Reload ; 
CHECK-SD-NEXT: mov v0.b[3], w12 ; CHECK-SD-NEXT: msub w13, w13, w17, w16 -; CHECK-SD-NEXT: ldr w17, [sp, #76] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w17, [sp, #76] // 4-byte Reload ; CHECK-SD-NEXT: udiv w26, w18, w2 -; CHECK-SD-NEXT: ldr w16, [sp, #84] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w16, [sp, #84] // 4-byte Reload ; CHECK-SD-NEXT: umov w12, v3.b[11] ; CHECK-SD-NEXT: msub w15, w15, w17, w16 ; CHECK-SD-NEXT: umov w14, v1.b[11] ; CHECK-SD-NEXT: mov v2.b[4], w8 ; CHECK-SD-NEXT: msub w8, w28, w4, w1 -; CHECK-SD-NEXT: ldr w1, [sp, #64] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w1, [sp, #64] // 4-byte Reload ; CHECK-SD-NEXT: mov v0.b[4], w13 -; CHECK-SD-NEXT: ldr w4, [sp, #100] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w4, [sp, #100] // 4-byte Reload ; CHECK-SD-NEXT: ldp w17, w16, [sp, #68] // 8-byte Folded Reload ; CHECK-SD-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[5], w8 @@ -1707,13 +1707,13 @@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: msub w16, w16, w1, w17 ; CHECK-SD-NEXT: umov w15, v3.b[12] ; CHECK-SD-NEXT: msub w8, w26, w2, w18 -; CHECK-SD-NEXT: ldr w2, [sp, #112] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w2, [sp, #112] // 4-byte Reload ; CHECK-SD-NEXT: udiv w0, w20, w21 ; CHECK-SD-NEXT: ldp w1, w18, [sp, #116] // 8-byte Folded Reload ; CHECK-SD-NEXT: umov w17, v1.b[12] ; CHECK-SD-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[6], w8 -; CHECK-SD-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w8, [sp, #12] // 4-byte Reload ; CHECK-SD-NEXT: mov v0.b[6], w16 ; CHECK-SD-NEXT: msub w18, w18, w2, w1 ; CHECK-SD-NEXT: msub w8, w8, w19, w7 @@ -1721,7 +1721,7 @@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: mov v0.b[7], w18 ; CHECK-SD-NEXT: umov w18, v3.b[13] ; CHECK-SD-NEXT: mov v2.b[7], w8 -; CHECK-SD-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w8, [sp, #8] // 
4-byte Reload ; CHECK-SD-NEXT: udiv w9, w11, w10 ; CHECK-SD-NEXT: msub w1, w1, w4, w2 ; CHECK-SD-NEXT: umov w2, v1.b[13] @@ -1731,7 +1731,7 @@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: mov v2.b[8], w8 ; CHECK-SD-NEXT: msub w8, w0, w21, w20 ; CHECK-SD-NEXT: msub w3, w3, w5, w4 -; CHECK-SD-NEXT: ldr w5, [sp, #124] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w5, [sp, #124] // 4-byte Reload ; CHECK-SD-NEXT: ldp w4, w1, [sp, #128] // 8-byte Folded Reload ; CHECK-SD-NEXT: udiv w13, w14, w12 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload @@ -1739,19 +1739,19 @@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: mov v0.b[9], w3 ; CHECK-SD-NEXT: msub w8, w9, w10, w11 ; CHECK-SD-NEXT: msub w1, w1, w5, w4 -; CHECK-SD-NEXT: ldr w4, [sp, #172] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w4, [sp, #172] // 4-byte Reload ; CHECK-SD-NEXT: umov w9, v3.b[14] ; CHECK-SD-NEXT: ldp w3, w11, [sp, #176] // 8-byte Folded Reload ; CHECK-SD-NEXT: umov w10, v1.b[14] ; CHECK-SD-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[10], w8 ; CHECK-SD-NEXT: mov v0.b[10], w1 -; CHECK-SD-NEXT: ldr w1, [sp, #152] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w1, [sp, #152] // 4-byte Reload ; CHECK-SD-NEXT: msub w11, w11, w4, w3 ; CHECK-SD-NEXT: udiv w16, w17, w15 ; CHECK-SD-NEXT: msub w8, w13, w12, w14 -; CHECK-SD-NEXT: ldr w13, [sp, #168] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w14, [sp, #160] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w13, [sp, #168] // 4-byte Reload +; CHECK-SD-NEXT: ldr w14, [sp, #160] // 4-byte Reload ; CHECK-SD-NEXT: mov v0.b[11], w11 ; CHECK-SD-NEXT: umov w11, v3.b[15] ; CHECK-SD-NEXT: msub w13, w13, w1, w14 @@ -1760,7 +1760,7 @@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: mov v0.b[12], w13 ; CHECK-SD-NEXT: udiv w0, w2, w18 ; CHECK-SD-NEXT: msub w8, w16, w15, w17 -; CHECK-SD-NEXT: ldr w17, [sp, #196] // 4-byte Folded Reload +; 
CHECK-SD-NEXT: ldr w17, [sp, #196] // 4-byte Reload ; CHECK-SD-NEXT: ldp w16, w15, [sp, #200] // 8-byte Folded Reload ; CHECK-SD-NEXT: mov v2.b[12], w8 ; CHECK-SD-NEXT: msub w15, w15, w17, w16 @@ -1768,15 +1768,15 @@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-SD-NEXT: mov v0.b[13], w15 ; CHECK-SD-NEXT: udiv w12, w10, w9 ; CHECK-SD-NEXT: msub w8, w0, w18, w2 -; CHECK-SD-NEXT: ldr w18, [sp, #184] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w18, [sp, #184] // 4-byte Reload ; CHECK-SD-NEXT: msub w16, w16, w18, w17 ; CHECK-SD-NEXT: mov v2.b[13], w8 ; CHECK-SD-NEXT: mov v0.b[14], w16 ; CHECK-SD-NEXT: udiv w13, w14, w11 ; CHECK-SD-NEXT: msub w8, w12, w9, w10 -; CHECK-SD-NEXT: ldr w9, [sp, #164] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w12, [sp, #148] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w10, [sp, #156] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w9, [sp, #164] // 4-byte Reload +; CHECK-SD-NEXT: ldr w12, [sp, #148] // 4-byte Reload +; CHECK-SD-NEXT: ldr w10, [sp, #156] // 4-byte Reload ; CHECK-SD-NEXT: mov v2.b[14], w8 ; CHECK-SD-NEXT: msub w9, w9, w12, w10 ; CHECK-SD-NEXT: mov v0.b[15], w9 @@ -1849,8 +1849,8 @@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-GI-NEXT: mov v20.s[2], w9 ; CHECK-GI-NEXT: udiv w13, w12, w13 ; CHECK-GI-NEXT: mov w12, v4.s[1] -; CHECK-GI-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; CHECK-GI-NEXT: ldr w11, [sp, #12] // 4-byte Folded Reload +; CHECK-GI-NEXT: str w8, [sp, #12] // 4-byte Spill +; CHECK-GI-NEXT: ldr w11, [sp, #12] // 4-byte Reload ; CHECK-GI-NEXT: mov v20.s[3], w11 ; CHECK-GI-NEXT: udiv w15, w12, w14 ; CHECK-GI-NEXT: mov w12, v4.s[2] @@ -2280,40 +2280,40 @@ define <16 x i16> @sv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-SD-NEXT: smov w0, v2.h[4] ; CHECK-SD-NEXT: smov w5, v0.h[4] ; CHECK-SD-NEXT: smov w2, v2.h[7] -; CHECK-SD-NEXT: str w8, [sp, #52] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #52] // 4-byte Spill ; CHECK-SD-NEXT: smov w6, v0.h[7] ; CHECK-SD-NEXT: 
smov w27, v3.h[0] -; CHECK-SD-NEXT: str w9, [sp, #44] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #44] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w9, w9, w8 ; CHECK-SD-NEXT: smov w28, v1.h[0] ; CHECK-SD-NEXT: smov w24, v3.h[1] ; CHECK-SD-NEXT: smov w25, v1.h[1] -; CHECK-SD-NEXT: ldr w21, [sp, #52] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w23, [sp, #44] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w21, [sp, #52] // 4-byte Reload +; CHECK-SD-NEXT: ldr w23, [sp, #44] // 4-byte Reload ; CHECK-SD-NEXT: smov w30, v3.h[2] ; CHECK-SD-NEXT: smov w12, v3.h[3] ; CHECK-SD-NEXT: smov w11, v1.h[3] ; CHECK-SD-NEXT: smov w14, v3.h[5] ; CHECK-SD-NEXT: smov w13, v1.h[5] ; CHECK-SD-NEXT: sdiv w8, w22, w19 -; CHECK-SD-NEXT: str w9, [sp, #60] // 4-byte Folded Spill -; CHECK-SD-NEXT: ldr w20, [sp, #60] // 4-byte Folded Reload +; CHECK-SD-NEXT: str w9, [sp, #60] // 4-byte Spill +; CHECK-SD-NEXT: ldr w20, [sp, #60] // 4-byte Reload ; CHECK-SD-NEXT: msub w21, w20, w21, w23 ; CHECK-SD-NEXT: sdiv w9, w3, w1 -; CHECK-SD-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #12] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w8, w18, w7 ; CHECK-SD-NEXT: stp w9, w8, [sp, #24] // 8-byte Folded Spill ; CHECK-SD-NEXT: smov w8, v2.h[5] ; CHECK-SD-NEXT: smov w9, v0.h[5] ; CHECK-SD-NEXT: sdiv w10, w5, w0 -; CHECK-SD-NEXT: ldr w20, [sp, #24] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w20, [sp, #24] // 4-byte Reload ; CHECK-SD-NEXT: msub w1, w20, w1, w3 -; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill -; CHECK-SD-NEXT: str w8, [sp, #48] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Spill +; CHECK-SD-NEXT: str w8, [sp, #48] // 4-byte Spill ; CHECK-SD-NEXT: fmov s0, w1 -; CHECK-SD-NEXT: ldr w1, [sp, #12] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w1, [sp, #12] // 4-byte Reload ; CHECK-SD-NEXT: msub w1, w1, w19, w22 -; CHECK-SD-NEXT: ldr w19, [sp, #28] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w19, [sp, #28] // 4-byte Reload ; 
CHECK-SD-NEXT: sdiv w9, w9, w8 ; CHECK-SD-NEXT: smov w8, v2.h[6] ; CHECK-SD-NEXT: mov v0.h[1], w21 @@ -2321,10 +2321,10 @@ define <16 x i16> @sv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-SD-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v0.h[2], w1 -; CHECK-SD-NEXT: str w9, [sp, #56] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #56] // 4-byte Spill ; CHECK-SD-NEXT: sdiv w9, w4, w8 ; CHECK-SD-NEXT: mov v0.h[3], w18 -; CHECK-SD-NEXT: ldr w18, [sp, #40] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w18, [sp, #40] // 4-byte Reload ; CHECK-SD-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill ; CHECK-SD-NEXT: sdiv w8, w6, w2 ; CHECK-SD-NEXT: smov w9, v1.h[4] @@ -2348,8 +2348,8 @@ define <16 x i16> @sv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-SD-NEXT: mov v2.h[2], w8 ; CHECK-SD-NEXT: sdiv w16, w9, w10 ; CHECK-SD-NEXT: msub w8, w17, w12, w11 -; CHECK-SD-NEXT: ldr w12, [sp, #20] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w17, [sp, #48] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w12, [sp, #20] // 4-byte Reload +; CHECK-SD-NEXT: ldr w17, [sp, #48] // 4-byte Reload ; CHECK-SD-NEXT: msub w12, w12, w0, w5 ; CHECK-SD-NEXT: mov v2.h[3], w8 ; CHECK-SD-NEXT: mov v0.h[4], w12 @@ -2357,7 +2357,7 @@ define <16 x i16> @sv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-SD-NEXT: msub w8, w16, w10, w9 ; CHECK-SD-NEXT: smov w9, v3.h[7] ; CHECK-SD-NEXT: smov w10, v1.h[7] -; CHECK-SD-NEXT: ldr w16, [sp, #56] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w16, [sp, #56] // 4-byte Reload ; CHECK-SD-NEXT: mov v2.h[4], w8 ; CHECK-SD-NEXT: msub w16, w16, w17, w18 ; CHECK-SD-NEXT: mov v0.h[5], w16 @@ -2370,7 +2370,7 @@ define <16 x i16> @sv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-SD-NEXT: mov v0.h[6], w13 ; CHECK-SD-NEXT: sdiv w12, w10, w9 ; CHECK-SD-NEXT: msub w8, w11, w15, w3 -; CHECK-SD-NEXT: ldr w11, [sp, #16] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w11, [sp, #16] // 
4-byte Reload ; CHECK-SD-NEXT: msub w11, w11, w2, w6 ; CHECK-SD-NEXT: mov v2.h[6], w8 ; CHECK-SD-NEXT: mov v0.h[7], w11 @@ -2748,40 +2748,40 @@ define <16 x i16> @uv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-SD-NEXT: umov w0, v2.h[4] ; CHECK-SD-NEXT: umov w5, v0.h[4] ; CHECK-SD-NEXT: umov w2, v2.h[7] -; CHECK-SD-NEXT: str w8, [sp, #52] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #52] // 4-byte Spill ; CHECK-SD-NEXT: umov w6, v0.h[7] ; CHECK-SD-NEXT: umov w27, v3.h[0] -; CHECK-SD-NEXT: str w9, [sp, #44] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #44] // 4-byte Spill ; CHECK-SD-NEXT: udiv w9, w9, w8 ; CHECK-SD-NEXT: umov w28, v1.h[0] ; CHECK-SD-NEXT: umov w24, v3.h[1] ; CHECK-SD-NEXT: umov w25, v1.h[1] -; CHECK-SD-NEXT: ldr w21, [sp, #52] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w23, [sp, #44] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w21, [sp, #52] // 4-byte Reload +; CHECK-SD-NEXT: ldr w23, [sp, #44] // 4-byte Reload ; CHECK-SD-NEXT: umov w30, v3.h[2] ; CHECK-SD-NEXT: umov w12, v3.h[3] ; CHECK-SD-NEXT: umov w11, v1.h[3] ; CHECK-SD-NEXT: umov w14, v3.h[5] ; CHECK-SD-NEXT: umov w13, v1.h[5] ; CHECK-SD-NEXT: udiv w8, w22, w19 -; CHECK-SD-NEXT: str w9, [sp, #60] // 4-byte Folded Spill -; CHECK-SD-NEXT: ldr w20, [sp, #60] // 4-byte Folded Reload +; CHECK-SD-NEXT: str w9, [sp, #60] // 4-byte Spill +; CHECK-SD-NEXT: ldr w20, [sp, #60] // 4-byte Reload ; CHECK-SD-NEXT: msub w21, w20, w21, w23 ; CHECK-SD-NEXT: udiv w9, w3, w1 -; CHECK-SD-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w8, [sp, #12] // 4-byte Spill ; CHECK-SD-NEXT: udiv w8, w18, w7 ; CHECK-SD-NEXT: stp w9, w8, [sp, #24] // 8-byte Folded Spill ; CHECK-SD-NEXT: umov w8, v2.h[5] ; CHECK-SD-NEXT: umov w9, v0.h[5] ; CHECK-SD-NEXT: udiv w10, w5, w0 -; CHECK-SD-NEXT: ldr w20, [sp, #24] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w20, [sp, #24] // 4-byte Reload ; CHECK-SD-NEXT: msub w1, w20, w1, w3 -; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill -; 
CHECK-SD-NEXT: str w8, [sp, #48] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Spill +; CHECK-SD-NEXT: str w8, [sp, #48] // 4-byte Spill ; CHECK-SD-NEXT: fmov s0, w1 -; CHECK-SD-NEXT: ldr w1, [sp, #12] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w1, [sp, #12] // 4-byte Reload ; CHECK-SD-NEXT: msub w1, w1, w19, w22 -; CHECK-SD-NEXT: ldr w19, [sp, #28] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w19, [sp, #28] // 4-byte Reload ; CHECK-SD-NEXT: udiv w9, w9, w8 ; CHECK-SD-NEXT: umov w8, v2.h[6] ; CHECK-SD-NEXT: mov v0.h[1], w21 @@ -2789,10 +2789,10 @@ define <16 x i16> @uv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-SD-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov v0.h[2], w1 -; CHECK-SD-NEXT: str w9, [sp, #56] // 4-byte Folded Spill +; CHECK-SD-NEXT: str w9, [sp, #56] // 4-byte Spill ; CHECK-SD-NEXT: udiv w9, w4, w8 ; CHECK-SD-NEXT: mov v0.h[3], w18 -; CHECK-SD-NEXT: ldr w18, [sp, #40] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w18, [sp, #40] // 4-byte Reload ; CHECK-SD-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill ; CHECK-SD-NEXT: udiv w8, w6, w2 ; CHECK-SD-NEXT: umov w9, v1.h[4] @@ -2816,8 +2816,8 @@ define <16 x i16> @uv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-SD-NEXT: mov v2.h[2], w8 ; CHECK-SD-NEXT: udiv w16, w9, w10 ; CHECK-SD-NEXT: msub w8, w17, w12, w11 -; CHECK-SD-NEXT: ldr w12, [sp, #20] // 4-byte Folded Reload -; CHECK-SD-NEXT: ldr w17, [sp, #48] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w12, [sp, #20] // 4-byte Reload +; CHECK-SD-NEXT: ldr w17, [sp, #48] // 4-byte Reload ; CHECK-SD-NEXT: msub w12, w12, w0, w5 ; CHECK-SD-NEXT: mov v2.h[3], w8 ; CHECK-SD-NEXT: mov v0.h[4], w12 @@ -2825,7 +2825,7 @@ define <16 x i16> @uv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-SD-NEXT: msub w8, w16, w10, w9 ; CHECK-SD-NEXT: umov w9, v3.h[7] ; CHECK-SD-NEXT: umov w10, v1.h[7] -; CHECK-SD-NEXT: ldr w16, [sp, #56] // 4-byte Folded 
Reload +; CHECK-SD-NEXT: ldr w16, [sp, #56] // 4-byte Reload ; CHECK-SD-NEXT: mov v2.h[4], w8 ; CHECK-SD-NEXT: msub w16, w16, w17, w18 ; CHECK-SD-NEXT: mov v0.h[5], w16 @@ -2838,7 +2838,7 @@ define <16 x i16> @uv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-SD-NEXT: mov v0.h[6], w13 ; CHECK-SD-NEXT: udiv w12, w10, w9 ; CHECK-SD-NEXT: msub w8, w11, w15, w3 -; CHECK-SD-NEXT: ldr w11, [sp, #16] // 4-byte Folded Reload +; CHECK-SD-NEXT: ldr w11, [sp, #16] // 4-byte Reload ; CHECK-SD-NEXT: msub w11, w11, w2, w6 ; CHECK-SD-NEXT: mov v2.h[6], w8 ; CHECK-SD-NEXT: mov v0.h[7], w11 @@ -3962,7 +3962,7 @@ define <4 x i128> @sv4i128(<4 x i128> %d, <4 x i128> %e) { ; CHECK-SD-NEXT: mov x22, x4 ; CHECK-SD-NEXT: ldp x27, x28, [sp, #160] ; CHECK-SD-NEXT: ldp x29, x19, [sp, #144] -; CHECK-SD-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x8, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: bl __modti3 ; CHECK-SD-NEXT: mov x20, x0 ; CHECK-SD-NEXT: mov x25, x1 @@ -3980,7 +3980,7 @@ define <4 x i128> @sv4i128(<4 x i128> %d, <4 x i128> %e) { ; CHECK-SD-NEXT: bl __modti3 ; CHECK-SD-NEXT: mov x21, x0 ; CHECK-SD-NEXT: mov x22, x1 -; CHECK-SD-NEXT: ldr x2, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x2, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: ldp x0, x1, [sp, #16] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x3, x26 ; CHECK-SD-NEXT: bl __modti3 @@ -4033,7 +4033,7 @@ define <4 x i128> @sv4i128(<4 x i128> %d, <4 x i128> %e) { ; CHECK-GI-NEXT: ldp x24, x25, [sp, #144] ; CHECK-GI-NEXT: ldp x26, x27, [sp, #160] ; CHECK-GI-NEXT: stp x9, x6, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x8, [sp, #8] // 8-byte Spill ; CHECK-GI-NEXT: bl __modti3 ; CHECK-GI-NEXT: mov x28, x0 ; CHECK-GI-NEXT: mov x29, x1 @@ -4051,7 +4051,7 @@ define <4 x i128> @sv4i128(<4 x i128> %d, <4 x i128> %e) { ; CHECK-GI-NEXT: bl __modti3 ; CHECK-GI-NEXT: mov x21, x0 ; CHECK-GI-NEXT: ldp x2, x0, [sp, #16] // 16-byte Folded Reload -; 
CHECK-GI-NEXT: ldr x3, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x3, [sp, #8] // 8-byte Reload ; CHECK-GI-NEXT: mov x22, x1 ; CHECK-GI-NEXT: mov x1, x23 ; CHECK-GI-NEXT: bl __modti3 @@ -4308,7 +4308,7 @@ define <4 x i128> @uv4i128(<4 x i128> %d, <4 x i128> %e) { ; CHECK-SD-NEXT: mov x22, x4 ; CHECK-SD-NEXT: ldp x27, x28, [sp, #160] ; CHECK-SD-NEXT: ldp x29, x19, [sp, #144] -; CHECK-SD-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x8, [sp, #8] // 8-byte Spill ; CHECK-SD-NEXT: bl __umodti3 ; CHECK-SD-NEXT: mov x20, x0 ; CHECK-SD-NEXT: mov x25, x1 @@ -4326,7 +4326,7 @@ define <4 x i128> @uv4i128(<4 x i128> %d, <4 x i128> %e) { ; CHECK-SD-NEXT: bl __umodti3 ; CHECK-SD-NEXT: mov x21, x0 ; CHECK-SD-NEXT: mov x22, x1 -; CHECK-SD-NEXT: ldr x2, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr x2, [sp, #8] // 8-byte Reload ; CHECK-SD-NEXT: ldp x0, x1, [sp, #16] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x3, x26 ; CHECK-SD-NEXT: bl __umodti3 @@ -4379,7 +4379,7 @@ define <4 x i128> @uv4i128(<4 x i128> %d, <4 x i128> %e) { ; CHECK-GI-NEXT: ldp x24, x25, [sp, #144] ; CHECK-GI-NEXT: ldp x26, x27, [sp, #160] ; CHECK-GI-NEXT: stp x9, x6, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x8, [sp, #8] // 8-byte Spill ; CHECK-GI-NEXT: bl __umodti3 ; CHECK-GI-NEXT: mov x28, x0 ; CHECK-GI-NEXT: mov x29, x1 @@ -4397,7 +4397,7 @@ define <4 x i128> @uv4i128(<4 x i128> %d, <4 x i128> %e) { ; CHECK-GI-NEXT: bl __umodti3 ; CHECK-GI-NEXT: mov x21, x0 ; CHECK-GI-NEXT: ldp x2, x0, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x3, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x3, [sp, #8] // 8-byte Reload ; CHECK-GI-NEXT: mov x22, x1 ; CHECK-GI-NEXT: mov x1, x23 ; CHECK-GI-NEXT: bl __umodti3 diff --git a/llvm/test/CodeGen/AArch64/settag-merge.ll b/llvm/test/CodeGen/AArch64/settag-merge.ll index af922b91b221a..c0495833a3a69 100644 --- 
a/llvm/test/CodeGen/AArch64/settag-merge.ll +++ b/llvm/test/CodeGen/AArch64/settag-merge.ll @@ -175,7 +175,7 @@ define void @early_128_128(i1 %flag) { ; CHECK-LABEL: early_128_128: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #320 -; CHECK-NEXT: str x29, [sp, #304] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #304] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 320 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: tbz w0, #0, .LBB7_4 diff --git a/llvm/test/CodeGen/AArch64/settag.ll b/llvm/test/CodeGen/AArch64/settag.ll index 3d094ac8a517d..22016a793c13d 100644 --- a/llvm/test/CodeGen/AArch64/settag.ll +++ b/llvm/test/CodeGen/AArch64/settag.ll @@ -151,7 +151,7 @@ define void @stg_alloca17() nounwind { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #288 ; CHECK-NEXT: mov x8, #256 // =0x100 -; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #272] // 8-byte Spill ; CHECK-NEXT: .LBB11_1: // %entry ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: st2g sp, [sp], #32 @@ -172,7 +172,7 @@ define void @stg_alloca18() uwtable { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #288 ; CHECK-NEXT: .cfi_def_cfa_offset 288 -; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #272] // 8-byte Spill ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mov x8, #256 // =0x100 @@ -203,7 +203,7 @@ define void @test_slh() speculative_load_hardening { ; CHECK-NEXT: cmp sp, #0 ; CHECK-NEXT: csetm x16, ne ; CHECK-NEXT: sub sp, sp, #208 -; CHECK-NEXT: str x30, [sp, #192] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #192] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 208 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov x1, sp @@ -212,7 +212,7 @@ define void @test_slh() speculative_load_hardening { ; CHECK-NEXT: mov sp, x1 ; CHECK-NEXT: bl b ; CHECK-NEXT: cmp sp, #0 -; CHECK-NEXT: ldr x30, [sp, #192] // 8-byte Folded Reload +; CHECK-NEXT: ldr 
x30, [sp, #192] // 8-byte Reload ; CHECK-NEXT: csetm x16, ne ; CHECK-NEXT: and x30, x30, x16 ; CHECK-NEXT: add sp, sp, #208 diff --git a/llvm/test/CodeGen/AArch64/sibling-call.ll b/llvm/test/CodeGen/AArch64/sibling-call.ll index 98323350bb3b3..60031aec49e07 100644 --- a/llvm/test/CodeGen/AArch64/sibling-call.ll +++ b/llvm/test/CodeGen/AArch64/sibling-call.ll @@ -26,13 +26,13 @@ define dso_local void @caller_to8_from0() { ; CHECK-LABEL: caller_to8_from0: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl callee_stack8 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -45,7 +45,7 @@ define dso_local void @caller_to8_from0() { define dso_local void @caller_to8_from8([8 x i64], i64 %a) { ; CHECK-LABEL: caller_to8_from8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: b callee_stack8 @@ -58,11 +58,11 @@ define dso_local void @caller_to16_from8([8 x i64], i64 %a) { ; CHECK-LABEL: caller_to16_from8: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl callee_stack16 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -76,7 +76,7 @@ define dso_local void @caller_to16_from8([8 x i64], i64 %a) { define dso_local void @caller_to8_from24([8 x i64], i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: caller_to8_from24: ; CHECK: // 
%bb.0: -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: b callee_stack8 @@ -106,7 +106,7 @@ define dso_local void @indirect_tail() { ; CHECK-LABEL: indirect_tail: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, func -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ldr x1, [x8, :lo12:func] ; CHECK-NEXT: br x1 diff --git a/llvm/test/CodeGen/AArch64/sincos-stack-slots.ll b/llvm/test/CodeGen/AArch64/sincos-stack-slots.ll index c5fef61c96af3..fa8bb82392708 100644 --- a/llvm/test/CodeGen/AArch64/sincos-stack-slots.ll +++ b/llvm/test/CodeGen/AArch64/sincos-stack-slots.ll @@ -62,7 +62,7 @@ define { double, double } @sincos_f64_value_return(double %x) { ; CHECK-LABEL: sincos_f64_value_return: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: add x0, sp, #24 @@ -70,7 +70,7 @@ define { double, double } @sincos_f64_value_return(double %x) { ; CHECK-NEXT: bl sincos ; CHECK-NEXT: ldr d0, [sp, #24] ; CHECK-NEXT: ldr d1, [sp, #8] -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret entry: @@ -227,7 +227,7 @@ define void @sincos_misaligned_result_stores(double %x, ptr noalias %out_sin, pt ; CHECK-LABEL: sincos_misaligned_result_stores: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w19, -8 @@ -240,7 +240,7 @@ define void @sincos_misaligned_result_stores(double %x, ptr noalias %out_sin, pt ; CHECK-NEXT: bl sincos ; CHECK-NEXT: ldr d0, [sp, #24] ; CHECK-NEXT: ldr d1, 
[sp, #8] -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: str d0, [x20] ; CHECK-NEXT: str d1, [x19] ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload @@ -260,7 +260,7 @@ define void @can_fold_with_call_in_chain(float %x, ptr noalias %a, ptr noalias % ; CHECK-LABEL: can_fold_with_call_in_chain: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w19, -8 @@ -276,7 +276,7 @@ define void @can_fold_with_call_in_chain(float %x, ptr noalias %a, ptr noalias % ; CHECK-NEXT: mov x1, x19 ; CHECK-NEXT: bl sincosf ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll b/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll index b281204a66e46..bda5e579e8fd2 100644 --- a/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll +++ b/llvm/test/CodeGen/AArch64/sls-stackprotector-outliner.ll @@ -11,13 +11,13 @@ define hidden void @_ZTv0_n24_N2C6D1Ev(ptr %this) minsize sspreq "target-feature ; CHECK-LABEL: _ZTv0_n24_N2C6D1Ev: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: b.ne .LBB0_2 ; CHECK-NEXT: // %bb.1: // %entry -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; 
CHECK-NEXT: add x0, x0, x8 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN2C6D1Ev @@ -39,13 +39,13 @@ define hidden void @_ZTv0_n24_N2C6D0Ev(ptr %this) minsize sspreq "target-feature ; CHECK-LABEL: _ZTv0_n24_N2C6D0Ev: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: b.ne .LBB1_2 ; CHECK-NEXT: // %bb.1: // %entry -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add x0, x0, x8 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN2C6D0Ev @@ -66,13 +66,13 @@ define hidden void @_ZTv0_n24_N3C10D1Ev(ptr %this) minsize sspreq "target-featur ; CHECK-LABEL: _ZTv0_n24_N3C10D1Ev: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: b.ne .LBB2_2 ; CHECK-NEXT: // %bb.1: // %entry -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add x0, x0, x8 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN3C10D1Ev @@ -93,13 +93,13 @@ define hidden void @_ZTv0_n24_N3C10D0Ev(ptr %this) minsize sspreq "target-featur ; CHECK-LABEL: _ZTv0_n24_N3C10D0Ev: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: b.ne .LBB3_2 ; CHECK-NEXT: // %bb.1: // %entry -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte 
Reload ; CHECK-NEXT: add x0, x0, x8 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: b _ZN3C10D0Ev diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll index 0504959bcfa2b..30dbd1cb34667 100644 --- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll +++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll @@ -27,7 +27,7 @@ define i64 @agnostic_caller_private_za_callee(i64 %v) nounwind "aarch64_za_state ; CHECK-LABEL: agnostic_caller_private_za_callee: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: bl __arm_sme_state_size @@ -49,14 +49,14 @@ define i64 @agnostic_caller_private_za_callee(i64 %v) nounwind "aarch64_za_state ; CHECK-NEXT: bl __arm_sme_restore ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret ; ; CHECK-NEWLOWERING-LABEL: agnostic_caller_private_za_callee: ; CHECK-NEWLOWERING: // %bb.0: ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: mov x8, x0 ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size @@ -72,7 +72,7 @@ define i64 @agnostic_caller_private_za_callee(i64 %v) nounwind "aarch64_za_state ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore ; CHECK-NEWLOWERING-NEXT: mov x0, x8 ; CHECK-NEWLOWERING-NEXT: mov sp, x29 -; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret %res = call i64 @private_za_decl(i64 %v) @@ -298,7 +298,7 @@ define i64 @test_many_callee_arguments( ; CHECK-LABEL: test_many_callee_arguments: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: bl __arm_sme_state_size @@ -316,14 +316,14 @@ define i64 @test_many_callee_arguments( ; CHECK-NEXT: bl __arm_sme_restore ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret ; ; CHECK-NEWLOWERING-LABEL: test_many_callee_arguments: ; CHECK-NEWLOWERING: // %bb.0: ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: mov x8, x0 ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size @@ -341,7 +341,7 @@ define i64 @test_many_callee_arguments( ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore ; CHECK-NEWLOWERING-NEXT: mov x0, x8 ; CHECK-NEWLOWERING-NEXT: mov sp, x29 -; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9 @@ -355,7 +355,7 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s ; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: bl __arm_sme_state_size ; CHECK-NEXT: mov x8, sp @@ -376,14 +376,14 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: bl __arm_sme_restore ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret ; ; CHECK-NEWLOWERING-LABEL: agnostic_za_buffer_alloc_with_stack_probes: ; CHECK-NEWLOWERING: // %bb.0: ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size ; CHECK-NEWLOWERING-NEXT: mov x8, sp @@ -404,7 +404,7 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s ; CHECK-NEWLOWERING-NEXT: mov x0, x19 ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore ; CHECK-NEWLOWERING-NEXT: mov sp, x29 -; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret call void @private_za() diff --git a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll index 9bc5ee6988bcf..45e98f9d82c96 100644 --- a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll +++ b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll @@ -50,14 +50,14 @@ define void @streaming_compatible_arg(float %f) #0 { ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: tbz w19, #0, .LBB1_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; CHECK-NEXT: bl non_streaming ; CHECK-NEXT: tbz w19, #0, .LBB1_4 ; CHECK-NEXT: // %bb.3: diff --git a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll 
b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll index b58a857f3a3cb..b9a542b330c0f 100644 --- a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll +++ b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll @@ -12,18 +12,18 @@ define void @fbyte( %v) #0{ ; NOPAIR-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill ; NOPAIR-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; NOPAIR-NEXT: addvl sp, sp, #-18 -; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; NOPAIR-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str 
z22, [sp, #3, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -76,18 +76,18 @@ define void @fbyte( %v) #0{ ; NOPAIR-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; NOPAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; NOPAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; NOPAIR-NEXT: addvl sp, sp, #18 ; NOPAIR-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr x29, [sp], #32 // 8-byte 
Folded Reload @@ -98,18 +98,18 @@ define void @fbyte( %v) #0{ ; PAIR-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill ; PAIR-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; PAIR-NEXT: addvl sp, sp, #-18 -; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; PAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; PAIR-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -162,18 +162,18 @@ define void @fbyte( %v) #0{ ; PAIR-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; PAIR-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte 
Folded Reload ; PAIR-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; PAIR-NEXT: addvl sp, sp, #18 ; PAIR-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; PAIR-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload @@ -187,18 +187,18 @@ define void @fhalf( %v) #1{ ; NOPAIR: // %bb.0: ; NOPAIR-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; NOPAIR-NEXT: addvl sp, sp, #-18 -; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; NOPAIR-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -234,18 +234,18 @@ define void @fhalf( %v) #1{ ; NOPAIR-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; NOPAIR-NEXT: ldr 
p15, [sp, #4, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; NOPAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; NOPAIR-NEXT: addvl sp, sp, #18 ; NOPAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; NOPAIR-NEXT: ret @@ -254,26 +254,26 @@ define void @fhalf( %v) #1{ ; PAIR: // %bb.0: ; PAIR-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; PAIR-NEXT: addvl sp, sp, #-18 -; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill ; PAIR-NEXT: ptrue pn8.b -; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill ; PAIR-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill ; PAIR-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill ; PAIR-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; PAIR-NEXT: str p5, 
[sp, #14, mul vl] // 2-byte Spill +; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; PAIR-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: smstop sm @@ -289,18 +289,18 @@ define void @fhalf( %v) #1{ ; PAIR-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload ; PAIR-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload ; PAIR-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload -; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload 
; PAIR-NEXT: addvl sp, sp, #18 ; PAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; PAIR-NEXT: ret @@ -313,18 +313,18 @@ define void @ffloat( %v) #2 { ; NOPAIR: // %bb.0: ; NOPAIR-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; NOPAIR-NEXT: addsvl sp, sp, #-18 -; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; NOPAIR-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -360,18 +360,18 @@ define void @ffloat( %v) 
#2 { ; NOPAIR-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; NOPAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; NOPAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; NOPAIR-NEXT: addsvl sp, sp, #18 ; NOPAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; NOPAIR-NEXT: ret @@ -380,18 +380,18 @@ define void @ffloat( %v) #2 { ; PAIR: // %bb.0: ; PAIR-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; PAIR-NEXT: addsvl sp, sp, #-18 -; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; PAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; PAIR-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -427,18 +427,18 @@ define void @ffloat( %v) #2 { ; PAIR-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; PAIR-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; PAIR-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: 
ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; PAIR-NEXT: addsvl sp, sp, #18 ; PAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; PAIR-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll index 48ac156a43875..f7d8b4dc4513d 100644 --- a/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll +++ b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll @@ -12,7 +12,7 @@ define void @locally_streaming_fn() #0 { ; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill -; CHECK-NEXT: 
str x9, [sp, #80] ; 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #80] ; 8-byte Spill ; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: .cfi_offset w30, -24 ; CHECK-NEXT: .cfi_offset w29, -32 diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll index 05d636158b92b..57025ea172097 100644 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -16,18 +16,18 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline ; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Spill +; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Spill ; CHECK-FISEL-NEXT: smstart sm -; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload +; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Reload ; CHECK-FISEL-NEXT: bl streaming_callee -; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-FISEL-NEXT: smstop sm -; CHECK-FISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload +; CHECK-FISEL-NEXT: ldr d1, [sp, #8] // 8-byte Reload ; CHECK-FISEL-NEXT: adrp x8, .LCPI0_0 ; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI0_0] ; CHECK-FISEL-NEXT: fadd d0, d1, d0 -; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload @@ -42,18 +42,18 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline ; 
CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Folded Spill +; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Spill +; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Spill ; CHECK-GISEL-NEXT: smstart sm -; CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload +; CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Reload ; CHECK-GISEL-NEXT: bl streaming_callee -; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-GISEL-NEXT: smstop sm -; CHECK-GISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload +; CHECK-GISEL-NEXT: ldr d1, [sp, #8] // 8-byte Reload ; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 ; CHECK-GISEL-NEXT: fmov d0, x8 ; CHECK-GISEL-NEXT: fadd d0, d1, d0 -; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload @@ -75,18 +75,18 @@ define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Spill +; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Spill ; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr d0, [sp] // 
8-byte Reload ; CHECK-COMMON-NEXT: bl normal_callee -; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-COMMON-NEXT: smstart sm -; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Reload ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 ; CHECK-COMMON-NEXT: fmov d0, x8 ; CHECK-COMMON-NEXT: fadd d0, d1, d0 -; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload @@ -107,24 +107,24 @@ define double @locally_streaming_caller_normal_callee(double %x) nounwind noinli ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Spill +; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Spill ; CHECK-COMMON-NEXT: smstart sm -; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload -; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Reload +; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Spill ; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Reload ; CHECK-COMMON-NEXT: bl normal_callee -; CHECK-COMMON-NEXT: str d0, [sp, #16] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str d0, [sp, #16] // 8-byte Spill ; CHECK-COMMON-NEXT: smstart sm -; CHECK-COMMON-NEXT: ldr d1, [sp, #16] // 
8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr d1, [sp, #16] // 8-byte Reload ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 ; CHECK-COMMON-NEXT: fmov d0, x8 ; CHECK-COMMON-NEXT: fadd d0, d1, d0 -; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload @@ -170,11 +170,11 @@ define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noin ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-COMMON-NEXT: smstart sm ; CHECK-COMMON-NEXT: blr x0 ; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -191,11 +191,11 @@ define void @normal_call_to_streaming_callee_ptr(ptr %p) nounwind noinline optno ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: 
str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-COMMON-NEXT: smstart sm ; CHECK-COMMON-NEXT: blr x0 ; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -243,7 +243,7 @@ define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline ; CHECK-COMMON-LABEL: za_shared_caller_to_za_none_callee: ; CHECK-COMMON: // %bb.0: // %entry ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-COMMON-NEXT: mov x29, sp ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: rdsvl x8, #1 @@ -268,7 +268,7 @@ define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline ; CHECK-COMMON-NEXT: fmov d1, x8 ; CHECK-COMMON-NEXT: fadd d0, d0, d1 ; CHECK-COMMON-NEXT: mov sp, x29 -; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ret entry: @@ -282,7 +282,7 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind { ; CHECK-COMMON-LABEL: f128_call_za: ; CHECK-COMMON: // %bb.0: ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-COMMON-NEXT: mov x29, sp ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: rdsvl x8, #1 @@ -302,7 +302,7 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind { ; CHECK-COMMON-NEXT: .LBB8_2: ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr ; CHECK-COMMON-NEXT: mov sp, x29 -; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ret %res = fadd fp128 %a, %b @@ -319,17 +319,17 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-COMMON-NEXT: smstop sm ; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-COMMON-NEXT: bl __addtf3 -; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-COMMON-NEXT: smstart sm ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; 
CHECK-COMMON-NEXT: add sp, sp, #112 @@ -343,7 +343,7 @@ define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind { ; CHECK-COMMON-LABEL: frem_call_za: ; CHECK-COMMON: // %bb.0: ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-COMMON-NEXT: mov x29, sp ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: rdsvl x8, #1 @@ -363,7 +363,7 @@ define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind { ; CHECK-COMMON-NEXT: .LBB10_2: ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr ; CHECK-COMMON-NEXT: mov sp, x29 -; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ret %res = frem double %a, %b @@ -379,17 +379,17 @@ define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounw ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: smstop sm ; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: bl fmodf -; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-COMMON-NEXT: smstart sm ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr 
x30, [sp, #80] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: add sp, sp, #96 @@ -416,13 +416,13 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati ; CHECK-COMMON-NEXT: .LBB12_2: ; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: bl fmodf -; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_4 ; CHECK-COMMON-NEXT: // %bb.3: ; CHECK-COMMON-NEXT: smstart sm ; CHECK-COMMON-NEXT: .LBB12_4: ; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll b/llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll index f72ccadea5dba..0c886c643c5fb 100644 --- a/llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll +++ b/llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll @@ -10,7 +10,7 @@ define i32 @load_tls_streaming_compat() nounwind "aarch64_pstate_sm_compatible" ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: mrs x8, SVCR ; CHECK-NEXT: tbz w8, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: // %entry @@ -30,7 +30,7 @@ define i32 @load_tls_streaming_compat() nounwind "aarch64_pstate_sm_compatible" ; 
CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldr w0, [x8, x0] ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -46,7 +46,7 @@ define i32 @load_tls_streaming() nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: adrp x0, :tlsdesc:x ; CHECK-NEXT: ldr x1, [x0, :tlsdesc_lo12:x] @@ -59,7 +59,7 @@ define i32 @load_tls_streaming() nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldr w0, [x8, x0] ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -116,7 +116,7 @@ define i32 @load_tls_streaming_shared_za() nounwind "aarch64_inout_za" "aarch64_ ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: add x29, sp, #64 -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #80] // 8-byte Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp @@ -146,7 +146,7 @@ define i32 @load_tls_streaming_shared_za() nounwind "aarch64_inout_za" "aarch64_ ; CHECK-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEXT: sub sp, x29, #64 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte 
Folded Reload +; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll index a7d51968c5157..50dd0c699284c 100644 --- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll +++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll @@ -13,7 +13,7 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" { ; CHECK-COMMON-LABEL: test_lazy_save_1_callee: ; CHECK-COMMON: // %bb.0: ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-COMMON-NEXT: mov x29, sp ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: rdsvl x8, #1 @@ -33,7 +33,7 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" { ; CHECK-COMMON-NEXT: .LBB0_2: ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr ; CHECK-COMMON-NEXT: mov sp, x29 -; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ret call void @private_za_callee() @@ -82,7 +82,7 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" { ; CHECK-NEWLOWERING-LABEL: test_lazy_save_2_callees: ; CHECK-NEWLOWERING: // %bb.0: ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 @@ -103,7 +103,7 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" { ; CHECK-NEWLOWERING-NEXT: .LBB1_2: ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEWLOWERING-NEXT: mov sp, x29 -; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret call void @private_za_callee() @@ -116,7 +116,7 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inou ; CHECK-COMMON-LABEL: test_lazy_save_expanded_intrinsic: ; CHECK-COMMON: // %bb.0: ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-COMMON-NEXT: mov x29, sp ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: rdsvl x8, #1 @@ -136,7 +136,7 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inou ; CHECK-COMMON-NEXT: .LBB2_2: ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr ; CHECK-COMMON-NEXT: mov sp, x29 -; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ret %res = call float @llvm.cos.f32(float %a) @@ -296,7 +296,7 @@ define void @test_lazy_save_mixed_shared_and_private_callees() "aarch64_new_za" ; CHECK-NEWLOWERING-LABEL: test_lazy_save_mixed_shared_and_private_callees: ; CHECK-NEWLOWERING: // %bb.0: ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 ; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa w29, 32 @@ -335,7 +335,7 @@ define void @test_lazy_save_mixed_shared_and_private_callees() "aarch64_new_za" ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEWLOWERING-NEXT: smstop za ; CHECK-NEWLOWERING-NEXT: mov sp, x29 -; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret { @@ -434,7 +434,7 @@ define void @test_many_back2back_private_za_calls() "aarch64_inout_za" { ; CHECK-NEWLOWERING-LABEL: test_many_back2back_private_za_calls: ; CHECK-NEWLOWERING: // %bb.0: ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 ; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa w29, 32 @@ -465,7 +465,7 @@ define void @test_many_back2back_private_za_calls() "aarch64_inout_za" { ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEWLOWERING-NEXT: bl shared_za_callee ; CHECK-NEWLOWERING-NEXT: mov sp, x29 -; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret call void @shared_za_callee() @@ -483,7 +483,7 @@ define void @test_shared_private_shared() nounwind "aarch64_inout_za" { ; CHECK-COMMON-LABEL: test_shared_private_shared: ; CHECK-COMMON: // %bb.0: ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-COMMON-NEXT: mov x29, sp ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: rdsvl x8, #1 @@ -505,7 +505,7 @@ define void @test_shared_private_shared() nounwind "aarch64_inout_za" { ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr ; CHECK-COMMON-NEXT: bl shared_za_callee ; CHECK-COMMON-NEXT: mov sp, x29 -; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ret call void @shared_za_callee() @@ -532,7 +532,7 @@ define i64 @test_shared_private_shared_i64(i64 %x) nounwind "aarch64_inout_za" { ; CHECK-COMMON-LABEL: test_shared_private_shared_i64: ; CHECK-COMMON: // %bb.0: ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-COMMON-NEXT: mov x29, sp ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: rdsvl x8, #1 @@ -556,7 +556,7 @@ define i64 @test_shared_private_shared_i64(i64 %x) nounwind "aarch64_inout_za" { ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr ; CHECK-COMMON-NEXT: bl shared_za_callee_i64 ; CHECK-COMMON-NEXT: mov sp, x29 -; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ret %a = call i64 @shared_za_callee_i64(i64 %x) @@ -575,7 +575,7 @@ define i64 @test_many_callee_arguments( ; CHECK-LABEL: test_many_callee_arguments: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: mov x8, sp @@ -600,14 +600,14 @@ define i64 @test_many_callee_arguments( ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret ; ; CHECK-NEWLOWERING-LABEL: test_many_callee_arguments: ; CHECK-NEWLOWERING: // %bb.0: ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 @@ -632,7 +632,7 @@ define i64 @test_many_callee_arguments( ; CHECK-NEWLOWERING-NEXT: mov x0, x8 ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEWLOWERING-NEXT: mov sp, x29 -; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9 diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll index 1c341e8daf491..f7f8b223f233b 100644 --- a/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll +++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll @@ -9,7 +9,7 @@ define void @test_lazy_save() nounwind "aarch64_inout_za" { ; CHECK-LABEL: test_lazy_save: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x30, x29, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 @@ -31,7 +31,7 @@ define void @test_lazy_save() nounwind "aarch64_inout_za" { ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x30, x29, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @private_za_callee() diff --git a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll index 2e198ad8f0d05..af9466046c27c 100644 --- a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll +++ b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll @@ -13,7 +13,7 @@ define void @foo() "aarch64_pstate_sm_body" { ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_get_current_vg -; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x0, [sp, #80] // 8-byte Spill ; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: .cfi_offset w30, -24 ; CHECK-NEXT: .cfi_offset w29, -32 diff --git a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll index 78f7e5c009288..0717387ae2963 100644 --- a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll +++ b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll @@ -56,12 +56,12 @@ define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" ; CHECK-LABEL: private_za_multiple_exit: ; CHECK: // %bb.0: // %prelude ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str x2, [sp] // 8-byte 
Folded Spill -; CHECK-NEXT: str w1, [sp, #8] // 4-byte Folded Spill -; CHECK-NEXT: str w0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str x2, [sp] // 8-byte Spill +; CHECK-NEXT: str w1, [sp, #8] // 4-byte Spill +; CHECK-NEXT: str w0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mrs x8, TPIDR2_EL0 ; CHECK-NEXT: cbz x8, .LBB1_2 @@ -72,26 +72,26 @@ define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" ; CHECK-NEXT: msr TPIDR2_EL0, x8 ; CHECK-NEXT: b .LBB1_2 ; CHECK-NEXT: .LBB1_2: // %entry -; CHECK-NEXT: ldr x8, [sp] // 8-byte Folded Reload +; CHECK-NEXT: ldr x8, [sp] // 8-byte Reload ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero {za} ; CHECK-NEXT: subs x8, x8, #1 ; CHECK-NEXT: b.ne .LBB1_4 ; CHECK-NEXT: b .LBB1_3 ; CHECK-NEXT: .LBB1_3: // %if.else -; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload -; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload +; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload ; CHECK-NEXT: add w0, w8, w9 ; CHECK-NEXT: smstop za -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_4: // %if.end -; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload -; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload +; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload ; CHECK-NEXT: subs w0, w8, w9 ; CHECK-NEXT: smstop za -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret ; @@ -109,21 +109,21 @@ define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" ; CHECK-NEWLOWERING-NEXT: b .LBB1_2 ; CHECK-NEWLOWERING-NEXT: .LBB1_2: // %entry ; CHECK-NEWLOWERING-NEXT: smstart za -; CHECK-NEWLOWERING-NEXT: str w1, [sp, #8] // 4-byte Folded Spill -; 
CHECK-NEWLOWERING-NEXT: str w0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str w1, [sp, #8] // 4-byte Spill +; CHECK-NEWLOWERING-NEXT: str w0, [sp, #12] // 4-byte Spill ; CHECK-NEWLOWERING-NEXT: subs x8, x2, #1 ; CHECK-NEWLOWERING-NEXT: b.ne .LBB1_4 ; CHECK-NEWLOWERING-NEXT: b .LBB1_3 ; CHECK-NEWLOWERING-NEXT: .LBB1_3: // %if.else -; CHECK-NEWLOWERING-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload -; CHECK-NEWLOWERING-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr w8, [sp, #12] // 4-byte Reload +; CHECK-NEWLOWERING-NEXT: ldr w9, [sp, #8] // 4-byte Reload ; CHECK-NEWLOWERING-NEXT: add w0, w8, w9 ; CHECK-NEWLOWERING-NEXT: smstop za ; CHECK-NEWLOWERING-NEXT: add sp, sp, #16 ; CHECK-NEWLOWERING-NEXT: ret ; CHECK-NEWLOWERING-NEXT: .LBB1_4: // %if.end -; CHECK-NEWLOWERING-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload -; CHECK-NEWLOWERING-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr w8, [sp, #12] // 4-byte Reload +; CHECK-NEWLOWERING-NEXT: ldr w9, [sp, #8] // 4-byte Reload ; CHECK-NEWLOWERING-NEXT: subs w0, w8, w9 ; CHECK-NEWLOWERING-NEXT: smstop za ; CHECK-NEWLOWERING-NEXT: add sp, sp, #16 @@ -146,10 +146,10 @@ define i32 @private_za_trivially_does_not_use_za(i32 %x) "aarch64_new_za" { ; CHECK-LABEL: private_za_trivially_does_not_use_za: ; CHECK: // %bb.0: // %prelude ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str w0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str w0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mrs x8, TPIDR2_EL0 ; CHECK-NEXT: cbz x8, .LBB2_2 @@ -160,12 +160,12 @@ define i32 @private_za_trivially_does_not_use_za(i32 %x) "aarch64_new_za" { ; CHECK-NEXT: msr TPIDR2_EL0, x8 ; CHECK-NEXT: b .LBB2_2 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Folded 
Reload +; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero {za} ; CHECK-NEXT: add w0, w8, w8 ; CHECK-NEXT: smstop za -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll index ced0d41c22dab..a3027f01e73cf 100644 --- a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll +++ b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll @@ -14,13 +14,13 @@ define void @test0(ptr %callee) nounwind { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: bl callee_sm ; CHECK-NEXT: bl callee_sm ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload @@ -38,13 +38,13 @@ define void @test1() nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr 
x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload @@ -142,7 +142,7 @@ define void @test4() nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: fmov s0, wzr ; CHECK-NEXT: bl callee_farg @@ -150,7 +150,7 @@ define void @test4() nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: bl callee_farg ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload @@ -169,16 +169,16 @@ define void @test5(float %f) nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; CHECK-NEXT: bl callee_farg -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; CHECK-NEXT: bl callee_farg ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d9, d8, 
[sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload @@ -197,18 +197,18 @@ define float @test6(float %f) nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; CHECK-NEXT: bl callee_farg_fret ; CHECK-NEXT: bl callee_farg_fret -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -253,13 +253,13 @@ define void @test8() nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, 
#64] // 8-byte Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm ; CHECK-NEXT: smstop za ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload @@ -291,21 +291,21 @@ define aarch64_sve_vector_pcs void @test9_1() "aarch64_pstate_sm_body" { ; CHECK-LABEL: test9_1: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x28, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: addsvl sp, sp, #-18 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 
2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -351,20 +351,20 @@ define aarch64_sve_vector_pcs void @test9_1() "aarch64_pstate_sm_body" { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, 
mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addsvl sp, sp, #18 -; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @callee() @@ -384,7 +384,7 @@ define void @test10() "aarch64_pstate_sm_body" { ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #80] // 8-byte Spill ; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: .cfi_offset w30, -24 ; CHECK-NEXT: .cfi_offset w29, -32 @@ -469,7 +469,7 @@ define void @test12() "aarch64_pstate_sm_body" { ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #80] // 8-byte Spill ; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: .cfi_offset w30, -24 ; CHECK-NEXT: .cfi_offset w29, -32 @@ -522,7 +522,7 @@ define void @test13(ptr %ptr) nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z0.s, #0 // =0x0 @@ -552,7 +552,7 @@ define void @test13(ptr %ptr) nounwind 
"aarch64_pstate_sm_enabled" { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -574,7 +574,7 @@ define void @test14(ptr %callee) nounwind "aarch64_inout_za" { ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: add x29, sp, #64 -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #80] // 8-byte Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp @@ -597,7 +597,7 @@ define void @test14(ptr %callee) nounwind "aarch64_inout_za" { ; CHECK-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEXT: sub sp, x29, #64 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -653,7 +653,7 @@ define void @test16(ptr %callee) nounwind "aarch64_pstate_sm_body" "aarch64_new_ ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: add x29, sp, #64 -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #80] // 8-byte Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp @@ -678,7 +678,7 @@ define void @test16(ptr %callee) nounwind "aarch64_pstate_sm_body" "aarch64_new_ ; CHECK-NEXT: smstop za ; CHECK-NEXT: sub sp, x29, #64 ; CHECK-NEXT: ldp 
x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll index 72f6646930624..98735c9518b73 100644 --- a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll +++ b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll @@ -19,7 +19,7 @@ define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov s0, w0 @@ -32,7 +32,7 @@ define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -51,7 +51,7 @@ define void @dont_coalesce_arg_i16(i16 %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 
8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov s0, w0 @@ -64,7 +64,7 @@ define void @dont_coalesce_arg_i16(i16 %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -83,7 +83,7 @@ define void @dont_coalesce_arg_i32(i32 %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov s0, w0 @@ -96,7 +96,7 @@ define void @dont_coalesce_arg_i32(i32 %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -115,7 +115,7 @@ define void @dont_coalesce_arg_i64(i64 %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; 
CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov d0, x0 @@ -128,7 +128,7 @@ define void @dont_coalesce_arg_i64(i64 %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -147,17 +147,17 @@ define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill +; CHECK-NEXT: str h0, [sp, #14] // 2-byte Spill ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload +; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Reload ; CHECK-NEXT: bl use_f16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -166,7 +166,7 @@ define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte 
Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -185,17 +185,17 @@ define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; CHECK-NEXT: bl use_f32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -204,7 +204,7 @@ define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -223,17 +223,17 @@ define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; 
CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: bl use_f64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -242,7 +242,7 @@ define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -266,17 +266,17 @@ define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: 
bl use_v16i8 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -285,7 +285,7 @@ define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -305,17 +305,17 @@ define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: bl use_v8i16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -324,7 +324,7 @@ define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; 
CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -344,17 +344,17 @@ define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: bl use_v4i32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -363,7 +363,7 @@ define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -383,17 +383,17 @@ define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; 
CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: bl use_v2i64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -402,7 +402,7 @@ define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -422,17 +422,17 @@ define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill +; CHECK-NEXT: str h0, [sp, #14] // 2-byte Spill ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload +; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Reload ; CHECK-NEXT: bl use_v8f16 ; CHECK-NEXT: smstart sm ; 
CHECK-NEXT: add x8, sp, #16 @@ -441,7 +441,7 @@ define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -461,7 +461,7 @@ define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 @@ -470,9 +470,9 @@ define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: bl use_v4f32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -481,7 +481,7 @@ define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] 
// 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -501,17 +501,17 @@ define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: bl use_v2f64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -520,7 +520,7 @@ define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -544,17 +544,17 @@ define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 
16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl use_v16i8 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -563,7 +563,7 @@ define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -582,17 +582,17 @@ define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl use_v8i16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: 
add x8, sp, #16 @@ -601,7 +601,7 @@ define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -620,17 +620,17 @@ define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl use_v4i32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -639,7 +639,7 @@ define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ 
-658,17 +658,17 @@ define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl use_v2i64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -677,7 +677,7 @@ define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -696,17 +696,17 @@ define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, 
sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl use_v8f16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -715,7 +715,7 @@ define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -734,17 +734,17 @@ define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl use_v8bf16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -753,7 +753,7 @@ define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr 
%ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -772,17 +772,17 @@ define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl use_v4f32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -791,7 +791,7 @@ define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -810,17 +810,17 @@ define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 { 
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl use_v2f64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 @@ -829,7 +829,7 @@ define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -851,7 +851,7 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 @@ -861,21 +861,21 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 { ; CHECK-NEXT: add x8, sp, 
#16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 -; CHECK-NEXT: str p0, [x8, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p0, [x8, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: bl use_v8i1 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: add x8, sp, #16 -; CHECK-NEXT: ldr p0, [x8, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p0, [x8, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -1007,9 +1007,9 @@ define void @dont_coalesce_res_f16(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_f16 -; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill +; CHECK-NEXT: str h0, [sp, #14] // 2-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload +; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: str z0, [x19] @@ -1037,9 +1037,9 @@ define void @dont_coalesce_res_f32(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_f32 -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr s0, [sp, #12] // 
4-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] @@ -1066,9 +1066,9 @@ define void @dont_coalesce_res_f64(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_f64 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] @@ -1099,9 +1099,9 @@ define void @dont_coalesce_res_v1i8(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i8 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] @@ -1129,9 +1129,9 @@ define void @dont_coalesce_res_v1i16(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i16 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] @@ -1159,9 +1159,9 @@ define void @dont_coalesce_res_v1i32(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i32 -; CHECK-NEXT: str d0, 
[sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] @@ -1189,9 +1189,9 @@ define void @dont_coalesce_res_v1i64(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i64 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] @@ -1219,9 +1219,9 @@ define void @dont_coalesce_res_v1f16(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1f16 -; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill +; CHECK-NEXT: str h0, [sp, #14] // 2-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload +; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: str z0, [x19] @@ -1250,9 +1250,9 @@ define void @dont_coalesce_res_v1f32(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1f32 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] @@ -1280,9 +1280,9 @@ define void 
@dont_coalesce_res_v1f64(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1f64 -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] @@ -1314,9 +1314,9 @@ define void @dont_coalesce_res_v16i8(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v16i8 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] @@ -1344,9 +1344,9 @@ define void @dont_coalesce_res_v8i16(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v8i16 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] @@ -1374,9 +1374,9 @@ define void @dont_coalesce_res_v4i32(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v4i32 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 
def $z0 ; CHECK-NEXT: str z0, [x19] @@ -1404,9 +1404,9 @@ define void @dont_coalesce_res_v2i64(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v2i64 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] @@ -1434,9 +1434,9 @@ define void @dont_coalesce_res_v8f16(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v8f16 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] @@ -1464,9 +1464,9 @@ define void @dont_coalesce_res_v4f32(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v4f32 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] @@ -1494,9 +1494,9 @@ define void @dont_coalesce_res_v2f64(ptr %ptr) #0 { ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v2f64 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded 
Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll index a3ec2ddb2b872..6c3975a9b452b 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll @@ -12,13 +12,13 @@ define void @locally_streaming_caller_streaming_callee() "aarch64_pstate_sm_body ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: bl streaming_compatible_callee ; CHECK-NEXT: bl streaming_compatible_callee ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload @@ -92,18 +92,18 @@ define <2 x i64> @locally_streaming_caller_no_callee(<2 x i64> %a) "aarch64_psta ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: add z0.d, z0.d, z1.d ; CHECK-NEXT: add z0.d, z0.d, #41 // =0x29 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: str q0, [sp] // 16-byte 
Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload @@ -144,17 +144,17 @@ define <2 x i64> @locally_streaming_caller_compatible_callee_vec_args_ret(<2 x i ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl streaming_compatible_callee_vec_args_ret -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -173,15 +173,15 @@ define {<2 x i64>, <2 x i64>} @locally_streaming_caller_compatible_callee_struct ; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill 
-; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #96] // 8-byte Spill +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl streaming_compatible_callee_vec_arg_struct_ret ; CHECK-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload @@ -247,7 +247,7 @@ define float @test_arg_survives_loop(float %arg, i32 %N) nounwind "aarch64_pstat ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB9_1: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 @@ -255,12 +255,12 @@ define float @test_arg_survives_loop(float %arg, i32 %N) nounwind "aarch64_pstat ; CHECK-NEXT: b.ne .LBB9_1 ; CHECK-NEXT: // %bb.2: // %for.cond.cleanup ; CHECK-NEXT: fmov s0, #1.00000000 -; CHECK-NEXT: ldr s1, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr s1, [sp, #12] // 4-byte Reload ; CHECK-NEXT: fadd s0, s1, s0 -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; 
CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload @@ -288,12 +288,12 @@ define void @disable_tailcallopt() "aarch64_pstate_sm_body" nounwind { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: bl streaming_compatible_callee ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-checkvl.ll b/llvm/test/CodeGen/AArch64/sme-streaming-checkvl.ll index a1eb1ceeaf19b..58c6e2e27c451 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-checkvl.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-checkvl.ll @@ -51,7 +51,7 @@ define void @foo_non_streaming_pass_arg(ptr %arg) { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: .cfi_def_cfa wsp, 96 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #88] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -87,11 +87,11 @@ define void @foo_streaming_compatible_pass_arg(ptr %arg) #1 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; 
CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill -; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #1120] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Spill +; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Spill +; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Spill +; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Spill +; CHECK-NEXT: str x19, [sp, #1120] // 8-byte Spill ; CHECK-NEXT: add x29, sp, #1088 ; CHECK-NEXT: .cfi_def_cfa w29, 48 ; CHECK-NEXT: .cfi_offset w19, -16 @@ -133,11 +133,11 @@ define void @foo_streaming_compatible_pass_arg(ptr %arg) #1 { ; CHECK-NEXT: add sp, sp, #1024 ; CHECK-NEXT: .cfi_def_cfa wsp, 1136 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #1120] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #1120] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Reload +; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Reload +; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #1136 @@ -173,10 +173,10 @@ define void @foo_streaming_pass_arg(ptr %arg) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill -; CHECK-NEXT: str x9, [sp, 
#1104] // 8-byte Folded Spill -; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Spill +; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Spill +; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Spill +; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Spill ; CHECK-NEXT: .cfi_offset w28, -8 ; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: .cfi_offset w30, -24 @@ -204,10 +204,10 @@ define void @foo_streaming_pass_arg(ptr %arg) #0 { ; CHECK-NEXT: add sp, sp, #1024 ; CHECK-NEXT: .cfi_def_cfa_offset 1120 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Reload +; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #1120 @@ -242,7 +242,7 @@ define void @foo_non_streaming_retval(ptr %ptr) { ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #80] // 8-byte Spill ; CHECK-NEXT: stp x28, x19, [sp, #96] // 16-byte Folded Spill ; CHECK-NEXT: add x29, sp, #64 ; CHECK-NEXT: .cfi_def_cfa w29, 48 @@ -314,12 +314,12 @@ define void @foo_streaming_compatible_retval(ptr %ptr) #1 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill -; CHECK-NEXT: str x30, [sp, 
#1096] // 8-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill -; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill -; CHECK-NEXT: str x20, [sp, #1120] // 8-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #1128] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Spill +; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Spill +; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Spill +; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Spill +; CHECK-NEXT: str x20, [sp, #1120] // 8-byte Spill +; CHECK-NEXT: str x19, [sp, #1128] // 8-byte Spill ; CHECK-NEXT: add x29, sp, #1088 ; CHECK-NEXT: .cfi_def_cfa w29, 48 ; CHECK-NEXT: .cfi_offset w19, -8 @@ -363,12 +363,12 @@ define void @foo_streaming_compatible_retval(ptr %ptr) #1 { ; CHECK-NEXT: add sp, sp, #1024 ; CHECK-NEXT: .cfi_def_cfa wsp, 1136 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #1128] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #1128] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp, #1120] // 8-byte Folded Reload -; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp, #1120] // 8-byte Reload +; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Reload +; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Reload +; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #1136 @@ -405,11 +405,11 @@ define void @foo_streaming_retval(ptr %ptr) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill -; CHECK-NEXT: 
str x30, [sp, #1096] // 8-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill -; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #1120] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Spill +; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Spill +; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Spill +; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Spill +; CHECK-NEXT: str x19, [sp, #1120] // 8-byte Spill ; CHECK-NEXT: add x29, sp, #1088 ; CHECK-NEXT: .cfi_def_cfa w29, 48 ; CHECK-NEXT: .cfi_offset w19, -16 @@ -445,11 +445,11 @@ define void @foo_streaming_retval(ptr %ptr) #0 { ; CHECK-NEXT: add sp, sp, #1024 ; CHECK-NEXT: .cfi_def_cfa wsp, 1136 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #1120] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #1120] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Reload +; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Reload +; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #1136 diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll index df88f37195ed6..944c131260c6e 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll @@ -125,11 +125,11 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) " ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded 
Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mrs x19, SVCR ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 @@ -138,16 +138,16 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) " ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl normal_callee_vec_arg -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: tbz w19, #0, .LBB4_4 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB4_4: ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d @@ -155,7 +155,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) " ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -173,18 +173,18 @@ define @streaming_compatible_with_scalable_vectors( @streaming_compatible_with_scalable_vectors( 
@streaming_compatible_with_predicate_vectors( @streaming_compatible_with_predicate_vectors( @streaming_compatible_with_predicate_vectors( @streaming_compatible_with_predicate_vectors( @smstart_clobber_simdfp(<4 x i32> %x) nounwind { ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: bl streaming_callee ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -152,18 +152,18 @@ define @smstart_clobber_sve( %x) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -203,18 +203,18 @@ define @smstart_clobber_sve( %x) nounwind { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p15, [sp, #4, mul 
vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -229,18 +229,18 @@ define @smstart_clobber_sve_duplicate( %x) ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -281,18 +281,18 @@ define @smstart_clobber_sve_duplicate( %x) ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p15, [sp, #4, mul 
vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -310,15 +310,15 @@ define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_psta ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Spill ; CHECK-NEXT: stp d0, d0, [sp] // 16-byte Folded 
Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr d0, [sp] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp] // 8-byte Reload ; CHECK-NEXT: bl cos -; CHECK-NEXT: str d0, [sp] // 8-byte Folded Spill +; CHECK-NEXT: str d0, [sp] // 8-byte Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d1, d0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload @@ -343,12 +343,12 @@ define void @disable_tailcallopt() nounwind { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: bl streaming_callee ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload @@ -402,7 +402,7 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr ; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #96] // 8-byte Spill ; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm @@ -411,7 +411,7 @@ define void 
@call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr ; CHECK-NEXT: bl bar ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll index 7be5e6fe29869..30f98b10cfdaa 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll @@ -262,7 +262,7 @@ define aarch64_sve_vector_pcs void @streaming_compatible_caller_conditional_mode ; CHECK: .cfi_def_cfa_offset 48 ; CHECK: cntd x9 ; CHECK: stp x28, x19, [sp, #32] // 16-byte Folded Spill -; CHECK: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK: str x9, [sp, #16] // 8-byte Spill ; CHECK: mov x29, sp ; CHECK: .cfi_def_cfa w29, 48 ; CHECK: .cfi_offset vg, -32 diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll index 7efa1d8f7a6a7..72f0eac9b659c 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll @@ -18,21 +18,21 @@ define void @test_no_stackslot_scavenging(float %f) #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Spill ; 
CHECK-NEXT: stp x30, x24, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Spill ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Reload ; CHECK-NEXT: bl use_f ; CHECK-NEXT: smstart sm ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ldp x30, x24, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -60,7 +60,7 @@ define void @test_no_stackslot_scavenging_with_fp(float %f, i64 %n) #0 "frame-po ; CHECK-NEXT: lsl x9, x0, #3 ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: mov x19, sp -; CHECK-NEXT: str s0, [x19, #12] // 4-byte Folded Spill +; CHECK-NEXT: str s0, [x19, #12] // 4-byte Spill ; CHECK-NEXT: add x9, x9, #15 ; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 ; CHECK-NEXT: sub x8, x8, x9 @@ -68,7 +68,7 @@ define void @test_no_stackslot_scavenging_with_fp(float %f, i64 %n) #0 "frame-po ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr s0, [x19, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr s0, [x19, #12] // 4-byte Reload ; CHECK-NEXT: bl use_f ; CHECK-NEXT: smstart sm ; CHECK-NEXT: sub sp, x29, #64 diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll index 125cea7dc469a..06fb52ae10374 100644 --- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll +++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll @@ -23,7 +23,7 @@ define void @vg_unwind_simple() #0 { ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp 
d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #80] // 8-byte Spill ; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: .cfi_offset w30, -24 ; CHECK-NEXT: .cfi_offset w29, -32 @@ -66,7 +66,7 @@ define void @vg_unwind_simple() #0 { ; FP-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; FP-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; FP-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Spill ; FP-CHECK-NEXT: add x29, sp, #64 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 ; FP-CHECK-NEXT: .cfi_offset vg, -16 @@ -141,7 +141,7 @@ define void @vg_unwind_needs_gap() #0 { ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp, #88] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -192,7 +192,7 @@ define void @vg_unwind_needs_gap() #0 { ; FP-CHECK-NEXT: smstart sm ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; FP-CHECK-NEXT: ldr x20, [sp, #88] // 8-byte Folded Reload +; FP-CHECK-NEXT: ldr x20, [sp, #88] // 8-byte Reload ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -231,7 +231,7 @@ define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 { ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #80] // 
16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #96] // 8-byte Spill ; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: .cfi_offset w30, -24 ; CHECK-NEXT: .cfi_offset w29, -32 @@ -243,9 +243,9 @@ define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 { ; CHECK-NEXT: .cfi_offset b13, -80 ; CHECK-NEXT: .cfi_offset b14, -88 ; CHECK-NEXT: .cfi_offset b15, -96 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl fixed_callee ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload @@ -278,7 +278,7 @@ define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 { ; FP-CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; FP-CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; FP-CHECK-NEXT: stp x29, x30, [sp, #80] // 16-byte Folded Spill -; FP-CHECK-NEXT: str x9, [sp, #96] // 8-byte Folded Spill +; FP-CHECK-NEXT: str x9, [sp, #96] // 8-byte Spill ; FP-CHECK-NEXT: add x29, sp, #80 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 ; FP-CHECK-NEXT: .cfi_offset vg, -16 @@ -292,9 +292,9 @@ define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 { ; FP-CHECK-NEXT: .cfi_offset b13, -80 ; FP-CHECK-NEXT: .cfi_offset b14, -88 ; FP-CHECK-NEXT: .cfi_offset b15, -96 -; FP-CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; FP-CHECK-NEXT: str q0, [sp] // 16-byte Spill ; FP-CHECK-NEXT: smstop sm -; FP-CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; FP-CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; FP-CHECK-NEXT: bl fixed_callee ; FP-CHECK-NEXT: smstart sm ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 112 @@ -332,7 +332,7 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #16] // 8-byte 
Folded Spill +; CHECK-NEXT: str x9, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: .cfi_def_cfa w29, 48 ; CHECK-NEXT: .cfi_offset w27, -8 @@ -341,26 +341,26 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; CHECK-NEXT: .cfi_offset w30, -40 ; CHECK-NEXT: .cfi_offset w29, -48 ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill ; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill ; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill ; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill ; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill ; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill ; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; 
CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d8 @ cfa - 8 * IncomingVG - 48 @@ -396,18 +396,18 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload ; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload ; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] 
// 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .cfi_restore z8 ; CHECK-NEXT: .cfi_restore z9 @@ -434,7 +434,7 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; FP-CHECK-NEXT: .cfi_def_cfa_offset 48 ; FP-CHECK-NEXT: cntd x9 ; FP-CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill -; FP-CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; FP-CHECK-NEXT: str x9, [sp, #16] // 8-byte Spill ; FP-CHECK-NEXT: mov x29, sp ; FP-CHECK-NEXT: .cfi_def_cfa w29, 48 ; FP-CHECK-NEXT: .cfi_offset w27, -8 @@ -443,26 +443,26 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; FP-CHECK-NEXT: .cfi_offset w30, -40 ; FP-CHECK-NEXT: .cfi_offset w29, -48 ; FP-CHECK-NEXT: addvl sp, sp, #-18 -; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill ; FP-CHECK-NEXT: ptrue pn8.b -; FP-CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; FP-CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill ; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill ; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill -; FP-CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; FP-CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill ; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill ; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill -; FP-CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; 
FP-CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill ; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill ; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill -; FP-CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; FP-CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill ; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill -; FP-CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; FP-CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; FP-CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; FP-CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; FP-CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; FP-CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; FP-CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; FP-CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; FP-CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; FP-CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; FP-CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; FP-CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; FP-CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; FP-CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; FP-CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill ; FP-CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill ; FP-CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d8 @ cfa - 8 * IncomingVG - 48 @@ -498,18 +498,18 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload ; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload ; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload -; FP-CHECK-NEXT: ldr p15, [sp, #4, mul 
vl] // 2-byte Folded Reload -; FP-CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; FP-CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; FP-CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; FP-CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; FP-CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; FP-CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; FP-CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; FP-CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; FP-CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; FP-CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; FP-CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; FP-CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; FP-CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; FP-CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; FP-CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; FP-CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; FP-CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; FP-CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; FP-CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; FP-CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; FP-CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; FP-CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; FP-CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; FP-CHECK-NEXT: addvl sp, sp, #18 ; FP-CHECK-NEXT: .cfi_restore z8 ; FP-CHECK-NEXT: .cfi_restore z9 @@ -582,7 +582,7 @@ define void @vg_unwind_multiple_scratch_regs(ptr %out) #1 { ; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680 ; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #88] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte 
Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -642,7 +642,7 @@ define void @vg_unwind_multiple_scratch_regs(ptr %out) #1 { ; FP-CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680 ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; FP-CHECK-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload +; FP-CHECK-NEXT: ldr x28, [sp, #88] // 8-byte Reload ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -682,7 +682,7 @@ define void @vg_locally_streaming_fn() #3 { ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #80] // 8-byte Spill ; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: .cfi_offset w30, -24 ; CHECK-NEXT: .cfi_offset w29, -32 @@ -727,7 +727,7 @@ define void @vg_locally_streaming_fn() #3 { ; FP-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; FP-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; FP-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Spill ; FP-CHECK-NEXT: add x29, sp, #64 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 ; FP-CHECK-NEXT: .cfi_offset vg, -16 @@ -809,7 +809,7 @@ define void @streaming_compatible_to_streaming() #4 { ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB6_4: ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, 
d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -865,7 +865,7 @@ define void @streaming_compatible_to_streaming() #4 { ; FP-CHECK-NEXT: .LBB6_4: ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; FP-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; FP-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Reload ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -926,7 +926,7 @@ define void @streaming_compatible_to_non_streaming() #4 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB7_4: ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -982,7 +982,7 @@ define void @streaming_compatible_to_non_streaming() #4 { ; FP-CHECK-NEXT: .LBB7_4: ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; FP-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; FP-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Reload ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -1053,7 +1053,7 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 { ; NO-SVE-CHECK-NEXT: .LBB8_4: ; NO-SVE-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; NO-SVE-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Reload ; NO-SVE-CHECK-NEXT: ldp d9, 
d8, [sp, #48] // 16-byte Folded Reload ; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -1130,7 +1130,7 @@ define void @vg_unwind_noasync() #5 { ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #80] // 8-byte Spill ; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: .cfi_offset w30, -24 ; CHECK-NEXT: .cfi_offset w29, -32 @@ -1173,7 +1173,7 @@ define void @vg_unwind_noasync() #5 { ; FP-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; FP-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; FP-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Spill ; FP-CHECK-NEXT: add x29, sp, #64 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 ; FP-CHECK-NEXT: .cfi_offset vg, -16 diff --git a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll index c753e9c569d22..240b204d15210 100644 --- a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll +++ b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll @@ -49,7 +49,7 @@ define void @private_za_loop(i32 %n) "aarch64_inout_za" nounwind { ; CHECK-NEWLOWERING-LABEL: private_za_loop: ; CHECK-NEWLOWERING: // %bb.0: // %entry ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 @@ -78,7 +78,7 @@ define void @private_za_loop(i32 %n) "aarch64_inout_za" nounwind { ; CHECK-NEWLOWERING-NEXT: .LBB0_5: // %exit ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEWLOWERING-NEXT: mov sp, x29 -; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret entry: @@ -367,7 +367,7 @@ define void @cond_clobber_followed_by_clobber(i1 %cond) "aarch64_inout_za" nounw ; CHECK-LABEL: cond_clobber_followed_by_clobber: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 @@ -403,14 +403,14 @@ define void @cond_clobber_followed_by_clobber(i1 %cond) "aarch64_inout_za" nounw ; CHECK-NEXT: .LBB5_6: // %exit ; CHECK-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: b shared_za_call ; ; CHECK-NEWLOWERING-LABEL: cond_clobber_followed_by_clobber: ; CHECK-NEWLOWERING: // %bb.0: ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 @@ -436,7 +436,7 @@ define void @cond_clobber_followed_by_clobber(i1 %cond) "aarch64_inout_za" nounw ; CHECK-NEWLOWERING-NEXT: .LBB5_4: // %exit ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEWLOWERING-NEXT: mov sp, x29 -; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: b shared_za_call tail call void @shared_za_call() @@ -638,7 +638,7 @@ define void @critical_edge_mixed_za(i1 %c1, i1 %c2) "aarch64_inout_za" nounwind ; CHECK-LABEL: critical_edge_mixed_za: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 @@ -681,14 +681,14 @@ define void @critical_edge_mixed_za(i1 %c1, i1 %c2) "aarch64_inout_za" nounwind ; CHECK-NEXT: bl shared_za_call ; CHECK-NEXT: .LBB9_9: // %common.ret ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret ; ; CHECK-NEWLOWERING-LABEL: critical_edge_mixed_za: ; CHECK-NEWLOWERING: // %bb.0: // %entry ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 @@ -731,7 +731,7 @@ define void @critical_edge_mixed_za(i1 %c1, i1 %c2) "aarch64_inout_za" nounwind ; CHECK-NEWLOWERING-NEXT: bl shared_za_call ; CHECK-NEWLOWERING-NEXT: .LBB9_9: // %common.ret ; CHECK-NEWLOWERING-NEXT: mov sp, x29 -; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll index dcdc56c669077..ef74825e02881 100644 --- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll +++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll @@ -31,7 +31,7 @@ define void @za_with_raii(i1 %fail) "aarch64_inout_za" personality ptr @__gxx_pe ; CHECK-NEXT: .cfi_lsda 28, .Lexception0 ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa w29, 32 @@ -46,7 +46,7 @@ define void @za_with_raii(i1 %fail) "aarch64_inout_za" personality ptr @__gxx_pe ; CHECK-NEXT: tbnz w0, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: // %return_normally ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: b shared_za_call ; CHECK-NEXT: .LBB0_2: // %throw_exception @@ -260,7 +260,7 @@ define void @try_catch() "aarch64_inout_za" personality ptr @__gxx_personality_v ; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception1 ; CHECK-SDAG-NEXT: // %bb.0: ; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-SDAG-NEXT: mov x29, sp ; CHECK-SDAG-NEXT: sub sp, sp, #16 ; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32 @@ -287,7 +287,7 @@ define void @try_catch() "aarch64_inout_za" personality ptr @__gxx_personality_v ; CHECK-SDAG-NEXT: .Ltmp4: // EH_LABEL ; CHECK-SDAG-NEXT: .LBB1_3: // %after_catch ; CHECK-SDAG-NEXT: mov sp, x29 -; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-SDAG-NEXT: b shared_za_call ; CHECK-SDAG-NEXT: .LBB1_4: // %catch @@ -417,7 +417,7 @@ define void @try_catch_shared_za_callee() "aarch64_new_za" personality ptr @__gx ; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception2 ; CHECK-SDAG-NEXT: // %bb.0: // %prelude ; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-SDAG-NEXT: mov x29, sp ; CHECK-SDAG-NEXT: sub sp, sp, #16 ; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32 @@ -443,7 +443,7 @@ define void @try_catch_shared_za_callee() "aarch64_new_za" personality ptr @__gx ; CHECK-SDAG-NEXT: .LBB2_3: // %exit ; CHECK-SDAG-NEXT: smstop za ; CHECK-SDAG-NEXT: mov sp, x29 -; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-SDAG-NEXT: ret ; CHECK-SDAG-NEXT: .LBB2_4: // %catch @@ -573,7 +573,7 @@ define void @try_catch_shared_zt0_callee() "aarch64_inout_zt0" personality ptr @ ; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception3 ; CHECK-SDAG-NEXT: // %bb.0: ; CHECK-SDAG-NEXT: sub sp, sp, #96 -; CHECK-SDAG-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-SDAG-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 96 ; CHECK-SDAG-NEXT: .cfi_offset w19, -8 @@ -589,7 +589,7 @@ define void @try_catch_shared_zt0_callee() "aarch64_inout_zt0" personality ptr @ ; CHECK-SDAG-NEXT: .Ltmp10: // EH_LABEL ; CHECK-SDAG-NEXT: // %bb.1: // %return_normally ; CHECK-SDAG-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-SDAG-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-SDAG-NEXT: add sp, sp, #96 ; CHECK-SDAG-NEXT: ret ; CHECK-SDAG-NEXT: .LBB3_2: // %unwind_dtors @@ -638,7 +638,7 @@ define void @try_catch_agnostic_za() "aarch64_za_state_agnostic" personality ptr ; CHECK-NEXT: .cfi_lsda 28, .Lexception4 ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: .cfi_def_cfa w29, 32 ; CHECK-NEXT: .cfi_offset w19, -16 @@ -656,7 +656,7 @@ define void @try_catch_agnostic_za() "aarch64_za_state_agnostic" personality ptr ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: bl __arm_sme_restore ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB4_2: // %catch @@ -672,7 +672,7 @@ define void @try_catch_agnostic_za() "aarch64_za_state_agnostic" personality ptr ; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception4 ; CHECK-SDAG-NEXT: // %bb.0: ; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-SDAG-NEXT: mov x29, sp ; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32 ; CHECK-SDAG-NEXT: .cfi_offset w19, -16 @@ -690,7 +690,7 @@ define void @try_catch_agnostic_za() "aarch64_za_state_agnostic" personality ptr ; CHECK-SDAG-NEXT: .Ltmp13: // EH_LABEL ; CHECK-SDAG-NEXT: .LBB4_1: // %exit ; CHECK-SDAG-NEXT: mov sp, x29 -; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-SDAG-NEXT: ret ; CHECK-SDAG-NEXT: .LBB4_2: // %catch @@ -748,7 +748,7 @@ define void @try_catch_agnostic_za_invoke() "aarch64_za_state_agnostic" personal ; CHECK-NEXT: .cfi_lsda 28, .Lexception5 ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: .cfi_def_cfa w29, 32 ; CHECK-NEXT: .cfi_offset w19, -16 @@ -766,7 +766,7 @@ define void @try_catch_agnostic_za_invoke() "aarch64_za_state_agnostic" personal ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: bl __arm_sme_restore ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB5_2: // %catch @@ -782,7 +782,7 @@ define void @try_catch_agnostic_za_invoke() "aarch64_za_state_agnostic" personal ; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception5 ; CHECK-SDAG-NEXT: // %bb.0: // %entry ; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-SDAG-NEXT: mov x29, sp ; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32 ; CHECK-SDAG-NEXT: .cfi_offset w19, -16 @@ -800,7 +800,7 @@ define void @try_catch_agnostic_za_invoke() "aarch64_za_state_agnostic" personal ; CHECK-SDAG-NEXT: .Ltmp16: // EH_LABEL ; CHECK-SDAG-NEXT: .LBB5_1: // %exit ; CHECK-SDAG-NEXT: mov sp, x29 -; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-SDAG-NEXT: ret ; CHECK-SDAG-NEXT: .LBB5_2: // %catch @@ -886,7 +886,7 @@ define void @try_catch_inout_za_agnostic_za_callee() "aarch64_inout_za" personal ; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception6 ; CHECK-SDAG-NEXT: // %bb.0: // %entry ; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-SDAG-NEXT: mov x29, sp ; CHECK-SDAG-NEXT: sub sp, sp, #16 ; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32 @@ -913,7 +913,7 @@ define void @try_catch_inout_za_agnostic_za_callee() "aarch64_inout_za" personal ; CHECK-SDAG-NEXT: .Ltmp19: // EH_LABEL ; CHECK-SDAG-NEXT: .LBB6_3: // %exit ; CHECK-SDAG-NEXT: mov sp, x29 -; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-SDAG-NEXT: ret ; CHECK-SDAG-NEXT: .LBB6_4: // %catch diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll index 5b81f5dafe421..031514c986569 100644 --- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll +++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll @@ -33,7 +33,7 @@ define void @za_zt0_shared_caller_no_state_callee(ptr %callee) "aarch64_inout_za ; CHECK-LABEL: za_zt0_shared_caller_no_state_callee: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #80 ; CHECK-NEXT: rdsvl x8, #1 @@ -56,14 +56,14 @@ define void @za_zt0_shared_caller_no_state_callee(ptr %callee) "aarch64_inout_za ; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret ; ; CHECK-NEWLOWERING-LABEL: za_zt0_shared_caller_no_state_callee: ; CHECK-NEWLOWERING: // %bb.0: ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #80 ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 @@ -86,7 +86,7 @@ define void @za_zt0_shared_caller_no_state_callee(ptr %callee) "aarch64_inout_za ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEWLOWERING-NEXT: ldr zt0, [x19] ; CHECK-NEWLOWERING-NEXT: mov sp, x29 -; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret call void %callee(); @@ -395,7 +395,7 @@ define void @zt0_multiple_private_za_calls(ptr %callee) "aarch64_in_zt0" nounwin ; CHECK-COMMON-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: mov x20, sp ; CHECK-COMMON-NEXT: mov x19, x0 -; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-COMMON-NEXT: str zt0, [x20] ; CHECK-COMMON-NEXT: smstop za ; CHECK-COMMON-NEXT: blr x0 @@ -417,7 +417,7 @@ define void @zt0_multiple_private_za_calls(ptr %callee) "aarch64_in_zt0" nounwin ; CHECK-COMMON-NEXT: smstart za ; CHECK-COMMON-NEXT: ldr zt0, [x20] ; CHECK-COMMON-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-COMMON-NEXT: add sp, sp, #96 ; CHECK-COMMON-NEXT: ret call void %callee() diff --git a/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll b/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll index 8398e07f63801..971b1e240ad15 100644 --- a/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll +++ b/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll @@ -27,7 +27,7 @@ define { , , , , , , , } @bfcvt_tuple(i64 
%stride, ptr ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z10, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -141,7 +141,7 @@ define { , } @bfcvt_tuple(i64 %stride, ptr ; CHECK-NEXT: bfcvt z1.b, { z10.h, z11.h } ; CHECK-NEXT: ldr z11, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll index ae561016e58b1..111b3fde29a37 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll @@ -597,7 +597,7 @@ define void @udot_single_za32_u16_vg1x2_tuple(ptr %ptr, i64 %stride, ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill @@ -1370,7 +1370,7 @@ define void @udot_form_2x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z10.b, z11.b }, z0.b[0] ; CHECK-NEXT: ldr z11, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1446,7 +1446,7 @@ define void @udot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-9 ; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov w8, wzr @@ -1474,7 +1474,7 @@ define void @udot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #9 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1520,7 +1520,7 @@ define void @udot_single_za32_u16_vg1x4_x2load_x4tuple(ptr %ptr, i64 %stride, ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill @@ -1674,7 +1674,7 @@ define void @usdot_form_2x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z10.b, z11.b }, z0.b[0] ; CHECK-NEXT: ldr z11, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1750,7 +1750,7 @@ define void @usdot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-9 ; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov w8, wzr @@ -1778,7 +1778,7 @@ define void @usdot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #9 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1917,7 +1917,7 @@ define void @sdot_form_2x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill @@ -1928,7 +1928,7 @@ define void @sdot_form_2x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z10.b, z11.b }, z0.b[0] ; CHECK-NEXT: ldr z11, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -2004,7 +2004,7 @@ define void @sdot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-9 ; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov w8, wzr @@ -2032,7 +2032,7 @@ define void @sdot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #9 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -2171,7 +2171,7 @@ define void @sudot_form_2x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill @@ -2182,7 +2182,7 @@ define void @sudot_form_2x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z10.b, z11.b }, z0.b[0] ; CHECK-NEXT: ldr z11, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -2258,7 +2258,7 @@ define void @sudot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-9 ; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov w8, wzr @@ -2286,7 +2286,7 @@ define void @sudot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #9 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll index 19ac03d1200b7..0e62c7f223414 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll +++ 
b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll @@ -55,7 +55,7 @@ define @ld1_x2_i8_z0_z8( %unused, @ld1_x2_i8_z0_z8( %unused, @ld1_x2_i8_z0_z8( %unused, @ld1_x2_i8_z0_z8( %unused, @ld1_x2_i8_z0_z8_scalar( %unused, @ld1_x2_i8_z0_z8_scalar( %unused, @ld1_x2_i8_z0_z8_scalar( %unused, @ld1_x2_i8_z0_z8_scalar( %unused, @ld1_x2_i16_z0_z8( %unused, @ld1_x2_i16_z0_z8( %unused, @ld1_x2_i16_z0_z8( %unused, @ld1_x2_i16_z0_z8( %unused, @ld1_x2_i16_z0_z8_scalar( %unused, ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -543,7 +543,7 @@ define @ld1_x2_i16_z0_z8_scalar( %unused, ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -569,7 +569,7 @@ define @ld1_x2_i16_z0_z8_scalar( %unused, ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-16 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill @@ -611,7 +611,7 @@ define @ld1_x2_i16_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -677,7 +677,7 @@ define @ld1_x2_i32_z0_z8( %unused, @ld1_x2_i32_z0_z8( %unused, @ld1_x2_i32_z0_z8( %unused, @ld1_x2_i32_z0_z8( %unused, @ld1_x2_i32_z0_z8_scalar( %unused, < ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -854,7 +854,7 @@ define @ld1_x2_i32_z0_z8_scalar( %unused, < ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -880,7 +880,7 @@ define @ld1_x2_i32_z0_z8_scalar( %unused, < ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-16 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill @@ -922,7 +922,7 @@ define @ld1_x2_i32_z0_z8_scalar( %unused, < ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -988,7 +988,7 @@ define @ld1_x2_i64_z0_z8( %unused, @ld1_x2_i64_z0_z8( %unused, @ld1_x2_i64_z0_z8( %unused, @ld1_x2_i64_z0_z8( %unused, 
@ld1_x2_i64_z0_z8_scalar( %unused, < ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1165,7 +1165,7 @@ define @ld1_x2_i64_z0_z8_scalar( %unused, < ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1191,7 +1191,7 @@ define @ld1_x2_i64_z0_z8_scalar( %unused, < ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-16 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill @@ -1233,7 +1233,7 @@ define @ld1_x2_i64_z0_z8_scalar( %unused, < ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1301,7 +1301,7 @@ define @ld1_x4_i8_z0_z4_z8_z12( %unused, @ld1_x4_i8_z0_z4_z8_z12( %unused, @ld1_x4_i8_z0_z4_z8_z12( %unused, @ld1_x4_i8_z0_z4_z8_z12( %unused, @ld1_x4_i8_z0_z4_z8_z12_scalar( %unu ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1488,7 +1488,7 @@ define @ld1_x4_i8_z0_z4_z8_z12_scalar( %unu ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1516,7 +1516,7 @@ define @ld1_x4_i8_z0_z4_z8_z12_scalar( %unu ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -1560,7 +1560,7 @@ define @ld1_x4_i8_z0_z4_z8_z12_scalar( %unu ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1632,7 +1632,7 @@ define @ld1_x4_i16_z0_z4_z8_z12( %unused, ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1654,7 +1654,7 @@ define @ld1_x4_i16_z0_z4_z8_z12( %unused, ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1682,7 +1682,7 @@ define @ld1_x4_i16_z0_z4_z8_z12( %unused, ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -1726,7 +1726,7 @@ define @ld1_x4_i16_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1798,7 +1798,7 @@ define @ld1_x4_i16_z0_z4_z8_z12_scalar( %u ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1820,7 +1820,7 @@ define @ld1_x4_i16_z0_z4_z8_z12_scalar( %u ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1848,7 +1848,7 @@ define @ld1_x4_i16_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -1892,7 +1892,7 @@ define @ld1_x4_i16_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1964,7 +1964,7 @@ define @ld1_x4_i32_z0_z4_z8_z12( %unused, ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1986,7 +1986,7 @@ define @ld1_x4_i32_z0_z4_z8_z12( %unused, ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -2014,7 +2014,7 @@ define @ld1_x4_i32_z0_z4_z8_z12( %unused, ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -2058,7 +2058,7 @@ define @ld1_x4_i32_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -2130,7 +2130,7 @@ define @ld1_x4_i32_z0_z4_z8_z12_scalar( %u ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -2152,7 +2152,7 @@ define @ld1_x4_i32_z0_z4_z8_z12_scalar( %u ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -2180,7 +2180,7 @@ define @ld1_x4_i32_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -2224,7 +2224,7 @@ define @ld1_x4_i32_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -2296,7 +2296,7 @@ define @ld1_x4_i64_z0_z4_z8_z12( %unused, < ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -2318,7 +2318,7 @@ define @ld1_x4_i64_z0_z4_z8_z12( %unused, < ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -2346,7 +2346,7 @@ define @ld1_x4_i64_z0_z4_z8_z12( %unused, < ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -2390,7 +2390,7 @@ define @ld1_x4_i64_z0_z4_z8_z12( %unused, < ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -2462,7 +2462,7 @@ define @ld1_x4_i64_z0_z4_z8_z12_scalar( %un ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -2484,7 +2484,7 @@ define @ld1_x4_i64_z0_z4_z8_z12_scalar( %un ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -2512,7 +2512,7 @@ define @ld1_x4_i64_z0_z4_z8_z12_scalar( %un ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -2556,7 +2556,7 @@ define @ld1_x4_i64_z0_z4_z8_z12_scalar( %un ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll index 039b6214ac860..1cec418249d46 
100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll @@ -8,7 +8,7 @@ define @ldnt1_x2_i8_z0_z8( %unused, @ldnt1_x2_i8_z0_z8( %unused, @ldnt1_x2_i8_z0_z8( %unused, @ldnt1_x2_i8_z0_z8( %unused, @ldnt1_x2_i8_z0_z8_scalar( %unused, ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -138,7 +138,7 @@ define @ldnt1_x2_i8_z0_z8_scalar( %unused, ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -164,7 +164,7 @@ define @ldnt1_x2_i8_z0_z8_scalar( %unused, ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-16 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill @@ -206,7 +206,7 @@ define @ldnt1_x2_i8_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -225,7 +225,7 @@ define @ldnt1_x2_i16_z0_z8( %unused, @ldnt1_x2_i16_z0_z8( %unused, @ldnt1_x2_i16_z0_z8( %unused, @ldnt1_x2_i16_z0_z8( %unused, @ldnt1_x2_i16_z0_z8_scalar( %unused ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -355,7 +355,7 @@ define @ldnt1_x2_i16_z0_z8_scalar( %unused ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -381,7 +381,7 @@ define @ldnt1_x2_i16_z0_z8_scalar( %unused ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-16 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill @@ -423,7 +423,7 @@ define @ldnt1_x2_i16_z0_z8_scalar( %unused ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -442,7 +442,7 @@ define @ldnt1_x2_i32_z0_z8( %unused, @ldnt1_x2_i32_z0_z8( %unused, @ldnt1_x2_i32_z0_z8( %unused, @ldnt1_x2_i32_z0_z8( %unused, 
@ldnt1_x2_i32_z0_z8_scalar( %unused, ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -572,7 +572,7 @@ define @ldnt1_x2_i32_z0_z8_scalar( %unused, ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -598,7 +598,7 @@ define @ldnt1_x2_i32_z0_z8_scalar( %unused, ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-16 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill @@ -640,7 +640,7 @@ define @ldnt1_x2_i32_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -659,7 +659,7 @@ define @ldnt1_x2_i64_z0_z8( %unused, @ldnt1_x2_i64_z0_z8( %unused, @ldnt1_x2_i64_z0_z8( %unused, @ldnt1_x2_i64_z0_z8( %unused, @ldnt1_x2_i64_z0_z8_scalar( %unused, ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -789,7 +789,7 @@ define @ldnt1_x2_i64_z0_z8_scalar( %unused, ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -815,7 +815,7 @@ define @ldnt1_x2_i64_z0_z8_scalar( %unused, ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-16 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill @@ -857,7 +857,7 @@ define @ldnt1_x2_i64_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -876,7 +876,7 @@ define @ldnt1_x4_i8_z0_z4_z8_z12( %unused, ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -898,7 +898,7 @@ define @ldnt1_x4_i8_z0_z4_z8_z12( %unused, ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -926,7 +926,7 @@ define @ldnt1_x4_i8_z0_z4_z8_z12( %unused, ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -970,7 +970,7 @@ define @ldnt1_x4_i8_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -992,7 +992,7 @@ define @ldnt1_x4_i8_z0_z4_z8_z12_scalar( %u ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1014,7 +1014,7 @@ define @ldnt1_x4_i8_z0_z4_z8_z12_scalar( %u ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1042,7 +1042,7 @@ define @ldnt1_x4_i8_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -1086,7 +1086,7 @@ define @ldnt1_x4_i8_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1109,7 +1109,7 @@ define @ldnt1_x4_i16_z0_z4_z8_z12( %unused ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1131,7 +1131,7 @@ define @ldnt1_x4_i16_z0_z4_z8_z12( %unused ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1159,7 +1159,7 @@ define @ldnt1_x4_i16_z0_z4_z8_z12( %unused ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -1203,7 +1203,7 @@ define @ldnt1_x4_i16_z0_z4_z8_z12( %unused ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1225,7 +1225,7 @@ define @ldnt1_x4_i16_z0_z4_z8_z12_scalar( ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1247,7 +1247,7 @@ define @ldnt1_x4_i16_z0_z4_z8_z12_scalar( ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1275,7 +1275,7 @@ define @ldnt1_x4_i16_z0_z4_z8_z12_scalar( ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -1319,7 +1319,7 @@ define @ldnt1_x4_i16_z0_z4_z8_z12_scalar( ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1342,7 +1342,7 @@ define @ldnt1_x4_i32_z0_z4_z8_z12( %unused ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1364,7 +1364,7 @@ define @ldnt1_x4_i32_z0_z4_z8_z12( %unused ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1392,7 +1392,7 @@ define @ldnt1_x4_i32_z0_z4_z8_z12( %unused ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -1436,7 +1436,7 @@ define @ldnt1_x4_i32_z0_z4_z8_z12( %unused ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1458,7 +1458,7 @@ define @ldnt1_x4_i32_z0_z4_z8_z12_scalar( ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1480,7 +1480,7 @@ define @ldnt1_x4_i32_z0_z4_z8_z12_scalar( ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1508,7 +1508,7 @@ define @ldnt1_x4_i32_z0_z4_z8_z12_scalar( ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -1552,7 +1552,7 @@ define @ldnt1_x4_i32_z0_z4_z8_z12_scalar( ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1575,7 +1575,7 @@ define @ldnt1_x4_i64_z0_z4_z8_z12( %unused, ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1597,7 +1597,7 @@ define @ldnt1_x4_i64_z0_z4_z8_z12( %unused, ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1625,7 +1625,7 @@ define @ldnt1_x4_i64_z0_z4_z8_z12( %unused, ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -1669,7 +1669,7 @@ define @ldnt1_x4_i64_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1691,7 +1691,7 @@ define @ldnt1_x4_i64_z0_z4_z8_z12_scalar( % ; STRIDED: // %bb.0: ; STRIDED-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; STRIDED-NEXT: addvl sp, sp, #-17 -; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; STRIDED-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; STRIDED-NEXT: mov p8.b, p0.b ; STRIDED-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; STRIDED-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill @@ -1713,7 +1713,7 @@ define @ldnt1_x4_i64_z0_z4_z8_z12_scalar( % ; STRIDED-NEXT: //APP ; STRIDED-NEXT: nop ; STRIDED-NEXT: //NO_APP -; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; STRIDED-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; STRIDED-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; STRIDED-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1741,7 +1741,7 @@ define @ldnt1_x4_i64_z0_z4_z8_z12_scalar( % ; CONTIGUOUS: // %bb.0: ; CONTIGUOUS-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CONTIGUOUS-NEXT: addvl sp, sp, #-15 -; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CONTIGUOUS-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CONTIGUOUS-NEXT: ptrue pn8.b ; CONTIGUOUS-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CONTIGUOUS-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill @@ -1785,7 +1785,7 @@ define @ldnt1_x4_i64_z0_z4_z8_z12_scalar( % ; CONTIGUOUS-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvt.ll index 58d2e253eaafd..3ae62e30c4089 100644 
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvt.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvt.ll @@ -49,7 +49,7 @@ define { , , , , , , , } @multi_vector_sat_shift_narrow ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z10, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -36,7 +36,7 @@ define { , } @multi_vector_sat_shift_narrow ; CHECK-NEXT: sqrshr z1.h, { z10.s, z11.s }, #16 ; CHECK-NEXT: ldr z11, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -89,7 +89,7 @@ define { , , , , , , ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: add x9, x0, x1 ; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill @@ -116,7 +116,7 @@ define void @svdot_form_2x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z10.h, z11.h }, z0.h[0] ; CHECK-NEXT: ldr z11, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -192,7 +192,7 @@ define void @svdot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-9 ; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov w8, wzr @@ -220,7 +220,7 @@ define void @svdot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #9 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -331,7 +331,7 @@ define void @uvdot_form_2x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: add x9, x0, x1 ; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill @@ -343,7 +343,7 @@ define void @uvdot_form_2x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z10.h, z11.h }, z0.h[0] ; CHECK-NEXT: ldr z11, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -419,7 +419,7 @@ define void @uvdot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-9 ; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov w8, wzr @@ -447,7 +447,7 @@ define void @uvdot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, ; CHECK-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: addvl sp, sp, #9 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -558,7 +558,7 @@ define void @suvdot_form_4x_tuple_svecc(ptr %ptr, i64 %stride, %pred, double %fp) { ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: sub sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #-17 ; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill @@ -375,18 +375,18 @@ define void @all_stack_areas( %pred, double %fp) { ; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #17 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] 
// 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -459,21 +459,21 @@ define void @all_stack_areas_fp( %pred, double %fp) "frame-poi ; CHECK-LABEL: all_stack_areas_fp: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x28, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: sub sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #-17 ; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill @@ -534,20 +534,20 @@ define void @all_stack_areas_fp( %pred, double %fp) "frame-poi ; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #17 -; CHECK-NEXT: ldr p15, 
[sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca @@ -607,7 +607,7 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #16] // 8-byte Spill 
; CHECK-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: .cfi_def_cfa w29, 64 @@ -619,18 +619,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK-NEXT: .cfi_offset w30, -56 ; CHECK-NEXT: .cfi_offset w29, -64 ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: sub sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #-16 ; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill @@ -704,18 +704,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK-NEXT: .cfi_restore z13 ; 
CHECK-NEXT: .cfi_restore z14 ; CHECK-NEXT: .cfi_restore z15 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: .cfi_def_cfa wsp, 64 ; CHECK-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload @@ -800,12 +800,12 @@ define aarch64_sve_vector_pcs void @only_ppr_csr_vla(i64 %n) { ; CHECK-LABEL: only_ppr_csr_vla: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov x19, sp ; CHECK-NEXT: .cfi_def_cfa w29, 32 ; CHECK-NEXT: .cfi_offset w19, -16 @@ -820,11 +820,11 @@ define aarch64_sve_vector_pcs void @only_ppr_csr_vla(i64 %n) { ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: addvl sp, x29, #-1 -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret %alloc = alloca i8, i64 %n, align 1 @@ -840,7 +840,7 @@ define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) { ; CHECK-LABEL: only_zpr_csr_vla: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #-3 @@ -870,7 +870,7 @@ define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) { ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret %alloc = alloca i8, i64 %n, align 1 @@ -888,12 +888,12 @@ define aarch64_sve_vector_pcs void @zpr_ppr_csr_vla(i64 %n) { ; CHECK-LABEL: zpr_ppr_csr_vla: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: sub sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill @@ -922,11 +922,11 @@ define aarch64_sve_vector_pcs void @zpr_ppr_csr_vla(i64 %n) { ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, x29, #-1 -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] 
// 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret %alloc = alloca i8, i64 %n, align 1 @@ -944,12 +944,12 @@ define void @sve_locals_only_ppr_csr_vla(i64 %n, %pred, %pred, %pred, %pred, %pred, %pred, %vs) "aarch64_psta ; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8: ; CHECK1024-NOSPLITSVE: // %bb.0: // %entry ; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 -; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Spill ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1 ; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill ; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1024 @@ -1009,7 +1009,7 @@ define i32 @svecc_csr_d8(i32 noundef %num, %vs) "aarch64_psta ; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1024 ; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1 -; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Reload ; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 ; CHECK1024-NOSPLITSVE-NEXT: ret ; @@ -1083,7 +1083,7 @@ define i32 @svecc_csr_d8d9(i32 noundef %num, %vs) "aarch64_ps ; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8d9: ; CHECK1024-NOSPLITSVE: // %bb.0: // %entry ; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 -; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Spill ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-2 ; CHECK1024-NOSPLITSVE-NEXT: str z9, [sp] // 16-byte Folded Spill ; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill @@ -1099,7 +1099,7 @@ 
define i32 @svecc_csr_d8d9(i32 noundef %num, %vs) "aarch64_ps ; CHECK1024-NOSPLITSVE-NEXT: ldr z9, [sp] // 16-byte Folded Reload ; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #2 -; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Reload ; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 ; CHECK1024-NOSPLITSVE-NEXT: ret ; @@ -1173,7 +1173,7 @@ define i32 @svecc_csr_d8_allocd(double %d, %vs) "aarch64_psta ; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8_allocd: ; CHECK1024-NOSPLITSVE: // %bb.0: // %entry ; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 -; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Spill ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1 ; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill ; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 @@ -1187,7 +1187,7 @@ define i32 @svecc_csr_d8_allocd(double %d, %vs) "aarch64_psta ; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 ; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1 -; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Reload ; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 ; CHECK1024-NOSPLITSVE-NEXT: ret ; @@ -1263,7 +1263,7 @@ define i32 @svecc_csr_d8_alloci64(i64 %d, %vs) "aarch64_pstat ; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8_alloci64: ; CHECK1024-NOSPLITSVE: // %bb.0: // %entry ; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 -; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Spill ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1 ; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill ; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, 
#1040 @@ -1278,7 +1278,7 @@ define i32 @svecc_csr_d8_alloci64(i64 %d, %vs) "aarch64_pstat ; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 ; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1 -; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Reload ; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 ; CHECK1024-NOSPLITSVE-NEXT: ret ; @@ -1359,7 +1359,7 @@ define i32 @svecc_csr_d8_allocnxv4i32(i64 %d, %vs) "aarch64_p ; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8_allocnxv4i32: ; CHECK1024-NOSPLITSVE: // %bb.0: // %entry ; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 -; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Spill ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1 ; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill ; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1024 @@ -1377,7 +1377,7 @@ define i32 @svecc_csr_d8_allocnxv4i32(i64 %d, %vs) "aarch64_p ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1 ; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1 -; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Reload ; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 ; CHECK1024-NOSPLITSVE-NEXT: ret ; @@ -1531,14 +1531,14 @@ define i32 @svecc_csr_x18_25_d8_15_allocdi64(i64 %d, double %e, ] @sve_signature_pred_2xv4i1_caller([2 x %P0, ptr %P1, i32 %P2, %P3, ; CHECK0-NEXT: .cfi_def_cfa_offset 64 ; CHECK0-NEXT: cntd x9 ; CHECK0-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill -; CHECK0-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: str x9, [sp, #16] // 8-byte Spill ; CHECK0-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill ; CHECK0-NEXT: mov x29, sp ; CHECK0-NEXT: .cfi_def_cfa w29, 64 @@ 
-2002,18 +2002,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK0-NEXT: .cfi_offset w30, -56 ; CHECK0-NEXT: .cfi_offset w29, -64 ; CHECK0-NEXT: addvl sp, sp, #-18 -; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -2073,18 +2073,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte 
Folded Reload ; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK0-NEXT: addvl sp, sp, #18 ; CHECK0-NEXT: .cfi_restore z8 ; CHECK0-NEXT: .cfi_restore z9 @@ -2114,7 +2114,7 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK64-NEXT: .cfi_def_cfa_offset 64 ; CHECK64-NEXT: cntd x9 ; CHECK64-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill -; CHECK64-NEXT: str x9, [sp, #16] // 8-byte 
Folded Spill +; CHECK64-NEXT: str x9, [sp, #16] // 8-byte Spill ; CHECK64-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill ; CHECK64-NEXT: mov x29, sp ; CHECK64-NEXT: .cfi_def_cfa w29, 64 @@ -2126,18 +2126,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK64-NEXT: .cfi_offset w30, -56 ; CHECK64-NEXT: .cfi_offset w29, -64 ; CHECK64-NEXT: addvl sp, sp, #-2 -; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK64-NEXT: sub sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #-16 ; CHECK64-NEXT: str z23, [sp] // 16-byte 
Folded Spill @@ -2211,18 +2211,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK64-NEXT: .cfi_restore z13 ; CHECK64-NEXT: .cfi_restore z14 ; CHECK64-NEXT: .cfi_restore z15 -; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK64-NEXT: addvl sp, sp, #2 ; CHECK64-NEXT: .cfi_def_cfa wsp, 64 ; CHECK64-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload @@ -2243,13 +2243,13 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1088 ; 
CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 1088 ; CHECK1024-NOSPLITSVE-NEXT: cntd x9 -; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str x30, [sp, #1032] // 8-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str x9, [sp, #1040] // 8-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str x28, [sp, #1048] // 8-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str x27, [sp, #1056] // 8-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str x26, [sp, #1064] // 8-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str x19, [sp, #1072] // 8-byte Spill ; CHECK1024-NOSPLITSVE-NEXT: add x29, sp, #1024 ; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa w29, 64 ; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w19, -16 @@ -2260,18 +2260,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w30, -56 ; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -64 ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-18 -; CHECK1024-NOSPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte 
Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK1024-NOSPLITSVE-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK1024-NOSPLITSVE-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK1024-NOSPLITSVE-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -2333,18 +2333,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK1024-NOSPLITSVE-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK1024-NOSPLITSVE-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; 
CHECK1024-NOSPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #18 ; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z8 ; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z9 @@ -2355,12 +2355,12 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z14 ; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z15 ; 
CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa wsp, 1088 -; CHECK1024-NOSPLITSVE-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x19, [sp, #1072] // 8-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x26, [sp, #1064] // 8-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x27, [sp, #1056] // 8-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x28, [sp, #1048] // 8-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x30, [sp, #1032] // 8-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Reload ; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1088 ; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 0 ; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w19 @@ -2378,7 +2378,7 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa_offset 64 ; CHECK1024-SPLITSVE-NEXT: cntd x9 ; CHECK1024-SPLITSVE-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str x9, [sp, #16] // 8-byte Spill ; CHECK1024-SPLITSVE-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill ; CHECK1024-SPLITSVE-NEXT: mov x29, sp ; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa w29, 64 @@ -2390,18 +2390,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK1024-SPLITSVE-NEXT: .cfi_offset w30, -56 ; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -64 ; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-2 -; CHECK1024-SPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str 
p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 ; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-16 ; CHECK1024-SPLITSVE-NEXT: str z23, [sp] // 16-byte Folded Spill @@ -2475,18 +2475,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK1024-SPLITSVE-NEXT: .cfi_restore z13 ; CHECK1024-SPLITSVE-NEXT: .cfi_restore z14 ; CHECK1024-SPLITSVE-NEXT: .cfi_restore z15 -; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, 
mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #2 ; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa wsp, 64 ; CHECK1024-SPLITSVE-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload @@ -2514,7 +2514,7 @@ define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P0, 
ptr %P1, i32 %P2, %P0, ptr %P1, i32 %P2, %P0, ptr %P1, i32 %P2, %P0, ptr %P1, i32 %P2, %P0, ptr %P1, i32 %P2, %P0, ptr %P1, i32 %P2, %P0, ptr %P1, i32 %P2, %P0, ptr %P1, i32 %P2, %P0, ptr %P1, i32 %P2, %P0, ptr %P1, i32 %P2, %P0, ptr %P1, i32 %P2, %P0, ptr %P1, i32 %P2, %v) "aarch64_pstate_sm_ ; CHECK1024-LABEL: ordering_test: ; CHECK1024: // %bb.0: // %entry ; CHECK1024-NEXT: sub sp, sp, #1040 -; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Spill ; CHECK1024-NEXT: sub sp, sp, #1088 ; CHECK1024-NEXT: .cfi_def_cfa_offset 2128 ; CHECK1024-NEXT: .cfi_offset w29, -16 @@ -3135,7 +3135,7 @@ define void @ordering_test(double %d, half %h, <4 x i32> %v) "aarch64_pstate_sm_ ; CHECK1024-NEXT: str h1, [sp, #1078] ; CHECK1024-NEXT: str q2, [sp, #1056] ; CHECK1024-NEXT: add sp, sp, #1088 -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Reload ; CHECK1024-NEXT: add sp, sp, #1040 ; CHECK1024-NEXT: ret entry: @@ -3159,7 +3159,7 @@ define void @ordering_test_array(i64 %o, i64 %p, float %f, i32 %x) "aarch64_psta ; CHECK0-LABEL: ordering_test_array: ; CHECK0: // %bb.0: // %entry ; CHECK0-NEXT: sub sp, sp, #272 -; CHECK0-NEXT: str x29, [sp, #256] // 8-byte Folded Spill +; CHECK0-NEXT: str x29, [sp, #256] // 8-byte Spill ; CHECK0-NEXT: .cfi_def_cfa_offset 272 ; CHECK0-NEXT: .cfi_offset w29, -16 ; CHECK0-NEXT: add x8, sp, #128 @@ -3172,7 +3172,7 @@ define void @ordering_test_array(i64 %o, i64 %p, float %f, i32 %x) "aarch64_psta ; CHECK64-LABEL: ordering_test_array: ; CHECK64: // %bb.0: // %entry ; CHECK64-NEXT: sub sp, sp, #400 -; CHECK64-NEXT: str x29, [sp, #384] // 8-byte Folded Spill +; CHECK64-NEXT: str x29, [sp, #384] // 8-byte Spill ; CHECK64-NEXT: .cfi_def_cfa_offset 400 ; CHECK64-NEXT: .cfi_offset w29, -16 ; CHECK64-NEXT: mov x8, sp @@ -3185,7 +3185,7 @@ define void @ordering_test_array(i64 %o, i64 %p, float %f, i32 %x) "aarch64_psta ; CHECK1024-LABEL: 
ordering_test_array: ; CHECK1024: // %bb.0: // %entry ; CHECK1024-NEXT: sub sp, sp, #1040 -; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Spill ; CHECK1024-NEXT: sub sp, sp, #1280 ; CHECK1024-NEXT: .cfi_def_cfa_offset 2320 ; CHECK1024-NEXT: .cfi_offset w29, -16 @@ -3194,7 +3194,7 @@ define void @ordering_test_array(i64 %o, i64 %p, float %f, i32 %x) "aarch64_psta ; CHECK1024-NEXT: add x8, sp, #1152 ; CHECK1024-NEXT: str s0, [x8, x1, lsl #2] ; CHECK1024-NEXT: add sp, sp, #1280 -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Reload ; CHECK1024-NEXT: add sp, sp, #1040 ; CHECK1024-NEXT: ret entry: @@ -3218,7 +3218,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK0-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK0-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK0-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK0-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK0-NEXT: str x9, [sp, #80] // 8-byte Spill ; CHECK0-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill ; CHECK0-NEXT: add x29, sp, #64 ; CHECK0-NEXT: .cfi_def_cfa w29, 48 @@ -3290,7 +3290,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK64-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK64-NEXT: stp x29, x30, [sp, #128] // 16-byte Folded Spill ; CHECK64-NEXT: stp x9, x20, [sp, #144] // 16-byte Folded Spill -; CHECK64-NEXT: str x19, [sp, #160] // 8-byte Folded Spill +; CHECK64-NEXT: str x19, [sp, #160] // 8-byte Spill ; CHECK64-NEXT: add x29, sp, #128 ; CHECK64-NEXT: .cfi_def_cfa w29, 48 ; CHECK64-NEXT: .cfi_offset w19, -16 @@ -3331,7 +3331,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK64-NEXT: sub sp, x29, #128 ; CHECK64-NEXT: .cfi_def_cfa wsp, 176 ; CHECK64-NEXT: ldp x20, x19, [sp, #152] // 
16-byte Folded Reload -; CHECK64-NEXT: ldr d14, [sp, #8] // 8-byte Folded Reload +; CHECK64-NEXT: ldr d14, [sp, #8] // 8-byte Reload ; CHECK64-NEXT: ldp x29, x30, [sp, #128] // 16-byte Folded Reload ; CHECK64-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK64-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload @@ -3362,12 +3362,12 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK1024-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK1024-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK1024-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK1024-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill -; CHECK1024-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill -; CHECK1024-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill -; CHECK1024-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill -; CHECK1024-NEXT: str x20, [sp, #1120] // 8-byte Folded Spill -; CHECK1024-NEXT: str x19, [sp, #1128] // 8-byte Folded Spill +; CHECK1024-NEXT: str x29, [sp, #1088] // 8-byte Spill +; CHECK1024-NEXT: str x30, [sp, #1096] // 8-byte Spill +; CHECK1024-NEXT: str x9, [sp, #1104] // 8-byte Spill +; CHECK1024-NEXT: str x28, [sp, #1112] // 8-byte Spill +; CHECK1024-NEXT: str x20, [sp, #1120] // 8-byte Spill +; CHECK1024-NEXT: str x19, [sp, #1128] // 8-byte Spill ; CHECK1024-NEXT: add x29, sp, #1088 ; CHECK1024-NEXT: .cfi_def_cfa w29, 48 ; CHECK1024-NEXT: .cfi_offset w19, -8 @@ -3409,12 +3409,12 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK1024-NEXT: sub sp, x29, #1088 ; CHECK1024-NEXT: .cfi_def_cfa wsp, 1136 ; CHECK1024-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr x19, [sp, #1128] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x19, [sp, #1128] // 8-byte Reload ; CHECK1024-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr x20, [sp, #1120] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x28, [sp, #1112] // 
8-byte Folded Reload -; CHECK1024-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x20, [sp, #1120] // 8-byte Reload +; CHECK1024-NEXT: ldr x28, [sp, #1112] // 8-byte Reload +; CHECK1024-NEXT: ldr x30, [sp, #1096] // 8-byte Reload +; CHECK1024-NEXT: ldr x29, [sp, #1088] // 8-byte Reload ; CHECK1024-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK1024-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload ; CHECK1024-NEXT: add sp, sp, #1136 @@ -3504,11 +3504,11 @@ define i32 @sve_stack_object_and_vla(double %d, i64 %sz) "aarch64_pstate_sm_comp ; CHECK1024-LABEL: sve_stack_object_and_vla: ; CHECK1024: // %bb.0: // %entry ; CHECK1024-NEXT: sub sp, sp, #1056 -; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Spill ; CHECK1024-NEXT: add x29, sp, #1024 -; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill -; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill -; CHECK1024-NEXT: str x19, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Spill +; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Spill +; CHECK1024-NEXT: str x19, [sp, #1048] // 8-byte Spill ; CHECK1024-NEXT: sub sp, sp, #1024 ; CHECK1024-NEXT: addvl sp, sp, #-1 ; CHECK1024-NEXT: mov x19, sp @@ -3529,10 +3529,10 @@ define i32 @sve_stack_object_and_vla(double %d, i64 %sz) "aarch64_pstate_sm_comp ; CHECK1024-NEXT: bl bar ; CHECK1024-NEXT: mov w0, wzr ; CHECK1024-NEXT: sub sp, x29, #1024 -; CHECK1024-NEXT: ldr x19, [sp, #1048] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x19, [sp, #1048] // 8-byte Reload +; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Reload 
+; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Reload ; CHECK1024-NEXT: add sp, sp, #1056 ; CHECK1024-NEXT: ret entry: @@ -3564,18 +3564,18 @@ define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 % ; CHECK0-LABEL: svecc_call_dynamic_and_scalable_alloca: ; CHECK0: // %bb.0: // %entry ; CHECK0-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill -; CHECK0-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: str x28, [sp, #16] // 8-byte Spill ; CHECK0-NEXT: mov x29, sp ; CHECK0-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill ; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; CHECK0-NEXT: addvl sp, sp, #-18 -; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; 
CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -4732,21 +4732,21 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 % ; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: 
ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK0-NEXT: mov sp, x29 ; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK0-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK0-NEXT: ldr x28, [sp, #16] // 8-byte Reload ; CHECK0-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload ; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload ; CHECK0-NEXT: ret @@ -4754,23 +4754,23 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 % ; CHECK64-LABEL: svecc_call_dynamic_and_scalable_alloca: ; CHECK64: // %bb.0: // %entry ; CHECK64-NEXT: stp x29, x30, [sp, #-64]! 
// 16-byte Folded Spill -; CHECK64-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK64-NEXT: str x28, [sp, #16] // 8-byte Spill ; CHECK64-NEXT: mov x29, sp ; CHECK64-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill ; CHECK64-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; CHECK64-NEXT: addvl sp, sp, #-2 -; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK64-NEXT: sub sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #-16 ; CHECK64-NEXT: str z23, [sp] // 16-byte Folded Spill @@ -4845,21 +4845,21 @@ define i32 
@svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 % ; CHECK64-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK64-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK64-NEXT: addvl sp, x29, #-2 -; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK64-NEXT: mov sp, x29 ; CHECK64-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK64-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK64-NEXT: ldr x28, [sp, #16] // 8-byte Reload ; CHECK64-NEXT: ldp x27, x26, 
[sp, #32] // 16-byte Folded Reload ; CHECK64-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload ; CHECK64-NEXT: ret @@ -4867,27 +4867,27 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 % ; CHECK1024-NOSPLITSVE-LABEL: svecc_call_dynamic_and_scalable_alloca: ; CHECK1024-NOSPLITSVE: // %bb.0: // %entry ; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1088 -; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Spill ; CHECK1024-NOSPLITSVE-NEXT: add x29, sp, #1024 -; CHECK1024-NOSPLITSVE-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str x27, [sp, #1048] // 8-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str x26, [sp, #1056] // 8-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str x20, [sp, #1064] // 8-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x30, [sp, #1032] // 8-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str x28, [sp, #1040] // 8-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str x27, [sp, #1048] // 8-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str x26, [sp, #1056] // 8-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str x20, [sp, #1064] // 8-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str x19, [sp, #1072] // 8-byte Spill ; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-18 -; CHECK1024-NOSPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p9, [sp, #10, mul vl] 
// 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK1024-NOSPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK1024-NOSPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK1024-NOSPLITSVE-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK1024-NOSPLITSVE-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK1024-NOSPLITSVE-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -4959,49 +4959,49 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 % ; CHECK1024-NOSPLITSVE-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK1024-NOSPLITSVE-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p14, [sp, #5, mul 
vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK1024-NOSPLITSVE-NEXT: sub sp, x29, #1024 -; CHECK1024-NOSPLITSVE-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr x20, [sp, #1064] // 8-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr x26, [sp, #1056] // 8-byte Folded Reload -; 
CHECK1024-NOSPLITSVE-NEXT: ldr x27, [sp, #1048] // 8-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload -; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x19, [sp, #1072] // 8-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x20, [sp, #1064] // 8-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x26, [sp, #1056] // 8-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x27, [sp, #1048] // 8-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x28, [sp, #1040] // 8-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x30, [sp, #1032] // 8-byte Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Reload ; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1088 ; CHECK1024-NOSPLITSVE-NEXT: ret ; ; CHECK1024-SPLITSVE-LABEL: svecc_call_dynamic_and_scalable_alloca: ; CHECK1024-SPLITSVE: // %bb.0: // %entry ; CHECK1024-SPLITSVE-NEXT: stp x29, x30, [sp, #-64]! 
// 16-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str x28, [sp, #16] // 8-byte Spill ; CHECK1024-SPLITSVE-NEXT: mov x29, sp ; CHECK1024-SPLITSVE-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill ; CHECK1024-SPLITSVE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-2 -; CHECK1024-SPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK1024-SPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p6, [sp, #13, mul vl] 
// 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK1024-SPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 ; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-16 ; CHECK1024-SPLITSVE-NEXT: str z23, [sp] // 16-byte Folded Spill @@ -5076,21 +5076,21 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 % ; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK1024-SPLITSVE-NEXT: addvl sp, x29, #-2 -; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; 
CHECK1024-SPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK1024-SPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK1024-SPLITSVE-NEXT: mov sp, x29 ; CHECK1024-SPLITSVE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK1024-SPLITSVE-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr x28, [sp, #16] // 8-byte Reload ; CHECK1024-SPLITSVE-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload ; CHECK1024-SPLITSVE-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload ; CHECK1024-SPLITSVE-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll index e7687f0d3994b..6bc8bccac7472 100644 --- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll +++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll @@ -99,7 +99,7 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: .cfi_def_cfa w29, 32 ; CHECK-NEXT: .cfi_offset w19, -16 @@ -128,7 +128,7 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 { ; CHECK-NEXT: str x8, [x1] ; CHECK-NEXT: mov sp, x29 ; CHECK-NEXT: .cfi_def_cfa wsp, 32 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 @@ -149,7 +149,7 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: .cfi_def_cfa w29, 32 ; CHECK-NEXT: .cfi_offset w19, -16 @@ -188,7 +188,7 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 { ; CHECK-NEXT: str x8, [x1] ; CHECK-NEXT: mov sp, x29 ; CHECK-NEXT: .cfi_def_cfa wsp, 32 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 @@ -293,7 +293,7 @@ define void @reserved_call_frame(i64 %n) #0 { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x28, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: .cfi_def_cfa w29, 32 ; CHECK-NEXT: .cfi_offset w28, -16 @@ -305,7 +305,7 @@ define void @reserved_call_frame(i64 %n) #0 { ; CHECK-NEXT: bl callee_stack_args ; CHECK-NEXT: add sp, sp, #1504 ; CHECK-NEXT: .cfi_def_cfa wsp, 32 -; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w28 @@ -326,7 +326,7 @@ define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" { ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: .cfi_def_cfa w29, 32 ; CHECK-NEXT: .cfi_offset w19, -16 @@ -351,7 +351,7 @@ define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" { ; CHECK-NEXT: str x8, [x1] ; CHECK-NEXT: mov sp, x29 ; CHECK-NEXT: .cfi_def_cfa wsp, 32 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 diff --git a/llvm/test/CodeGen/AArch64/stack-probing-sve.ll b/llvm/test/CodeGen/AArch64/stack-probing-sve.ll index 59b95be6fc568..79cf6d708c3e0 100644 --- a/llvm/test/CodeGen/AArch64/stack-probing-sve.ll +++ b/llvm/test/CodeGen/AArch64/stack-probing-sve.ll @@ -288,10 +288,10 @@ define void @sve_1p_csr( %a) #0 { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG 
-; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -311,16 +311,16 @@ define void @sve_4p_csr( %a) #0 { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: str p11, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p11, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP -; CHECK-NEXT: ldr p11, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -353,7 +353,7 @@ define void @sve_16v_1p_csr( %a) #0 { ; CHECK-NEXT: mov sp, x9 ; CHECK-NEXT: ldr xzr, [sp] ; CHECK-NEXT: .cfi_def_cfa_register wsp -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: 
str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill @@ -396,7 +396,7 @@ define void @sve_16v_1p_csr( %a) #0 { ; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #17 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 @@ -684,7 +684,7 @@ define void @sve_unprobed_area( %a, i32 %n) #0 { ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: str p9, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill @@ -700,7 +700,7 @@ define void @sve_unprobed_area( %a, i32 %n) #0 { ; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 diff --git a/llvm/test/CodeGen/AArch64/stack-probing.ll b/llvm/test/CodeGen/AArch64/stack-probing.ll index 8fc90cfd71b18..3e0eaf1340aea 100644 --- a/llvm/test/CodeGen/AArch64/stack-probing.ll +++ b/llvm/test/CodeGen/AArch64/stack-probing.ll @@ -32,7 +32,7 @@ define 
void @static_256(ptr %out) #0 { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #272 ; CHECK-NEXT: .cfi_def_cfa_offset 272 -; CHECK-NEXT: str x29, [sp, #256] // 8-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #256] // 8-byte Spill ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: str x8, [x0] diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll index 32c3eaeb9c876..167c6659cd2f6 100644 --- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll @@ -202,7 +202,7 @@ define void @test_attributes(ptr byval(%struct2) %s) gc "statepoint-example" { ; CHECK-LABEL: test_attributes: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: ldr x8, [sp, #64] @@ -214,7 +214,7 @@ define void @test_attributes(ptr byval(%struct2) %s) gc "statepoint-example" { ; CHECK-NEXT: str q0, [sp] ; CHECK-NEXT: bl consume_attributes ; CHECK-NEXT: .Ltmp9: -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll index 6021f9fab2cdd..9c66b38c46973 100644 --- a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll +++ b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll @@ -28,14 +28,14 @@ define void @se_memcpy(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind { ; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src ; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; 
CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst] ; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src] ; CHECK-NO-SME-ROUTINES-NEXT: smstop sm ; CHECK-NO-SME-ROUTINES-NEXT: bl memcpy ; CHECK-NO-SME-ROUTINES-NEXT: smstart sm ; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload @@ -76,14 +76,14 @@ define void @se_memset(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind { ; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst ; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst] ; CHECK-NO-SME-ROUTINES-NEXT: smstop sm ; CHECK-NO-SME-ROUTINES-NEXT: mov w1, #2 // =0x2 ; CHECK-NO-SME-ROUTINES-NEXT: bl memset ; CHECK-NO-SME-ROUTINES-NEXT: smstart sm ; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload @@ -125,14 +125,14 @@ define void @se_memmove(i64 
noundef %n) "aarch64_pstate_sm_enabled" nounwind { ; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src ; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst] ; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src] ; CHECK-NO-SME-ROUTINES-NEXT: smstop sm ; CHECK-NO-SME-ROUTINES-NEXT: bl memmove ; CHECK-NO-SME-ROUTINES-NEXT: smstart sm ; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload @@ -218,7 +218,7 @@ define void @sb_memcpy(i64 noundef %n) "aarch64_pstate_sm_body" nounwind { ; CHECK-NEXT: mov x2, x0 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: adrp x0, :got:dst ; CHECK-NEXT: adrp x1, :got:src @@ -227,7 +227,7 @@ define void @sb_memcpy(i64 noundef %n) "aarch64_pstate_sm_body" nounwind { ; CHECK-NEXT: bl __arm_sc_memcpy ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, 
[sp], #80 // 16-byte Folded Reload @@ -240,7 +240,7 @@ define void @sb_memcpy(i64 noundef %n) "aarch64_pstate_sm_body" nounwind { ; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0 ; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NO-SME-ROUTINES-NEXT: smstart sm ; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst ; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src @@ -249,7 +249,7 @@ define void @sb_memcpy(i64 noundef %n) "aarch64_pstate_sm_body" nounwind { ; CHECK-NO-SME-ROUTINES-NEXT: smstop sm ; CHECK-NO-SME-ROUTINES-NEXT: bl memcpy ; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll index 8b7fa9e7b7f71..324c35b340ed6 100644 --- a/llvm/test/CodeGen/AArch64/sve-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll @@ -12,18 +12,18 @@ define void @foo( %dst, i1 %cond) { ; CHECK-NEXT: stp x28, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] 
// 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -81,18 +81,18 @@ define void @foo( %dst, i1 %cond) { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; 
CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: mov sp, x29 ; CHECK-NEXT: ldp x28, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-callee-save-restore-pairs.ll b/llvm/test/CodeGen/AArch64/sve-callee-save-restore-pairs.ll index 254b8e03636da..7f1a268ed4a56 100644 --- a/llvm/test/CodeGen/AArch64/sve-callee-save-restore-pairs.ll +++ b/llvm/test/CodeGen/AArch64/sve-callee-save-restore-pairs.ll @@ -15,18 +15,18 @@ define void @fbyte( %v){ ; NOPAIR: // %bb.0: ; NOPAIR-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; NOPAIR-NEXT: addvl sp, sp, #-18 -; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; NOPAIR-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -71,18 +71,18 @@ define void @fbyte( %v){ ; NOPAIR-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; NOPAIR-NEXT: ldr p15, 
[sp, #4, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; NOPAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; NOPAIR-NEXT: addvl sp, sp, #18 ; NOPAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; NOPAIR-NEXT: ret @@ -91,26 +91,26 @@ define void @fbyte( %v){ ; PAIR: // %bb.0: ; PAIR-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; PAIR-NEXT: addvl sp, sp, #-18 -; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill ; PAIR-NEXT: ptrue pn8.b -; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill ; PAIR-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill ; PAIR-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill ; PAIR-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; PAIR-NEXT: str p5, 
[sp, #14, mul vl] // 2-byte Spill +; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; PAIR-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 16 + 144 * VG @@ -135,18 +135,18 @@ define void @fbyte( %v){ ; PAIR-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload ; PAIR-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload ; PAIR-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload -; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p5, 
[sp, #14, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; PAIR-NEXT: addvl sp, sp, #18 ; PAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; PAIR-NEXT: ret @@ -159,18 +159,18 @@ define void @fhalf( %v) { ; NOPAIR: // %bb.0: ; NOPAIR-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; NOPAIR-NEXT: addvl sp, sp, #-18 -; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; NOPAIR-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z21, 
[sp, #4, mul vl] // 16-byte Folded Spill @@ -215,18 +215,18 @@ define void @fhalf( %v) { ; NOPAIR-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; NOPAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; NOPAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; NOPAIR-NEXT: addvl sp, sp, #18 ; NOPAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; NOPAIR-NEXT: ret @@ -235,26 +235,26 @@ define void @fhalf( %v) { ; PAIR: // %bb.0: ; PAIR-NEXT: stp x29, x30, 
[sp, #-16]! // 16-byte Folded Spill ; PAIR-NEXT: addvl sp, sp, #-18 -; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill ; PAIR-NEXT: ptrue pn8.b -; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill ; PAIR-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill ; PAIR-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill ; PAIR-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill ; PAIR-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill -; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; PAIR-NEXT: 
str p5, [sp, #14, mul vl] // 2-byte Spill +; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; PAIR-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 16 + 144 * VG @@ -279,18 +279,18 @@ define void @fhalf( %v) { ; PAIR-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload ; PAIR-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload ; PAIR-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload -; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; PAIR-NEXT: 
ldr p5, [sp, #14, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; PAIR-NEXT: addvl sp, sp, #18 ; PAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; PAIR-NEXT: ret @@ -305,8 +305,8 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs() { ; NOPAIR: // %bb.0: ; NOPAIR-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; NOPAIR-NEXT: addvl sp, sp, #-4 -; NOPAIR-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; NOPAIR-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; NOPAIR-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill @@ -320,8 +320,8 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs() { ; NOPAIR-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; NOPAIR-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; NOPAIR-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; NOPAIR-NEXT: addvl sp, sp, #4 ; NOPAIR-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; NOPAIR-NEXT: ret @@ -330,10 +330,10 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs() { ; PAIR: // %bb.0: ; PAIR-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; PAIR-NEXT: addvl sp, sp, #-4 -; PAIR-NEXT: str p8, [sp, #5, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p8, [sp, #5, mul vl] // 2-byte Spill ; PAIR-NEXT: ptrue pn8.b -; PAIR-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; PAIR-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; PAIR-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; PAIR-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill ; PAIR-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 16 + 32 * VG @@ -346,9 +346,9 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs() { ; PAIR-NEXT: ptrue pn8.b ; PAIR-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; PAIR-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload -; PAIR-NEXT: ldr p8, [sp, #5, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; PAIR-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; PAIR-NEXT: ldr p8, [sp, #5, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload +; PAIR-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; PAIR-NEXT: addvl sp, sp, #4 ; PAIR-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; PAIR-NEXT: ret @@ -363,8 +363,8 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs2() { ; NOPAIR: // %bb.0: ; NOPAIR-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; NOPAIR-NEXT: addvl sp, sp, #-4 -; NOPAIR-NEXT: str p10, [sp, #6, mul vl] // 2-byte Folded Spill -; NOPAIR-NEXT: str p9, [sp, #7, mul vl] // 2-byte Folded Spill +; NOPAIR-NEXT: str p10, [sp, #6, mul vl] // 2-byte Spill +; NOPAIR-NEXT: str p9, [sp, #7, mul vl] // 2-byte Spill ; NOPAIR-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill @@ -378,8 +378,8 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs2() { ; NOPAIR-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload ; NOPAIR-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; NOPAIR-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload -; NOPAIR-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Folded Reload +; NOPAIR-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Reload +; NOPAIR-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Reload ; NOPAIR-NEXT: addvl sp, sp, #4 ; NOPAIR-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; NOPAIR-NEXT: ret @@ -388,9 +388,9 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs2() { ; PAIR: // %bb.0: ; PAIR-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; PAIR-NEXT: addvl sp, sp, #-4 -; PAIR-NEXT: str p9, [sp, #7, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p9, [sp, #7, mul vl] // 2-byte Spill ; PAIR-NEXT: ptrue pn9.b -; PAIR-NEXT: str p10, [sp, #6, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p10, [sp, #6, mul vl] // 2-byte Spill ; PAIR-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: st1b { z8.b, z9.b }, pn9, [sp, #2, mul vl] // 32-byte Folded Spill ; PAIR-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 16 + 32 * VG @@ -402,9 +402,9 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs2() { ; PAIR-NEXT: //NO_APP ; PAIR-NEXT: ptrue pn9.b ; PAIR-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload -; PAIR-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload +; PAIR-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Reload ; PAIR-NEXT: ld1b { z8.b, z9.b }, pn9/z, [sp, #2, mul vl] // 32-byte Folded Reload -; PAIR-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Folded Reload +; PAIR-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Reload ; PAIR-NEXT: addvl sp, sp, #4 ; PAIR-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; PAIR-NEXT: ret @@ -437,7 +437,7 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_regs() { ; PAIR: // %bb.0: ; PAIR-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; PAIR-NEXT: addvl sp, sp, #-3 -; PAIR-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; PAIR-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -446,7 +446,7 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_regs() { ; PAIR-NEXT: .cfi_escape 0x10, 0x49, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x40, 0x1c // $d9 @ cfa - 16 * VG - 16 ; PAIR-NEXT: //APP ; PAIR-NEXT: //NO_APP -; PAIR-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; PAIR-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; PAIR-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; PAIR-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload ; PAIR-NEXT: addvl sp, sp, #3 @@ -535,12 +535,12 @@ define aarch64_sve_vector_pcs void @test_clobbers_p_reg_negative() { ; NOPAIR: // %bb.0: ; NOPAIR-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; NOPAIR-NEXT: addvl sp, sp, #-1 -; NOPAIR-NEXT: str p10, [sp, #7, mul vl] // 2-byte Folded Spill +; NOPAIR-NEXT: str p10, [sp, #7, mul vl] // 2-byte Spill ; NOPAIR-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; NOPAIR-NEXT: .cfi_offset w29, -16 ; NOPAIR-NEXT: //APP ; NOPAIR-NEXT: //NO_APP -; NOPAIR-NEXT: ldr p10, [sp, #7, mul vl] // 2-byte Folded Reload +; NOPAIR-NEXT: ldr p10, [sp, #7, mul vl] // 2-byte Reload ; NOPAIR-NEXT: addvl sp, sp, #1 ; NOPAIR-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; NOPAIR-NEXT: ret @@ -549,12 +549,12 @@ define aarch64_sve_vector_pcs void @test_clobbers_p_reg_negative() { ; PAIR: // %bb.0: ; PAIR-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; PAIR-NEXT: addvl sp, sp, #-1 -; PAIR-NEXT: str p10, [sp, #7, mul vl] // 2-byte Folded Spill +; PAIR-NEXT: str p10, [sp, #7, mul vl] // 2-byte Spill ; PAIR-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; PAIR-NEXT: .cfi_offset w29, -16 ; PAIR-NEXT: //APP ; PAIR-NEXT: //NO_APP -; PAIR-NEXT: ldr p10, [sp, #7, mul vl] // 2-byte Folded Reload +; PAIR-NEXT: ldr p10, [sp, #7, mul vl] // 2-byte Reload ; PAIR-NEXT: addvl sp, sp, #1 ; PAIR-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; PAIR-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll index 90660515e4255..6118446a69ac7 100644 --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -494,18 +494,18 @@ define @sve_caller_non_sve_callee_high_range( @sve_caller_non_sve_callee_high_range( @sve_ret_caller_non_sve_callee_high_range() { ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -660,18 +660,18 @@ define @sve_ret_caller_non_sve_callee_high_range() { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p15, 
[sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll index 4aaa25e5e66c5..f6251ff66299e 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll @@ -65,9 +65,9 @@ define @extract_nxv14i1_nxv28i1_14( %in) uw ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 
0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: punpkhi p2.h, p1.b -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: punpklo p1.h, p1.b -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill ; CHECK-NEXT: punpklo p2.h, p2.b ; CHECK-NEXT: punpkhi p3.h, p1.b ; CHECK-NEXT: punpkhi p4.h, p2.b @@ -83,10 +83,10 @@ define @extract_nxv14i1_nxv28i1_14( %in) uw ; CHECK-NEXT: punpklo p1.h, p1.b ; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: uzp1 p3.s, p5.s, p3.s -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s ; CHECK-NEXT: uzp1 p1.h, p2.h, p4.h -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b ; CHECK-NEXT: addvl sp, sp, #1 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll index ac4c387b70583..6d3612c909032 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll @@ -12,12 +12,12 @@ define void @st1d_fixed(ptr %ptr) #0 { ; CHECK-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #128] // 8-byte Spill ; CHECK-NEXT: mov x20, sp ; CHECK-NEXT: bl def ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20] -; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Reload ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #160 diff --git 
a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll index 61e04682fa0bf..61376c36b29b5 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll @@ -21,23 +21,23 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: bl __extenddftf2 ; CHECK-NEXT: add x8, sp, #48 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill ; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload ; CHECK-NEXT: mov d1, v1.d[1] ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: bl __extenddftf2 ; CHECK-NEXT: add x8, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: ldr z0, [x8, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: bl __extenddftf2 ; CHECK-NEXT: add x8, sp, #48 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: mov d1, v1.d[1] ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: bl __extenddftf2 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: stp q1, q0, [x19] ; CHECK-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-NEXT: stp q0, q1, [x19, #32] @@ -68,24 +68,24 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: stp q0, q1, [sp, #32] // 32-byte Folded Spill ; CHECK-NEXT: ldp q1, q0, [x0, #32] -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NEXT: bl __trunctfdf2 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 
16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl __trunctfdf2 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: add x8, sp, #128 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: bl __trunctfdf2 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: bl __trunctfdf2 -; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: add x8, sp, #128 ; CHECK-NEXT: ptrue p0.d, vl2 @@ -93,24 +93,24 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-NEXT: bl __trunctfdf2 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-NEXT: bl __trunctfdf2 -; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: add x8, sp, #128 ; CHECK-NEXT: mov v0.d[1], v1.d[0] 
; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-NEXT: bl __trunctfdf2 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-NEXT: bl __trunctfdf2 -; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: add x8, sp, #128 ; CHECK-NEXT: ptrue p0.d, vl2 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll index c3fca4c18ee70..e0c5041a0cd3a 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll @@ -13,7 +13,7 @@ define dso_local void @func1(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #368 ; CHECK-NEXT: stp x29, x30, [sp, #336] // 16-byte Folded Spill -; CHECK-NEXT: str x28, [sp, #352] // 8-byte Folded Spill +; CHECK-NEXT: str x28, [sp, #352] // 8-byte Spill ; CHECK-NEXT: add x29, sp, #336 ; CHECK-NEXT: .cfi_def_cfa w29, 32 ; CHECK-NEXT: .cfi_offset w28, -16 @@ -53,7 +53,7 @@ define dso_local void @func1(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v ; CHECK-NEXT: str z0, [sp] ; CHECK-NEXT: bl func2 ; CHECK-NEXT: ldp x29, x30, [sp, #336] // 16-byte Folded Reload -; CHECK-NEXT: ldr x28, [sp, #352] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #352] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #368 ; CHECK-NEXT: ret ptr %v9, ptr %v10, ptr %v11, ptr %v12, ptr %v13, ptr %v14, ptr %v15, ptr %v16, diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll 
b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll index f1d5813433489..a8b2c30bec562 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll @@ -870,17 +870,17 @@ define <2 x i64> @llrint_v2i64_v2fp128(<2 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v2i64_v2fp128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret @@ -895,27 +895,27 @@ define <4 x i64> @llrint_v4i64_v4fp128(<4 x fp128> %x) nounwind { ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-NEXT: mov v0.16b, v3.16b ; CHECK-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #64 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #64 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: mov v0.d[1], v1.d[0] @@ -940,54 +940,54 @@ define <8 x i64> @llrint_v8i64_v8fp128(<8 x fp128> %x) nounwind { ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #128 ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #112] // 16-byte Spill ; CHECK-NEXT: mov v0.16b, v7.16b ; CHECK-NEXT: stp q6, q5, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: stp q4, q3, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: stp q2, q1, [sp, #80] // 32-byte Folded Spill ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #128 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #128 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #80] // 
16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #128 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #128 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload @@ -1018,127 +1018,127 @@ define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) nounwind { ; CHECK-NEXT: sub sp, sp, #256 ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: addvl x8, sp, #4 -; CHECK-NEXT: str q1, [sp, #240] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #240] // 16-byte Spill ; CHECK-NEXT: ldr q1, [x8, #272] ; CHECK-NEXT: addvl x8, sp, #4 -; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #224] // 16-byte Spill ; CHECK-NEXT: stp q7, q6, [sp, #128] // 32-byte Folded Spill -; CHECK-NEXT: str q1, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #112] // 16-byte Spill ; CHECK-NEXT: ldr q1, [x8, #288] ; CHECK-NEXT: addvl x8, sp, #4 ; CHECK-NEXT: stp q5, q4, [sp, #160] // 32-byte Folded Spill -; CHECK-NEXT: str q1, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #96] // 16-byte Spill ; CHECK-NEXT: ldr q1, [x8, #304] ; CHECK-NEXT: addvl x8, sp, #4 ; 
CHECK-NEXT: stp q3, q2, [sp, #192] // 32-byte Folded Spill -; CHECK-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #80] // 16-byte Spill ; CHECK-NEXT: ldr q1, [x8, #320] ; CHECK-NEXT: addvl x8, sp, #4 -; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-NEXT: ldr q1, [x8, #336] ; CHECK-NEXT: addvl x8, sp, #4 -; CHECK-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-NEXT: ldr q1, [x8, #352] ; CHECK-NEXT: addvl x8, sp, #4 -; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-NEXT: ldr q1, [x8, #368] ; CHECK-NEXT: addvl x8, sp, #4 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-NEXT: ldr q1, [x8, #384] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #256 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; 
CHECK-NEXT: add x8, sp, #256 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #256 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #256 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte 
Folded Reload +; CHECK-NEXT: str q0, [sp, #128] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #256 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #160] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #160] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #256 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #192] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #192] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #256 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Reload ; 
CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #240] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #240] // 16-byte Reload ; CHECK-NEXT: add x8, sp, #256 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll index 62a3fa7f29bb5..465ba38b17874 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll @@ -1609,15 +1609,15 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v2fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #48 -; CHECK-i32-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: str x30, [sp, #32] // 8-byte Spill +; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-i32-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-i32-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-i32-NEXT: mov v0.s[1], w0 ; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-i32-NEXT: add sp, sp, #48 @@ -1626,17 +1626,17 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind { ; CHECK-i64-LABEL: 
lrint_v2fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: sub sp, sp, #48 -; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-i64-NEXT: mov v0.16b, v1.16b -; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-i64-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-i64-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: add sp, sp, #48 ; CHECK-i64-NEXT: ret @@ -1649,26 +1649,26 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v4fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #80 -; CHECK-i32-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-i32-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr 
q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-i32-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-i32-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-i32-NEXT: mov v0.s[3], w0 ; CHECK-i32-NEXT: add sp, sp, #80 ; CHECK-i32-NEXT: ret @@ -1678,27 +1678,27 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind { ; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-i64-NEXT: sub sp, sp, #64 ; CHECK-i64-NEXT: addvl sp, sp, #-1 -; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Spill ; CHECK-i64-NEXT: mov v0.16b, v3.16b ; CHECK-i64-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #64 ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #64 ; CHECK-i64-NEXT: ptrue p0.d, vl2 ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] @@ -1721,7 +1721,7 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v8fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #176 -; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Spill ; CHECK-i32-NEXT: mov v0.16b, v7.16b ; CHECK-i32-NEXT: stp x30, x25, [sp, #112] // 16-byte Folded Spill ; CHECK-i32-NEXT: stp x24, x23, 
[sp, #128] // 16-byte Folded Spill @@ -1731,25 +1731,25 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind { ; CHECK-i32-NEXT: stp q4, q3, [sp, #32] // 32-byte Folded Spill ; CHECK-i32-NEXT: stp q2, q1, [sp, #64] // 32-byte Folded Spill ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-i32-NEXT: mov w19, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i32-NEXT: mov w20, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-i32-NEXT: mov w21, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i32-NEXT: mov w22, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-i32-NEXT: mov w23, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-i32-NEXT: mov w24, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-i32-NEXT: mov w25, w0 ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s1, w22 @@ -1772,54 +1772,54 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind { ; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-i64-NEXT: sub sp, sp, #128 ; CHECK-i64-NEXT: addvl sp, sp, #-2 -; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Spill ; CHECK-i64-NEXT: mov v0.16b, v7.16b ; CHECK-i64-NEXT: stp q6, q5, [sp, #16] // 32-byte Folded Spill ; CHECK-i64-NEXT: stp q4, q3, [sp, #48] // 32-byte Folded Spill ; CHECK-i64-NEXT: stp q2, q1, [sp, #80] // 32-byte Folded Spill ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #128 ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #128 ; CHECK-i64-NEXT: ptrue p0.d, vl2 ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload ; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q0, [sp, #64] // 
16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #128 ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #128 ; CHECK-i64-NEXT: ptrue p0.d, vl2 ; CHECK-i64-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload @@ -1851,96 +1851,96 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind { ; CHECK-i32-NEXT: stp q2, q1, [sp, #176] // 32-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #368] ; CHECK-i32-NEXT: stp x29, x30, [sp, #272] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #384] ; CHECK-i32-NEXT: stp x28, x27, [sp, #288] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #400] ; CHECK-i32-NEXT: stp x26, x25, [sp, 
#304] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #416] ; CHECK-i32-NEXT: stp x24, x23, [sp, #320] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #208] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #208] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #432] ; CHECK-i32-NEXT: stp x22, x21, [sp, #336] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #448] ; CHECK-i32-NEXT: stp x20, x19, [sp, #352] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #224] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #224] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #464] ; CHECK-i32-NEXT: stp q7, q6, [sp, #80] // 32-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #240] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #240] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #480] ; CHECK-i32-NEXT: stp q5, q4, [sp, #112] // 32-byte Folded Spill ; CHECK-i32-NEXT: mov v0.16b, v1.16b ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #268] // 4-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Reload +; CHECK-i32-NEXT: str w0, [sp, #268] // 4-byte Spill ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #240] // 4-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Reload +; CHECK-i32-NEXT: str w0, [sp, #240] // 4-byte Spill ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #224] // 4-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Reload +; CHECK-i32-NEXT: str w0, [sp, #224] // 4-byte Spill ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr 
q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Reload ; CHECK-i32-NEXT: mov w23, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #208] // 4-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Reload +; CHECK-i32-NEXT: str w0, [sp, #208] // 4-byte Spill ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i32-NEXT: mov w24, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-i32-NEXT: mov w25, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-i32-NEXT: mov w27, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-i32-NEXT: mov w26, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-i32-NEXT: mov w28, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-i32-NEXT: mov w29, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Reload ; CHECK-i32-NEXT: mov w19, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Reload ; CHECK-i32-NEXT: mov w20, w0 ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Reload ; CHECK-i32-NEXT: mov w21, w0 ; CHECK-i32-NEXT: bl lrintl 
-; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-i32-NEXT: mov w22, w0 ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s1, w19 ; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: ldr w8, [sp, #224] // 4-byte Folded Reload +; CHECK-i32-NEXT: ldr w8, [sp, #224] // 4-byte Reload ; CHECK-i32-NEXT: fmov s2, w27 ; CHECK-i32-NEXT: fmov s3, w23 ; CHECK-i32-NEXT: mov v0.s[1], w22 ; CHECK-i32-NEXT: mov v1.s[1], w29 ; CHECK-i32-NEXT: mov v2.s[1], w25 ; CHECK-i32-NEXT: mov v3.s[1], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #240] // 4-byte Folded Reload +; CHECK-i32-NEXT: ldr w8, [sp, #240] // 4-byte Reload ; CHECK-i32-NEXT: ldp x29, x30, [sp, #272] // 16-byte Folded Reload ; CHECK-i32-NEXT: mov v0.s[2], w21 ; CHECK-i32-NEXT: mov v1.s[2], w28 ; CHECK-i32-NEXT: mov v2.s[2], w24 ; CHECK-i32-NEXT: mov v3.s[2], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #208] // 4-byte Folded Reload +; CHECK-i32-NEXT: ldr w8, [sp, #208] // 4-byte Reload ; CHECK-i32-NEXT: ldp x22, x21, [sp, #336] // 16-byte Folded Reload ; CHECK-i32-NEXT: ldp x24, x23, [sp, #320] // 16-byte Folded Reload ; CHECK-i32-NEXT: mov v0.s[3], w20 ; CHECK-i32-NEXT: mov v1.s[3], w26 ; CHECK-i32-NEXT: mov v2.s[3], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #268] // 4-byte Folded Reload +; CHECK-i32-NEXT: ldr w8, [sp, #268] // 4-byte Reload ; CHECK-i32-NEXT: ldp x20, x19, [sp, #352] // 16-byte Folded Reload ; CHECK-i32-NEXT: ldp x26, x25, [sp, #304] // 16-byte Folded Reload ; CHECK-i32-NEXT: mov v3.s[3], w8 @@ -1954,127 +1954,127 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind { ; CHECK-i64-NEXT: sub sp, sp, #256 ; CHECK-i64-NEXT: addvl sp, sp, #-4 ; CHECK-i64-NEXT: addvl x8, sp, #4 -; CHECK-i64-NEXT: str q1, [sp, #240] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q1, [sp, #240] // 16-byte Spill ; CHECK-i64-NEXT: ldr q1, [x8, #272] ; CHECK-i64-NEXT: addvl x8, sp, #4 -; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q0, 
[sp, #224] // 16-byte Spill ; CHECK-i64-NEXT: stp q7, q6, [sp, #128] // 32-byte Folded Spill -; CHECK-i64-NEXT: str q1, [sp, #112] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q1, [sp, #112] // 16-byte Spill ; CHECK-i64-NEXT: ldr q1, [x8, #288] ; CHECK-i64-NEXT: addvl x8, sp, #4 ; CHECK-i64-NEXT: stp q5, q4, [sp, #160] // 32-byte Folded Spill -; CHECK-i64-NEXT: str q1, [sp, #96] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q1, [sp, #96] // 16-byte Spill ; CHECK-i64-NEXT: ldr q1, [x8, #304] ; CHECK-i64-NEXT: addvl x8, sp, #4 ; CHECK-i64-NEXT: stp q3, q2, [sp, #192] // 32-byte Folded Spill -; CHECK-i64-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q1, [sp, #80] // 16-byte Spill ; CHECK-i64-NEXT: ldr q1, [x8, #320] ; CHECK-i64-NEXT: addvl x8, sp, #4 -; CHECK-i64-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-i64-NEXT: ldr q1, [x8, #336] ; CHECK-i64-NEXT: addvl x8, sp, #4 -; CHECK-i64-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q1, [sp, #48] // 16-byte Spill ; CHECK-i64-NEXT: ldr q1, [x8, #352] ; CHECK-i64-NEXT: addvl x8, sp, #4 -; CHECK-i64-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q1, [sp, #32] // 16-byte Spill ; CHECK-i64-NEXT: ldr q1, [x8, #368] ; CHECK-i64-NEXT: addvl x8, sp, #4 -; CHECK-i64-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-i64-NEXT: ldr q1, [x8, #384] ; CHECK-i64-NEXT: mov v0.16b, v1.16b ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-i64-NEXT: 
add x8, sp, #256 ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #256 ; CHECK-i64-NEXT: ptrue p0.d, vl2 ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload ; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #256 ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #112] // 
16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #96] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #256 ; CHECK-i64-NEXT: ptrue p0.d, vl2 ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload ; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #256 ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #160] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #256 ; CHECK-i64-NEXT: ptrue p0.d, vl2 ; 
CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload ; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #192] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #256 ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #240] // 16-byte Reload ; CHECK-i64-NEXT: add x8, sp, #256 ; CHECK-i64-NEXT: ptrue p0.d, vl2 ; CHECK-i64-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll index 71108f00a0054..4ae7ac7b292e9 100644 --- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll @@ -251,9 +251,9 @@ define @test_signed_v8f64_v8i32( %f) { ; CHECK: // %bb.0: ; 
CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x8, #-4476578029606273024 // =0xc1e0000000000000 @@ -283,16 +283,16 @@ define @test_signed_v8f64_v8i32( %f) { ; CHECK-NEXT: fcvtzs z4.d, p2/m, z2.d ; CHECK-NEXT: fcmuo p2.d, p0/z, z0.d, z0.d ; CHECK-NEXT: sel z0.d, p4, z25.d, z6.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p6.d, p0/z, z3.d, z3.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z2.d, z2.d ; CHECK-NEXT: sel z2.d, p1, z25.d, z7.d ; CHECK-NEXT: sel z3.d, p5, z25.d, z4.d -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z2.d, p6/m, #0 // =0x0 -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s ; CHECK-NEXT: uzp1 z1.s, z3.s, z2.s @@ -338,9 +338,9 @@ define @test_signed_v8f64_v8i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x8, #-4548635623644200960 // =0xc0e0000000000000 @@ -373,14 +373,14 @@ define @test_signed_v8f64_v8i16( %f) { ; CHECK-NEXT: mov z4.d, p3/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d ; CHECK-NEXT: sel z0.d, p4, z25.d, z7.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: sel z1.d, p1, z25.d, z24.d ; CHECK-NEXT: sel z2.d, p5, z25.d, z3.d -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 ; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0 -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 z0.s, z0.s, z4.s ; CHECK-NEXT: uzp1 z1.s, z2.s, z1.s ; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h diff --git a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll index 123f6c55c20ab..fe398921cf1b5 100644 --- a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll @@ -207,7 +207,7 @@ define @test_signed_v8f64_v8i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d @@ -227,7 +227,7 @@ define @test_signed_v8f64_v8i32( %f) { ; CHECK-NEXT: fcvtzu z6.d, p3/m, z3.d ; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z7.d ; CHECK-NEXT: fcvtzu z24.d, p4/m, z2.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z7.d ; CHECK-NEXT: mov z0.d, #0xffffffff ; CHECK-NEXT: fcmgt p3.d, p0/z, z3.d, z7.d @@ -274,7 +274,7 @@ define @test_signed_v8f64_v8i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d @@ -294,7 +294,7 @@ define @test_signed_v8f64_v8i16( %f) { ; CHECK-NEXT: fcvtzu z6.d, p3/m, z1.d ; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z7.d ; CHECK-NEXT: fcvtzu z24.d, p4/m, z0.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z7.d ; CHECK-NEXT: mov z2.d, #65535 // =0xffff ; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z7.d diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll index 73c783d4735f8..00a08e505b943 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -895,7 +895,7 @@ define @insert_nxv1i1_nxv16i1_0( %vec, @insert_nxv1i1_nxv16i1_0( %vec, 
@insert_nxv1i1_nxv16i1_1( %vec, @insert_nxv1i1_nxv16i1_1( %vec, @insert_nxv1i1_nxv16i1_2( %vec, @insert_nxv1i1_nxv16i1_2( %vec, @insert_nxv1i1_nxv16i1_3( %vec, @insert_nxv1i1_nxv16i1_3( %vec, @insert_nxv1i1_nxv16i1_4( %vec, @insert_nxv1i1_nxv16i1_4( %vec, @insert_nxv1i1_nxv16i1_5( %vec, @insert_nxv1i1_nxv16i1_5( %vec, @insert_nxv1i1_nxv16i1_6( %vec, @insert_nxv1i1_nxv16i1_6( %vec, @insert_nxv1i1_nxv16i1_7( %vec, @insert_nxv1i1_nxv16i1_7( %vec, @insert_nxv1i1_nxv16i1_8( %vec, @insert_nxv1i1_nxv16i1_8( %vec, @insert_nxv1i1_nxv16i1_9( %vec, @insert_nxv1i1_nxv16i1_9( %vec, @insert_nxv1i1_nxv16i1_10( %vec, @insert_nxv1i1_nxv16i1_10( %vec, @insert_nxv1i1_nxv16i1_11( %vec, @insert_nxv1i1_nxv16i1_11( %vec, @insert_nxv1i1_nxv16i1_12( %vec, @insert_nxv1i1_nxv16i1_12( %vec, @insert_nxv1i1_nxv16i1_13( %vec, @insert_nxv1i1_nxv16i1_13( %vec, @insert_nxv1i1_nxv16i1_14( %vec, @insert_nxv1i1_nxv16i1_14( %vec, @insert_nxv1i1_nxv16i1_15( %vec, @insert_nxv1i1_nxv16i1_15( %vec, @llrint_v8i64_v8f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.s, z0.h @@ -129,11 +129,11 @@ define @llrint_v8i64_v8f16( %x) { ; CHECK-NEXT: sel z1.d, p4, z25.d, z6.d ; CHECK-NEXT: sel z2.d, p5, z25.d, z7.d ; CHECK-NEXT: fcmuo p2.h, p0/z, z3.h, z3.h -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.h, p0/z, z24.h, z24.h ; CHECK-NEXT: sel z3.d, p6, z25.d, z4.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z2.d, p2/m, #0 // =0x0 @@ -151,13 +151,13 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.s, z0.h @@ -228,9 +228,9 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: fcvtzs z25.d, p4/m, z30.h ; CHECK-NEXT: fcmgt p5.h, p0/z, z7.h, z29.h @@ -239,18 +239,18 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: sel z4.d, p9, z24.d, z6.d ; CHECK-NEXT: fcmuo p8.h, p0/z, z7.h, z7.h ; CHECK-NEXT: sel z5.d, p5, z24.d, z31.d -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: sel z6.d, p6, z24.d, 
z28.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p9.h, p0/z, z27.h, z27.h ; CHECK-NEXT: sel z7.d, p4, z24.d, z25.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 -; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h ; CHECK-NEXT: mov z6.d, p9/m, #0 // =0x0 -; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z4.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #1 @@ -266,15 +266,15 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte 
Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -501,15 +501,15 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -602,9 +602,9 @@ define @llrint_v8i64_v8f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.d, z0.s @@ -643,14 +643,14 @@ define @llrint_v8i64_v8f32( %x) { ; CHECK-NEXT: fcmuo p6.s, p0/z, z3.s, z3.s ; CHECK-NEXT: fcmuo p2.s, p0/z, z2.s, z2.s ; CHECK-NEXT: sel z2.d, p5, z4.d, z24.d -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.s, p0/z, z1.s, z1.s ; CHECK-NEXT: sel z1.d, p3, z4.d, z7.d ; CHECK-NEXT: sel z3.d, p1, z4.d, z26.d ; CHECK-NEXT: mov z2.d, p6/m, #0 // =0x0 -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z1.d, p4/m, #0 // =0x0 -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #1 @@ -666,13 +666,13 @@ define @llrint_v16i64_v16f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -744,7 +744,7 @@ define @llrint_v16i64_v16f32( %x) { ; CHECK-NEXT: fcmgt p9.s, p0/z, z24.s, z29.s ; CHECK-NEXT: fcmgt p5.s, p0/z, z30.s, z29.s ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmgt p6.s, p0/z, z25.s, z29.s ; CHECK-NEXT: fcvtzs z7.d, p4/m, z5.s ; CHECK-NEXT: fcmgt p4.s, p0/z, z5.s, z29.s @@ -752,22 +752,22 @@ define @llrint_v16i64_v16f32( %x) { ; CHECK-NEXT: fcmuo p7.s, p0/z, z6.s, z6.s ; CHECK-NEXT: sel z6.d, p6, z9.d, z8.d ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p8.s, p0/z, z30.s, z30.s ; CHECK-NEXT: fcmuo p9.s, p0/z, z25.s, z25.s ; CHECK-NEXT: mov z7.d, 
p4/m, z9.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p3.s, p0/z, z24.s, z24.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z5.s, z5.s ; CHECK-NEXT: sel z5.d, p5, z9.d, z31.d ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z6.d, p9/m, #0 // =0x0 -; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 -; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z4.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #3 @@ -783,15 +783,15 @@ define @llrint_v32i64_v32f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -1016,15 +1016,15 @@ define @llrint_v32i64_v32f32( %x) { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: 
ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1115,9 +1115,9 @@ define @llrint_v8i64_v8f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d @@ -1153,13 +1153,13 @@ define @llrint_v8i64_v8f64( %x) { ; CHECK-NEXT: sel z1.d, p3, z25.d, z6.d ; CHECK-NEXT: fcmuo p6.d, p0/z, z2.d, z2.d ; CHECK-NEXT: sel z2.d, p4, z25.d, z7.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.d, p0/z, z3.d, z3.d ; CHECK-NEXT: sel z3.d, p1, z25.d, z5.d ; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; 
CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z2.d, p6/m, #0 // =0x0 -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1174,13 +1174,13 @@ define @llrint_v16f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -1249,23 +1249,23 @@ define @llrint_v16f64( %x) { ; CHECK-NEXT: sel z4.d, p9, z8.d, z29.d ; CHECK-NEXT: fcmuo p8.d, p0/z, z5.d, z5.d ; CHECK-NEXT: sel z5.d, p5, z8.d, z31.d -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p9.d, p0/z, z6.d, z6.d ; CHECK-NEXT: sel z6.d, p6, z8.d, z24.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 
2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z4.d, p4/m, #0 // =0x0 -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 -; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z6.d, p9/m, #0 // =0x0 -; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.d, p0/z, z7.d, z7.d ; CHECK-NEXT: sel z7.d, p7, z8.d, z9.d ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1280,15 +1280,15 @@ define @llrint_v32f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -1509,15 +1509,15 @@ define @llrint_v32f64( %x) { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, 
[sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll index c072005c793d7..f517e7fe8dc16 100644 --- a/llvm/test/CodeGen/AArch64/sve-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll @@ -86,9 +86,9 @@ define @lrint_v8f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.s, z0.h @@ -130,11 +130,11 @@ define @lrint_v8f16( %x) { ; CHECK-NEXT: sel z1.d, p4, z25.d, z6.d ; CHECK-NEXT: sel z2.d, p5, z25.d, z7.d ; CHECK-NEXT: fcmuo p2.h, p0/z, z3.h, z3.h -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.h, p0/z, z24.h, z24.h ; CHECK-NEXT: sel z3.d, p6, z25.d, z4.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z2.d, p2/m, #0 // =0x0 @@ -152,13 +152,13 @@ define @lrint_v16f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.s, z0.h @@ -229,9 +229,9 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: fcvtzs z25.d, p4/m, z30.h ; CHECK-NEXT: fcmgt p5.h, p0/z, z7.h, z29.h @@ -240,18 +240,18 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: sel z4.d, p9, z24.d, z6.d ; CHECK-NEXT: fcmuo p8.h, p0/z, z7.h, z7.h ; CHECK-NEXT: sel z5.d, p5, z24.d, z31.d -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: sel z6.d, p6, z24.d, z28.d -; CHECK-NEXT: 
ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p9.h, p0/z, z27.h, z27.h ; CHECK-NEXT: sel z7.d, p4, z24.d, z25.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 -; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h ; CHECK-NEXT: mov z6.d, p9/m, #0 // =0x0 -; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z4.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #1 @@ -267,15 +267,15 @@ define @lrint_v32f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, 
[sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -502,15 +502,15 @@ define @lrint_v32f16( %x) { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -603,9 +603,9 @@ define @lrint_v8f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.d, z0.s @@ -644,14 +644,14 @@ define @lrint_v8f32( %x) { ; CHECK-NEXT: fcmuo p6.s, p0/z, z3.s, z3.s ; CHECK-NEXT: fcmuo p2.s, p0/z, z2.s, z2.s ; CHECK-NEXT: sel z2.d, p5, z4.d, z24.d -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.s, p0/z, z1.s, z1.s ; CHECK-NEXT: sel z1.d, p3, z4.d, z7.d ; CHECK-NEXT: sel z3.d, p1, z4.d, z26.d ; CHECK-NEXT: mov z2.d, p6/m, #0 // =0x0 -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z1.d, p4/m, #0 // =0x0 -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #1 @@ -667,13 +667,13 @@ define @lrint_v16f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -745,7 +745,7 @@ define @lrint_v16f32( %x) { ; CHECK-NEXT: fcmgt p9.s, p0/z, z24.s, z29.s ; CHECK-NEXT: fcmgt p5.s, p0/z, z30.s, z29.s ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmgt p6.s, p0/z, z25.s, z29.s ; CHECK-NEXT: fcvtzs z7.d, p4/m, z5.s ; CHECK-NEXT: fcmgt p4.s, p0/z, z5.s, z29.s @@ -753,22 +753,22 @@ define @lrint_v16f32( %x) { ; CHECK-NEXT: fcmuo p7.s, p0/z, z6.s, z6.s ; CHECK-NEXT: sel z6.d, p6, z9.d, z8.d ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p8.s, p0/z, z30.s, z30.s ; CHECK-NEXT: fcmuo p9.s, p0/z, z25.s, z25.s ; CHECK-NEXT: mov z7.d, p4/m, z9.d -; 
CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p3.s, p0/z, z24.s, z24.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z5.s, z5.s ; CHECK-NEXT: sel z5.d, p5, z9.d, z31.d ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z6.d, p9/m, #0 // =0x0 -; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 -; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z4.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #3 @@ -784,15 +784,15 @@ define @lrint_v32f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -1017,15 +1017,15 @@ define @lrint_v32f32( %x) { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, 
[sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1116,9 +1116,9 @@ define @lrint_v8f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d @@ -1154,13 +1154,13 @@ define @lrint_v8f64( %x) { ; CHECK-NEXT: sel z1.d, p3, z25.d, z6.d ; CHECK-NEXT: fcmuo p6.d, p0/z, z2.d, z2.d ; CHECK-NEXT: sel z2.d, p4, z25.d, z7.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.d, p0/z, z3.d, z3.d ; CHECK-NEXT: sel z3.d, p1, z25.d, z5.d ; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul 
vl] // 2-byte Reload ; CHECK-NEXT: mov z2.d, p6/m, #0 // =0x0 -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1175,13 +1175,13 @@ define @lrint_v16f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG @@ -1250,23 +1250,23 @@ define @lrint_v16f64( %x) { ; CHECK-NEXT: sel z4.d, p9, z8.d, z29.d ; CHECK-NEXT: fcmuo p8.d, p0/z, z5.d, z5.d ; CHECK-NEXT: sel z5.d, p5, z8.d, z31.d -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p9.d, p0/z, z6.d, z6.d ; CHECK-NEXT: sel z6.d, p6, z8.d, z24.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: 
ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z4.d, p4/m, #0 // =0x0 -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 -; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z6.d, p9/m, #0 // =0x0 -; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.d, p0/z, z7.d, z7.d ; CHECK-NEXT: sel z7.d, p7, z8.d, z9.d ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1281,15 +1281,15 @@ define @lrint_v32f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -1510,15 +1510,15 @@ define @lrint_v32f64( %x) { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, 
[sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-pred-arith.ll b/llvm/test/CodeGen/AArch64/sve-pred-arith.ll index 24df76b1ab25f..a10b37d734b7d 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-arith.ll @@ -54,25 +54,25 @@ define aarch64_sve_vector_pcs @add_nxv64i1( ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue p6.b -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ldr p4, [x0] ; CHECK-NEXT: ldr p5, [x1] ; CHECK-NEXT: ldr p7, [x2] ; CHECK-NEXT: 
ldr p8, [x3] ; CHECK-NEXT: eor p0.b, p6/z, p0.b, p4.b ; CHECK-NEXT: eor p1.b, p6/z, p1.b, p5.b -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: eor p2.b, p6/z, p2.b, p7.b -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: eor p3.b, p6/z, p3.b, p8.b -; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -138,25 +138,25 @@ define aarch64_sve_vector_pcs @sub_nxv64i1( ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill ; CHECK-NEXT: ptrue p6.b -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: ldr p4, [x0] ; CHECK-NEXT: ldr p5, [x1] ; CHECK-NEXT: ldr p7, [x2] ; CHECK-NEXT: ldr p8, [x3] ; CHECK-NEXT: eor p0.b, p6/z, p0.b, p4.b ; CHECK-NEXT: eor p1.b, p6/z, p1.b, p5.b -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 
2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: eor p2.b, p6/z, p2.b, p7.b -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: eor p3.b, p6/z, p3.b, p8.b -; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll index d8de12c5f66b9..3aaae5e73ff23 100644 --- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll +++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll @@ -14,7 +14,7 @@ define i32 @csr_d8_allocnxv4i32i32f64(double %d) "aarch64_pstate_sm_compatible" ; CHECK-COMMON-LABEL: csr_d8_allocnxv4i32i32f64: ; CHECK-COMMON: // %bb.0: // %entry ; CHECK-COMMON-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-COMMON-NEXT: str x29, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x29, [sp, #8] // 8-byte Spill ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: addvl sp, sp, #-1 ; CHECK-COMMON-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x20, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 32 + 8 * VG @@ -30,7 +30,7 @@ define i32 @csr_d8_allocnxv4i32i32f64(double %d) "aarch64_pstate_sm_compatible" ; CHECK-COMMON-NEXT: str z1, [x8] ; CHECK-COMMON-NEXT: addvl sp, sp, #1 ; CHECK-COMMON-NEXT: add sp, sp, #16 -; CHECK-COMMON-NEXT: ldr x29, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x29, [sp, #8] // 8-byte Reload ; CHECK-COMMON-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ret ; CHECK-COMMON-NE @@ -158,7 +158,7 @@ define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_ ; CHECK-COMMON-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: add x29, sp, #8 -; CHECK-COMMON-NEXT: str x19, [sp, #24] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x19, [sp, #24] // 8-byte Spill ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: addvl sp, sp, #-1 ; CHECK-COMMON-NEXT: mov x19, sp @@ -188,7 +188,7 @@ define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_ ; CHECK-COMMON-NEXT: str z1, [x8, #-1, mul vl] ; CHECK-COMMON-NEXT: sub sp, x29, #8 ; CHECK-COMMON-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr x19, [sp, #24] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x19, [sp, #24] // 8-byte Reload ; CHECK-COMMON-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ret entry: @@ -218,7 +218,7 @@ define i32 @csr_d8_allocnxv4i32i32f64_stackargsi32f64(double %d0, double %d1, do ; CHECK-COMMON-LABEL: csr_d8_allocnxv4i32i32f64_stackargsi32f64: ; CHECK-COMMON: // %bb.0: // %entry ; CHECK-COMMON-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-COMMON-NEXT: str x29, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x29, [sp, #8] // 8-byte Spill ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: addvl sp, sp, #-1 ; CHECK-COMMON-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x20, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 32 + 8 * VG @@ -234,7 +234,7 @@ define i32 @csr_d8_allocnxv4i32i32f64_stackargsi32f64(double %d0, double %d1, do ; CHECK-COMMON-NEXT: str z1, [x8] ; CHECK-COMMON-NEXT: addvl sp, sp, #1 ; CHECK-COMMON-NEXT: add sp, sp, #16 -; CHECK-COMMON-NEXT: ldr x29, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x29, [sp, #8] // 8-byte Reload ; CHECK-COMMON-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ret entry: @@ -379,7 +379,7 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK-COMMON-NEXT: .cfi_def_cfa_offset 64 ; CHECK-COMMON-NEXT: cntd x9 ; CHECK-COMMON-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str x9, [sp, #16] // 8-byte Spill ; CHECK-COMMON-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: mov x29, sp ; CHECK-COMMON-NEXT: .cfi_def_cfa w29, 64 @@ -391,18 +391,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK-COMMON-NEXT: .cfi_offset w30, -56 ; CHECK-COMMON-NEXT: .cfi_offset w29, -64 ; CHECK-COMMON-NEXT: addvl sp, sp, #-18 -; CHECK-COMMON-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-COMMON-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-COMMON-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-COMMON-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-COMMON-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-COMMON-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-COMMON-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-COMMON-NEXT: str p8, [sp, #11, mul vl] // 
2-byte Folded Spill -; CHECK-COMMON-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-COMMON-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-COMMON-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-COMMON-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-COMMON-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-COMMON-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-COMMON-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-COMMON-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-COMMON-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-COMMON-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-COMMON-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-COMMON-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-COMMON-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-COMMON-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-COMMON-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-COMMON-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-COMMON-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -462,18 +462,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK-COMMON-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p10, [sp, #9, mul vl] // 
2-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-COMMON-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-COMMON-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-COMMON-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-COMMON-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-COMMON-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-COMMON-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-COMMON-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-COMMON-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-COMMON-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-COMMON-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-COMMON-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-COMMON-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-COMMON-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-COMMON-NEXT: addvl sp, sp, #18 ; CHECK-COMMON-NEXT: .cfi_restore z8 ; CHECK-COMMON-NEXT: .cfi_restore z9 @@ -533,7 +533,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #80] // 8-byte Spill ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill ; CHECK-NEXT: add x29, sp, #64 ; CHECK-NEXT: .cfi_def_cfa w29, 48 @@ -604,7 +604,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK-NEWLOWERING-NEXT: stp d11, 
d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEWLOWERING-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: str x9, [sp, #80] // 8-byte Spill ; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill ; CHECK-NEWLOWERING-NEXT: add x29, sp, #64 ; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa w29, 48 @@ -671,4 +671,4 @@ entry: tail call void @other() ret i32 %x } -declare void @other() +declare void @other() \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll index dbacd77315198..fddd5df323e46 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll @@ -247,7 +247,7 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) { ; NONEON-NOSVE-NEXT: fcvt s18, h18 ; NONEON-NOSVE-NEXT: ldr h13, [sp, #90] ; NONEON-NOSVE-NEXT: ldr h14, [sp, #74] -; NONEON-NOSVE-NEXT: str h0, [sp, #14] // 2-byte Folded Spill +; NONEON-NOSVE-NEXT: str h0, [sp, #14] // 2-byte Spill ; NONEON-NOSVE-NEXT: ldr h0, [sp, #76] ; NONEON-NOSVE-NEXT: ldr h11, [sp, #88] ; NONEON-NOSVE-NEXT: ldr h12, [sp, #72] @@ -301,7 +301,7 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) { ; NONEON-NOSVE-NEXT: fcvt s0, h0 ; NONEON-NOSVE-NEXT: fcvt h4, s4 ; NONEON-NOSVE-NEXT: fmul s2, s3, s2 -; NONEON-NOSVE-NEXT: ldr h3, [sp, #14] // 2-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr h3, [sp, #14] // 2-byte Reload ; NONEON-NOSVE-NEXT: fcvt s16, h16 ; NONEON-NOSVE-NEXT: fcvt s6, h6 ; NONEON-NOSVE-NEXT: fcvt s3, h3 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll index 66e157c779abf..25c98c4d364c2 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll @@ -1391,11 +1391,11 @@ define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) { ; NONEON-NOSVE-LABEL: fcvtzu_v16f64_v16i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #304 -; NONEON-NOSVE-NEXT: str x29, [sp, #288] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x29, [sp, #288] // 8-byte Spill ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 304 ; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] -; NONEON-NOSVE-NEXT: ldr x29, [sp, #288] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x29, [sp, #288] // 8-byte Reload ; NONEON-NOSVE-NEXT: ldp q6, q7, [x0] ; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #64] ; NONEON-NOSVE-NEXT: ldp q4, q5, [x0, #96] @@ -3096,11 +3096,11 @@ define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) { ; NONEON-NOSVE-LABEL: fcvtzs_v16f64_v16i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #304 -; NONEON-NOSVE-NEXT: str x29, [sp, #288] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x29, [sp, #288] // 8-byte Spill ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 304 ; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] -; NONEON-NOSVE-NEXT: ldr x29, [sp, #288] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x29, [sp, #288] // 8-byte Reload ; NONEON-NOSVE-NEXT: ldp q6, q7, [x0] ; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #64] ; NONEON-NOSVE-NEXT: ldp q4, q5, [x0, #96] diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll index c3dcb0f6d7f1f..f290e3bfd6e93 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll @@ -332,10 +332,10 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: add w10, w10, w10 ; 
NONEON-NOSVE-NEXT: strh w9, [sp, #90] ; NONEON-NOSVE-NEXT: sxtb w9, w11 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Reload ; NONEON-NOSVE-NEXT: strh w8, [sp, #88] ; NONEON-NOSVE-NEXT: sxtb w8, w10 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: strh w9, [sp, #86] ; NONEON-NOSVE-NEXT: add w11, w11, w11 ; NONEON-NOSVE-NEXT: strh w8, [sp, #84] @@ -633,9 +633,9 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: add w10, w10, w10 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] ; NONEON-NOSVE-NEXT: sxtb w9, w11 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Reload ; NONEON-NOSVE-NEXT: sxtb w8, w10 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w11, w11, w11 ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #80] ; NONEON-NOSVE-NEXT: add w10, w10, w10 @@ -1038,9 +1038,9 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: add w10, w10, w10 ; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #208] ; NONEON-NOSVE-NEXT: sxtb x9, w11 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #8] // 4-byte Reload ; NONEON-NOSVE-NEXT: sxtb x8, w10 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w11, w11, w11 ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #144] ; NONEON-NOSVE-NEXT: add w10, w10, w10 @@ -1724,11 +1724,11 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: add w10, w10, w10 ; NONEON-NOSVE-NEXT: strh w8, [sp, #88] ; NONEON-NOSVE-NEXT: and w8, w9, #0xff -; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte 
Reload ; NONEON-NOSVE-NEXT: ldrb w30, [sp, #20] ; NONEON-NOSVE-NEXT: strh w8, [sp, #86] ; NONEON-NOSVE-NEXT: and w8, w10, #0xff -; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: strh w6, [sp, #78] ; NONEON-NOSVE-NEXT: add w9, w9, w9 ; NONEON-NOSVE-NEXT: add w6, w30, w30 @@ -2029,9 +2029,9 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: add w8, w10, w10 ; NONEON-NOSVE-NEXT: and w9, w11, #0xff ; NONEON-NOSVE-NEXT: and w8, w8, #0xff -; NONEON-NOSVE-NEXT: ldr w10, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #8] // 4-byte Reload ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w10, w10, w10 ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #80] ; NONEON-NOSVE-NEXT: add w9, w9, w9 @@ -2459,9 +2459,9 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) { ; NONEON-NOSVE-NEXT: and w8, w8, #0xff ; NONEON-NOSVE-NEXT: and w9, w9, #0xff ; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #196] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Reload ; NONEON-NOSVE-NEXT: stp wzr, w9, [sp, #188] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: and w22, w22, #0xff ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: stp wzr, w22, [sp, #164] diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll index 85b7b4d010062..f0d7f7df3db70 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll @@ -169,7 +169,7 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; 
NONEON-NOSVE-LABEL: smulh_v16i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #160 -; NONEON-NOSVE-NEXT: str x27, [sp, #80] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x27, [sp, #80] // 8-byte Spill ; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #96] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill @@ -260,7 +260,7 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; NONEON-NOSVE-NEXT: strb w3, [sp, #74] ; NONEON-NOSVE-NEXT: lsr w13, w13, #8 ; NONEON-NOSVE-NEXT: strb w2, [sp, #73] -; NONEON-NOSVE-NEXT: ldr x27, [sp, #80] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x27, [sp, #80] // 8-byte Reload ; NONEON-NOSVE-NEXT: lsr w10, w10, #8 ; NONEON-NOSVE-NEXT: strb w0, [sp, #72] ; NONEON-NOSVE-NEXT: lsr w8, w8, #8 @@ -427,7 +427,7 @@ define void @smulh_v32i8(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #216] ; NONEON-NOSVE-NEXT: strb w14, [sp, #287] ; NONEON-NOSVE-NEXT: lsr w14, w25, #8 -; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Reload ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #241] ; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #240] @@ -435,19 +435,19 @@ define void @smulh_v32i8(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: lsr w14, w28, #8 ; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #254] ; NONEON-NOSVE-NEXT: mul w8, w25, w8 -; NONEON-NOSVE-NEXT: ldr w25, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w25, [sp, #28] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldrsb w13, [sp, #252] ; NONEON-NOSVE-NEXT: strb w14, [sp, #283] -; NONEON-NOSVE-NEXT: ldr w14, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w14, [sp, #40] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w9, w25, w9 -; NONEON-NOSVE-NEXT: ldr w25, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w25, [sp, #32] // 4-byte Reload ; 
NONEON-NOSVE-NEXT: strb w17, [sp, #286] ; NONEON-NOSVE-NEXT: mul w12, w14, w12 ; NONEON-NOSVE-NEXT: lsr w8, w8, #8 ; NONEON-NOSVE-NEXT: lsr w17, w26, #8 ; NONEON-NOSVE-NEXT: mul w10, w25, w10 -; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: ldr w14, [sp, #44] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Reload +; NONEON-NOSVE-NEXT: ldr w14, [sp, #44] // 4-byte Reload ; NONEON-NOSVE-NEXT: lsr w9, w9, #8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #281] ; NONEON-NOSVE-NEXT: mul w11, w25, w11 @@ -455,84 +455,84 @@ define void @smulh_v32i8(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: lsr w17, w30, #8 ; NONEON-NOSVE-NEXT: mul w13, w14, w13 ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #48] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #280] ; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #320] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: lsr w9, w11, #8 ; NONEON-NOSVE-NEXT: mul w10, w10, w15 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #52] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #279] ; NONEON-NOSVE-NEXT: lsr w8, w12, #8 -; NONEON-NOSVE-NEXT: ldr w12, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w12, [sp, #56] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w11, w11, w16 ; NONEON-NOSVE-NEXT: strb w9, [sp, #278] ; NONEON-NOSVE-NEXT: lsr w9, w13, #8 ; NONEON-NOSVE-NEXT: mul w12, w12, w18 -; NONEON-NOSVE-NEXT: ldr w13, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #60] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #277] ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #64] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #64] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #276] ; NONEON-NOSVE-NEXT: mul w13, w13, w0 ; NONEON-NOSVE-NEXT: lsr w9, w11, #8 -; NONEON-NOSVE-NEXT: ldr w11, [sp, 
#68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #68] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w10, w10, w1 ; NONEON-NOSVE-NEXT: strb w8, [sp, #275] ; NONEON-NOSVE-NEXT: lsr w8, w12, #8 ; NONEON-NOSVE-NEXT: mul w11, w11, w2 -; NONEON-NOSVE-NEXT: ldr w12, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w12, [sp, #72] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #274] ; NONEON-NOSVE-NEXT: lsr w9, w13, #8 -; NONEON-NOSVE-NEXT: ldr w13, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #76] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #273] ; NONEON-NOSVE-NEXT: mul w12, w12, w3 ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #80] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w13, w13, w4 ; NONEON-NOSVE-NEXT: strb w9, [sp, #272] ; NONEON-NOSVE-NEXT: lsr w9, w11, #8 ; NONEON-NOSVE-NEXT: mul w10, w10, w5 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #84] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #271] ; NONEON-NOSVE-NEXT: lsr w8, w12, #8 -; NONEON-NOSVE-NEXT: ldr w12, [sp, #88] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w12, [sp, #88] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #270] ; NONEON-NOSVE-NEXT: mul w11, w11, w6 ; NONEON-NOSVE-NEXT: lsr w9, w13, #8 -; NONEON-NOSVE-NEXT: ldr w13, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #92] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w12, w12, w7 ; NONEON-NOSVE-NEXT: strb w8, [sp, #269] ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 ; NONEON-NOSVE-NEXT: mul w13, w13, w19 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #96] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #268] ; NONEON-NOSVE-NEXT: lsr w9, w11, #8 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #100] // 4-byte 
Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #267] ; NONEON-NOSVE-NEXT: mul w10, w10, w20 ; NONEON-NOSVE-NEXT: lsr w8, w12, #8 -; NONEON-NOSVE-NEXT: ldr w12, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w12, [sp, #104] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w11, w11, w21 ; NONEON-NOSVE-NEXT: strb w9, [sp, #266] ; NONEON-NOSVE-NEXT: lsr w9, w13, #8 -; NONEON-NOSVE-NEXT: ldr w13, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #108] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w12, w12, w22 ; NONEON-NOSVE-NEXT: strb w8, [sp, #265] ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #112] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #264] ; NONEON-NOSVE-NEXT: mul w13, w13, w23 ; NONEON-NOSVE-NEXT: lsr w9, w11, #8 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #116] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp w15, w14, [sp, #16] // 8-byte Folded Reload ; NONEON-NOSVE-NEXT: mul w10, w10, w24 ; NONEON-NOSVE-NEXT: strb w8, [sp, #263] ; NONEON-NOSVE-NEXT: lsr w8, w12, #8 ; NONEON-NOSVE-NEXT: mul w11, w11, w27 -; NONEON-NOSVE-NEXT: ldr w12, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w12, [sp, #120] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #262] ; NONEON-NOSVE-NEXT: lsr w9, w13, #8 -; NONEON-NOSVE-NEXT: ldr w13, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #124] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #261] ; NONEON-NOSVE-NEXT: mul w12, w12, w15 ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 @@ -1365,7 +1365,7 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; NONEON-NOSVE-LABEL: umulh_v16i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #160 -; NONEON-NOSVE-NEXT: str x27, [sp, #80] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x27, [sp, #80] // 8-byte Spill ; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #96] // 
16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill @@ -1456,7 +1456,7 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; NONEON-NOSVE-NEXT: strb w3, [sp, #74] ; NONEON-NOSVE-NEXT: lsr w13, w13, #8 ; NONEON-NOSVE-NEXT: strb w2, [sp, #73] -; NONEON-NOSVE-NEXT: ldr x27, [sp, #80] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x27, [sp, #80] // 8-byte Reload ; NONEON-NOSVE-NEXT: lsr w10, w10, #8 ; NONEON-NOSVE-NEXT: strb w0, [sp, #72] ; NONEON-NOSVE-NEXT: lsr w8, w8, #8 @@ -1623,7 +1623,7 @@ define void @umulh_v32i8(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #216] ; NONEON-NOSVE-NEXT: strb w14, [sp, #287] ; NONEON-NOSVE-NEXT: lsr w14, w25, #8 -; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Reload ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #241] ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #240] @@ -1631,19 +1631,19 @@ define void @umulh_v32i8(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: lsr w14, w28, #8 ; NONEON-NOSVE-NEXT: ldrb w11, [sp, #254] ; NONEON-NOSVE-NEXT: mul w8, w25, w8 -; NONEON-NOSVE-NEXT: ldr w25, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w25, [sp, #28] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldrb w13, [sp, #252] ; NONEON-NOSVE-NEXT: strb w14, [sp, #283] -; NONEON-NOSVE-NEXT: ldr w14, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w14, [sp, #40] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w9, w25, w9 -; NONEON-NOSVE-NEXT: ldr w25, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w25, [sp, #32] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w17, [sp, #286] ; NONEON-NOSVE-NEXT: mul w12, w14, w12 ; NONEON-NOSVE-NEXT: lsr w8, w8, #8 ; NONEON-NOSVE-NEXT: lsr w17, w26, #8 ; NONEON-NOSVE-NEXT: mul w10, w25, w10 -; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Folded Reload -; 
NONEON-NOSVE-NEXT: ldr w14, [sp, #44] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Reload +; NONEON-NOSVE-NEXT: ldr w14, [sp, #44] // 4-byte Reload ; NONEON-NOSVE-NEXT: lsr w9, w9, #8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #281] ; NONEON-NOSVE-NEXT: mul w11, w25, w11 @@ -1651,84 +1651,84 @@ define void @umulh_v32i8(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: lsr w17, w30, #8 ; NONEON-NOSVE-NEXT: mul w13, w14, w13 ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #48] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #280] ; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #320] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: lsr w9, w11, #8 ; NONEON-NOSVE-NEXT: mul w10, w10, w15 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #52] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #279] ; NONEON-NOSVE-NEXT: lsr w8, w12, #8 -; NONEON-NOSVE-NEXT: ldr w12, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w12, [sp, #56] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w11, w11, w16 ; NONEON-NOSVE-NEXT: strb w9, [sp, #278] ; NONEON-NOSVE-NEXT: lsr w9, w13, #8 ; NONEON-NOSVE-NEXT: mul w12, w12, w18 -; NONEON-NOSVE-NEXT: ldr w13, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #60] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #277] ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #64] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #64] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #276] ; NONEON-NOSVE-NEXT: mul w13, w13, w0 ; NONEON-NOSVE-NEXT: lsr w9, w11, #8 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #68] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w10, w10, w1 ; NONEON-NOSVE-NEXT: strb w8, [sp, #275] ; NONEON-NOSVE-NEXT: lsr w8, w12, #8 ; NONEON-NOSVE-NEXT: mul w11, w11, w2 -; 
NONEON-NOSVE-NEXT: ldr w12, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w12, [sp, #72] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #274] ; NONEON-NOSVE-NEXT: lsr w9, w13, #8 -; NONEON-NOSVE-NEXT: ldr w13, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #76] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #273] ; NONEON-NOSVE-NEXT: mul w12, w12, w3 ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #80] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w13, w13, w4 ; NONEON-NOSVE-NEXT: strb w9, [sp, #272] ; NONEON-NOSVE-NEXT: lsr w9, w11, #8 ; NONEON-NOSVE-NEXT: mul w10, w10, w5 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #84] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #271] ; NONEON-NOSVE-NEXT: lsr w8, w12, #8 -; NONEON-NOSVE-NEXT: ldr w12, [sp, #88] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w12, [sp, #88] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #270] ; NONEON-NOSVE-NEXT: mul w11, w11, w6 ; NONEON-NOSVE-NEXT: lsr w9, w13, #8 -; NONEON-NOSVE-NEXT: ldr w13, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #92] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w12, w12, w7 ; NONEON-NOSVE-NEXT: strb w8, [sp, #269] ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 ; NONEON-NOSVE-NEXT: mul w13, w13, w19 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #96] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #268] ; NONEON-NOSVE-NEXT: lsr w9, w11, #8 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #100] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #267] ; NONEON-NOSVE-NEXT: mul w10, w10, w20 ; NONEON-NOSVE-NEXT: lsr w8, w12, #8 -; NONEON-NOSVE-NEXT: ldr w12, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w12, [sp, #104] // 4-byte Reload 
; NONEON-NOSVE-NEXT: mul w11, w11, w21 ; NONEON-NOSVE-NEXT: strb w9, [sp, #266] ; NONEON-NOSVE-NEXT: lsr w9, w13, #8 -; NONEON-NOSVE-NEXT: ldr w13, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #108] // 4-byte Reload ; NONEON-NOSVE-NEXT: mul w12, w12, w22 ; NONEON-NOSVE-NEXT: strb w8, [sp, #265] ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 -; NONEON-NOSVE-NEXT: ldr w10, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w10, [sp, #112] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #264] ; NONEON-NOSVE-NEXT: mul w13, w13, w23 ; NONEON-NOSVE-NEXT: lsr w9, w11, #8 -; NONEON-NOSVE-NEXT: ldr w11, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w11, [sp, #116] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp w15, w14, [sp, #16] // 8-byte Folded Reload ; NONEON-NOSVE-NEXT: mul w10, w10, w24 ; NONEON-NOSVE-NEXT: strb w8, [sp, #263] ; NONEON-NOSVE-NEXT: lsr w8, w12, #8 ; NONEON-NOSVE-NEXT: mul w11, w11, w27 -; NONEON-NOSVE-NEXT: ldr w12, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w12, [sp, #120] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #262] ; NONEON-NOSVE-NEXT: lsr w9, w13, #8 -; NONEON-NOSVE-NEXT: ldr w13, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #124] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #261] ; NONEON-NOSVE-NEXT: mul w12, w12, w15 ; NONEON-NOSVE-NEXT: lsr w8, w10, #8 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll index f7fadaa1217bc..200e462fe620c 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll @@ -2444,11 +2444,11 @@ define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) { ; NONEON-NOSVE-LABEL: scvtf_v16i32_v16f64: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #272 -; NONEON-NOSVE-NEXT: str x29, [sp, #256] // 8-byte Folded Spill +; 
NONEON-NOSVE-NEXT: str x29, [sp, #256] // 8-byte Spill ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272 ; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] -; NONEON-NOSVE-NEXT: ldr x29, [sp, #256] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x29, [sp, #256] // 8-byte Reload ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] ; NONEON-NOSVE-NEXT: str q1, [sp, #32] ; NONEON-NOSVE-NEXT: stp q0, q2, [sp] diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll index 3787b23547afc..e4763f0bb9ba9 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll @@ -338,7 +338,7 @@ define void @select_v32i8(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: csel w9, w11, w10, eq ; NONEON-NOSVE-NEXT: cmp w13, w12 ; NONEON-NOSVE-NEXT: ldrb w1, [sp, #21] -; NONEON-NOSVE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #12] // 4-byte Spill ; NONEON-NOSVE-NEXT: csel w8, w13, w12, eq ; NONEON-NOSVE-NEXT: cmp w16, w14 ; NONEON-NOSVE-NEXT: ldrb w13, [sp, #38] @@ -372,7 +372,7 @@ define void @select_v32i8(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: ldrb w6, [sp, #44] ; NONEON-NOSVE-NEXT: cmp w5, w2 ; NONEON-NOSVE-NEXT: ldrb w30, [sp, #29] -; NONEON-NOSVE-NEXT: str w8, [sp] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp] // 4-byte Spill ; NONEON-NOSVE-NEXT: csel w2, w5, w2, eq ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46] ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30] @@ -705,7 +705,7 @@ define void @select_v16i16(ptr %a, ptr %b) { ; NONEON-NOSVE-LABEL: select_v16i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #112 -; NONEON-NOSVE-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x19, [sp, #96] // 8-byte Spill ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 112 ; NONEON-NOSVE-NEXT: .cfi_offset w19, -16 ; 
NONEON-NOSVE-NEXT: ldp q0, q3, [x1] @@ -776,7 +776,7 @@ define void @select_v16i16(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: cmp w19, w10 ; NONEON-NOSVE-NEXT: csel w10, w19, w10, eq ; NONEON-NOSVE-NEXT: strh w11, [sp, #92] -; NONEON-NOSVE-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x19, [sp, #96] // 8-byte Reload ; NONEON-NOSVE-NEXT: cmp w5, w4 ; NONEON-NOSVE-NEXT: strh w10, [sp, #94] ; NONEON-NOSVE-NEXT: csel w8, w5, w4, eq diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll index 094eaad0cfe80..302942178a764 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll @@ -174,14 +174,14 @@ define void @alloc_v8f64(ptr %st_ptr) nounwind { ; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: mov x20, sp ; CHECK-NEXT: bl def ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20] ; CHECK-NEXT: ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3] -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: stp q0, q2, [x19] ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll index 1fa4b5f62bdec..64762e48d0863 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll @@ -120,7 +120,7 @@ define <8 x i8> @masked_load_v8i8(ptr %src, <8 x i1> %mask) { ; 
NONEON-NOSVE-LABEL: masked_load_v8i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #272 -; NONEON-NOSVE-NEXT: str x29, [sp, #256] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x29, [sp, #256] // 8-byte Spill ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 272 ; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 ; NONEON-NOSVE-NEXT: str d0, [sp, #240] @@ -212,7 +212,7 @@ define <8 x i8> @masked_load_v8i8(ptr %src, <8 x i1> %mask) { ; NONEON-NOSVE-NEXT: strb w8, [sp, #31] ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] ; NONEON-NOSVE-NEXT: .LBB1_11: // %else20 -; NONEON-NOSVE-NEXT: ldr x29, [sp, #256] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x29, [sp, #256] // 8-byte Reload ; NONEON-NOSVE-NEXT: add sp, sp, #272 ; NONEON-NOSVE-NEXT: ret ; NONEON-NOSVE-NEXT: .LBB1_12: // %cond.load4 @@ -1630,7 +1630,7 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) { ; NONEON-NOSVE-LABEL: masked_load_v8f16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #496 -; NONEON-NOSVE-NEXT: str x29, [sp, #480] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x29, [sp, #480] // 8-byte Spill ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 496 ; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 ; NONEON-NOSVE-NEXT: str d0, [sp, #464] @@ -1723,7 +1723,7 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) { ; NONEON-NOSVE-NEXT: str h0, [sp, #46] ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] ; NONEON-NOSVE-NEXT: .LBB6_11: // %else20 -; NONEON-NOSVE-NEXT: ldr x29, [sp, #480] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x29, [sp, #480] // 8-byte Reload ; NONEON-NOSVE-NEXT: add sp, sp, #496 ; NONEON-NOSVE-NEXT: ret ; NONEON-NOSVE-NEXT: .LBB6_12: // %cond.load4 @@ -2346,7 +2346,7 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) { ; NONEON-NOSVE-LABEL: masked_load_v8f32: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #496 -; NONEON-NOSVE-NEXT: str x29, [sp, #480] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x29, [sp, #480] // 8-byte Spill ; 
NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 496 ; NONEON-NOSVE-NEXT: .cfi_offset w29, -16 ; NONEON-NOSVE-NEXT: str d0, [sp, #464] @@ -2436,7 +2436,7 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) { ; NONEON-NOSVE-NEXT: stp s2, s1, [sp, #40] ; NONEON-NOSVE-NEXT: ldr q1, [sp, #32] ; NONEON-NOSVE-NEXT: .LBB10_11: // %else20 -; NONEON-NOSVE-NEXT: ldr x29, [sp, #480] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x29, [sp, #480] // 8-byte Reload ; NONEON-NOSVE-NEXT: add sp, sp, #496 ; NONEON-NOSVE-NEXT: ret ; NONEON-NOSVE-NEXT: .LBB10_12: // %cond.load4 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll index 9d241f6f927e1..4a540813f0773 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll @@ -184,9 +184,9 @@ define void @trunc_v32i16_v32i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: strb w8, [sp, #100] ; NONEON-NOSVE-NEXT: add w8, w10, w10 ; NONEON-NOSVE-NEXT: strb w9, [sp, #99] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #98] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w5, [sp, #95] ; NONEON-NOSVE-NEXT: add w5, w30, w30 ; NONEON-NOSVE-NEXT: add w9, w9, w9 @@ -247,7 +247,7 @@ define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] ; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #416] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] -; NONEON-NOSVE-NEXT: str x1, [sp, #152] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x1, [sp, #152] // 8-byte Spill ; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #432] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] ; NONEON-NOSVE-NEXT: stp 
x24, x23, [sp, #400] // 16-byte Folded Spill @@ -341,7 +341,7 @@ define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #160] ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #236] ; NONEON-NOSVE-NEXT: add w9, w9, w9 -; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #234] ; NONEON-NOSVE-NEXT: strb w9, [sp, #334] ; NONEON-NOSVE-NEXT: add w8, w8, w8 @@ -402,113 +402,113 @@ define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: strb w8, [sp, #306] ; NONEON-NOSVE-NEXT: add w8, w30, w30 ; NONEON-NOSVE-NEXT: strb w8, [sp, #305] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #432] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #416] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #304] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #400] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #384] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #303] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #368] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #352] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #302] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #301] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, 
[sp, #36] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #300] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #299] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #298] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #297] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #296] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #295] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #294] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #293] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #292] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #291] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte 
Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #290] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #289] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #288] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #288] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #351] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #350] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #349] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #348] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #347] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #346] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #345] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload +; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #344] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #343] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #342] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #341] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #340] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #339] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #338] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #337] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #336] -; NONEON-NOSVE-NEXT: ldr x8, [sp, #152] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x8, [sp, #152] // 8-byte Reload ; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #320] ; NONEON-NOSVE-NEXT: stp q3, q2, [x8] ; NONEON-NOSVE-NEXT: stp q0, q1, [x8, #32] @@ -582,7 +582,7 @@ 
define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: sub sp, sp, #800 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] -; NONEON-NOSVE-NEXT: str x1, [sp, #408] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x1, [sp, #408] // 8-byte Spill ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] ; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #96] ; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64] @@ -600,114 +600,114 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: stp q17, q23, [sp, #432] ; NONEON-NOSVE-NEXT: ldrh w13, [sp, #594] -; NONEON-NOSVE-NEXT: str w8, [sp, #64] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #64] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #432] ; NONEON-NOSVE-NEXT: ldrh w14, [sp, #592] ; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #464] -; NONEON-NOSVE-NEXT: ldr w30, [sp, #64] // 4-byte Folded Reload -; NONEON-NOSVE-NEXT: str w8, [sp, #404] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w30, [sp, #64] // 4-byte Reload +; NONEON-NOSVE-NEXT: str w8, [sp, #404] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #434] ; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #560] -; NONEON-NOSVE-NEXT: str w8, [sp, #400] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #400] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #436] ; NONEON-NOSVE-NEXT: str q5, [sp, #544] -; NONEON-NOSVE-NEXT: str w8, [sp, #396] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #396] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #438] ; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #608] -; NONEON-NOSVE-NEXT: str w8, [sp, #392] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #392] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #440] ; NONEON-NOSVE-NEXT: ldrh w15, [sp, #638] ; NONEON-NOSVE-NEXT: stp q7, q21, [sp, #640] ; NONEON-NOSVE-NEXT: ldrh w16, [sp, #636] ; NONEON-NOSVE-NEXT: ldrh w17, [sp, #634] -; 
NONEON-NOSVE-NEXT: str w8, [sp, #388] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #388] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #442] ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #666] ; NONEON-NOSVE-NEXT: str q3, [sp, #416] ; NONEON-NOSVE-NEXT: ldrh w18, [sp, #632] ; NONEON-NOSVE-NEXT: ldrh w0, [sp, #630] -; NONEON-NOSVE-NEXT: str w8, [sp, #384] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #384] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #444] ; NONEON-NOSVE-NEXT: ldrh w1, [sp, #628] ; NONEON-NOSVE-NEXT: ldrh w2, [sp, #626] ; NONEON-NOSVE-NEXT: ldrh w3, [sp, #624] ; NONEON-NOSVE-NEXT: ldrh w4, [sp, #622] -; NONEON-NOSVE-NEXT: str w8, [sp, #380] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #380] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #446] ; NONEON-NOSVE-NEXT: ldrh w5, [sp, #620] ; NONEON-NOSVE-NEXT: ldrh w6, [sp, #618] ; NONEON-NOSVE-NEXT: ldrh w7, [sp, #616] ; NONEON-NOSVE-NEXT: ldrh w19, [sp, #614] -; NONEON-NOSVE-NEXT: str w8, [sp, #376] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #376] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #480] ; NONEON-NOSVE-NEXT: ldrh w20, [sp, #612] ; NONEON-NOSVE-NEXT: ldrh w21, [sp, #610] ; NONEON-NOSVE-NEXT: ldrh w22, [sp, #608] ; NONEON-NOSVE-NEXT: ldrh w23, [sp, #430] -; NONEON-NOSVE-NEXT: str w8, [sp, #372] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #372] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #482] ; NONEON-NOSVE-NEXT: ldrh w24, [sp, #428] ; NONEON-NOSVE-NEXT: ldrh w25, [sp, #426] ; NONEON-NOSVE-NEXT: ldrh w26, [sp, #424] ; NONEON-NOSVE-NEXT: ldrh w27, [sp, #422] -; NONEON-NOSVE-NEXT: str w8, [sp, #368] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #368] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #484] ; NONEON-NOSVE-NEXT: ldrh w28, [sp, #420] ; NONEON-NOSVE-NEXT: ldrh w29, [sp, #418] ; NONEON-NOSVE-NEXT: strb w30, [sp, #767] -; NONEON-NOSVE-NEXT: str w8, [sp, #364] // 4-byte Folded Spill +; 
NONEON-NOSVE-NEXT: str w8, [sp, #364] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #486] -; NONEON-NOSVE-NEXT: str w8, [sp, #360] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #360] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #488] -; NONEON-NOSVE-NEXT: str w8, [sp, #356] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #356] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #490] -; NONEON-NOSVE-NEXT: str w8, [sp, #352] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #352] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #492] -; NONEON-NOSVE-NEXT: str w8, [sp, #348] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #348] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #494] -; NONEON-NOSVE-NEXT: str w8, [sp, #344] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #344] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #448] -; NONEON-NOSVE-NEXT: str w8, [sp, #340] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #340] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #450] -; NONEON-NOSVE-NEXT: str w8, [sp, #336] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #336] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #452] -; NONEON-NOSVE-NEXT: str w8, [sp, #332] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #332] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #454] -; NONEON-NOSVE-NEXT: str w8, [sp, #328] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #328] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #456] -; NONEON-NOSVE-NEXT: str w8, [sp, #324] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #324] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #458] -; NONEON-NOSVE-NEXT: str w8, [sp, #320] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #320] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #460] -; NONEON-NOSVE-NEXT: str w8, [sp, #316] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #316] // 4-byte Spill ; 
NONEON-NOSVE-NEXT: ldrh w8, [sp, #462] -; NONEON-NOSVE-NEXT: str w8, [sp, #312] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #312] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #464] -; NONEON-NOSVE-NEXT: str w8, [sp, #308] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #308] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #466] -; NONEON-NOSVE-NEXT: str w8, [sp, #304] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #304] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #468] -; NONEON-NOSVE-NEXT: str w8, [sp, #300] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #300] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #470] -; NONEON-NOSVE-NEXT: str w8, [sp, #296] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #296] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #472] -; NONEON-NOSVE-NEXT: str w8, [sp, #292] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #292] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #474] -; NONEON-NOSVE-NEXT: str w8, [sp, #288] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #288] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #476] -; NONEON-NOSVE-NEXT: str w8, [sp, #284] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #284] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #478] -; NONEON-NOSVE-NEXT: str w8, [sp, #280] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #280] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #656] -; NONEON-NOSVE-NEXT: str w8, [sp, #276] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #276] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #658] -; NONEON-NOSVE-NEXT: str w8, [sp, #272] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #272] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #660] -; NONEON-NOSVE-NEXT: str w8, [sp, #268] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #268] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #662] -; 
NONEON-NOSVE-NEXT: str w8, [sp, #264] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #264] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #664] -; NONEON-NOSVE-NEXT: str w8, [sp, #260] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #260] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #668] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #252] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #670] @@ -858,302 +858,302 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: strb w8, [sp, #738] ; NONEON-NOSVE-NEXT: add w8, w29, w29 ; NONEON-NOSVE-NEXT: strb w8, [sp, #737] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #766] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #736] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #736] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #735] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #734] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #733] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #732] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #731] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr 
w8, [sp, #40] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #730] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #729] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #728] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #727] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #726] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #725] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #724] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #723] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #722] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #721] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 
4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #720] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #783] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #782] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #781] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #780] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #779] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #778] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #777] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #776] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #775] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 
4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #774] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #773] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #772] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #771] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #770] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #769] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #768] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #152] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #152] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #719] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #156] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #156] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #718] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #717] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #164] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #164] // 
4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #716] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #168] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #168] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #715] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #172] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #172] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #714] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #176] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #176] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #713] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #180] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #180] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #712] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #184] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #184] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #711] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #188] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #188] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #710] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #709] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #196] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #196] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #708] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #200] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #200] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #707] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #204] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #204] // 
4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #706] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #208] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #208] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #705] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #212] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #212] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #704] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #216] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #216] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp q6, q3, [sp, #704] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #799] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #220] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #220] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #798] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #224] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #224] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #797] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #228] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #228] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #796] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #232] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #232] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #795] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #236] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #236] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #794] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #240] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #240] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #793] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #244] // 4-byte Folded Reload 
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #244] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #792] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #248] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #248] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #791] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #252] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #252] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #790] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #256] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #256] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #789] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #260] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #260] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #788] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #264] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #264] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #787] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #268] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #268] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #786] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #272] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #272] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #785] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #276] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #276] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #784] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #280] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #280] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp q4, q7, [sp, #768] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #687] -; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #284] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #284] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #686] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #288] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #288] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #685] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #292] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #292] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #684] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #296] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #296] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #683] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #300] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #300] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #682] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #304] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #304] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #681] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #308] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #308] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #680] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #312] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #312] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #679] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #316] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #316] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #678] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #320] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #320] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #677] -; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #324] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #324] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #676] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #328] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #328] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #675] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #674] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #336] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #336] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #673] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #340] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #340] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #672] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #344] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #344] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #703] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #348] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #348] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #702] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #352] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #352] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #701] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #356] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #356] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #700] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #360] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #360] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #699] -; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #364] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #364] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #698] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #368] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #368] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #697] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #372] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #372] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #696] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #376] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #376] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #695] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #380] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #380] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #694] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #384] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #384] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #693] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #388] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #388] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #692] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #392] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #392] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #691] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #396] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #396] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #690] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #400] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #400] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #689] -; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #404] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #404] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #688] -; NONEON-NOSVE-NEXT: ldr x8, [sp, #408] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x8, [sp, #408] // 8-byte Reload ; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #672] ; NONEON-NOSVE-NEXT: stp q1, q0, [x8] ; NONEON-NOSVE-NEXT: stp q4, q3, [x8, #32] @@ -1393,9 +1393,9 @@ define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: strb w9, [sp, #164] ; NONEON-NOSVE-NEXT: add w9, w10, w10 ; NONEON-NOSVE-NEXT: strb w8, [sp, #163] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #162] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w5, [sp, #159] ; NONEON-NOSVE-NEXT: add w5, w30, w30 ; NONEON-NOSVE-NEXT: add w8, w8, w8 @@ -1491,7 +1491,7 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: sub sp, sp, #480 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96] -; NONEON-NOSVE-NEXT: str x1, [sp, #152] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x1, [sp, #152] // 8-byte Spill ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #64] ; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #128] ; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32] @@ -1579,7 +1579,7 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #300] ; NONEON-NOSVE-NEXT: ldp w8, w30, [sp, #160] -; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldr w8, [sp, #308] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, 
#461] @@ -1639,107 +1639,107 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: strb w8, [sp, #434] ; NONEON-NOSVE-NEXT: add w8, w30, w30 ; NONEON-NOSVE-NEXT: strb w8, [sp, #433] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #432] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #431] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #430] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #429] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #428] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #427] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #426] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #425] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #424] -; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #423] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #422] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #421] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #420] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #419] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #418] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #417] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #416] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #416] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #479] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; 
NONEON-NOSVE-NEXT: strb w8, [sp, #478] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #477] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #476] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #475] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #474] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #473] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #472] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #471] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #470] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #469] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; 
NONEON-NOSVE-NEXT: strb w8, [sp, #468] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #467] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #466] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #465] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #464] -; NONEON-NOSVE-NEXT: ldr x8, [sp, #152] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x8, [sp, #152] // 8-byte Reload ; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #448] ; NONEON-NOSVE-NEXT: stp q3, q2, [x8] ; NONEON-NOSVE-NEXT: stp q0, q1, [x8, #32] @@ -1989,9 +1989,9 @@ define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: strh w9, [sp, #200] ; NONEON-NOSVE-NEXT: add w9, w10, w10 ; NONEON-NOSVE-NEXT: strh w8, [sp, #198] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Reload ; NONEON-NOSVE-NEXT: strh w9, [sp, #196] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: strh w5, [sp, #190] ; NONEON-NOSVE-NEXT: add w5, w30, w30 ; NONEON-NOSVE-NEXT: add w8, w8, w8 @@ -2227,109 +2227,109 @@ define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: strh w8, [sp, #502] ; NONEON-NOSVE-NEXT: add w8, w30, w30 ; NONEON-NOSVE-NEXT: strh w8, [sp, #500] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] 
// 4-byte Reload ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #464] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #498] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #496] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #446] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #444] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #442] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #440] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #438] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #436] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #434] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #432] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload +; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp q6, q3, [sp, #432] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #526] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #524] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #522] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #520] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #518] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #516] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #514] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #512] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Reload ; NONEON-NOSVE-NEXT: ldp q4, q7, [sp, #496] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #414] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, 
#412] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #410] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #408] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #406] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #404] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #402] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #400] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #430] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #428] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #426] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #424] 
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #422] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #420] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #418] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #416] ; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #400] @@ -2596,7 +2596,7 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: ldp q7, q6, [x0] ; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #320] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #224] -; NONEON-NOSVE-NEXT: str x1, [sp, #24] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str x1, [sp, #24] // 8-byte Spill ; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #192] ; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #160] ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #160] @@ -2628,7 +2628,7 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: ldr w24, [sp, #248] ; NONEON-NOSVE-NEXT: ldr w22, [sp, #232] ; NONEON-NOSVE-NEXT: add w9, w27, w27 -; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Spill ; NONEON-NOSVE-NEXT: ldr w8, [sp, #200] ; NONEON-NOSVE-NEXT: str q7, [sp, #32] ; NONEON-NOSVE-NEXT: ldr w0, [sp, #104] @@ -2692,7 +2692,7 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: strb w8, [sp, #311] ; NONEON-NOSVE-NEXT: add w8, w13, w13 ; NONEON-NOSVE-NEXT: strb w9, [sp, #306] -; 
NONEON-NOSVE-NEXT: ldr w9, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] // 4-byte Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #309] ; NONEON-NOSVE-NEXT: add w8, w11, w11 ; NONEON-NOSVE-NEXT: add w9, w9, w9 @@ -2707,7 +2707,7 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: strb w6, [sp, #300] ; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #384] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #305] -; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] // 8-byte Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #304] ; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #368] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #288] @@ -3079,9 +3079,9 @@ define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: strh w9, [sp, #328] ; NONEON-NOSVE-NEXT: add w9, w10, w10 ; NONEON-NOSVE-NEXT: strh w8, [sp, #326] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Reload ; NONEON-NOSVE-NEXT: strh w9, [sp, #324] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w5, [sp, #318] ; NONEON-NOSVE-NEXT: add w5, w30, w30 @@ -3455,9 +3455,9 @@ define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind { ; NONEON-NOSVE-NEXT: str w9, [sp, #272] ; NONEON-NOSVE-NEXT: add w9, w10, w10 ; NONEON-NOSVE-NEXT: str w8, [sp, #300] -; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Reload ; NONEON-NOSVE-NEXT: str w9, [sp, #296] -; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: str w5, [sp, #364] ; NONEON-NOSVE-NEXT: add w5, w30, w30 diff --git a/llvm/test/CodeGen/AArch64/sve-tailcall.ll 
b/llvm/test/CodeGen/AArch64/sve-tailcall.ll index 4ddf007768fd2..d07f4a1fd9f86 100644 --- a/llvm/test/CodeGen/AArch64/sve-tailcall.ll +++ b/llvm/test/CodeGen/AArch64/sve-tailcall.ll @@ -52,18 +52,18 @@ define i32 @sve_caller_non_sve_callee( %arg) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 
16-byte Folded Spill @@ -99,18 +99,18 @@ define i32 @sve_caller_non_sve_callee( %arg) nounwind { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -127,18 +127,18 @@ define i32 @sve_caller_non_sve_callee_fastcc( %arg) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: 
stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -174,18 +174,18 @@ define i32 @sve_caller_non_sve_callee_fastcc( %arg) nounwind { ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte 
Folded Reload -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll index 50580cb772937..be1f844c7f118 100644 --- a/llvm/test/CodeGen/AArch64/sve-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll @@ -111,10 +111,10 @@ define @trunc_i64toi1_split3( %in) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: and z7.d, z7.d, #0x1 @@ -136,13 +136,13 @@ define @trunc_i64toi1_split3( %in) { ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s ; CHECK-NEXT: uzp1 p2.s, p4.s, p3.s -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p3.s, p6.s, p5.s -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.s, p0.s, p7.s -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b ; CHECK-NEXT: addvl sp, sp, #1 diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll index 1dbd7ddf46328..def9d36cebb0c 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll @@ -10,10 +10,10 @@ define { , } @ld1_x2_i8(target("aarch64.svc ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1b { z0.b, z1.b }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -26,10 +26,10 @@ define { , } @ld1_x2_i8_scalar(target("aarc ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1b { z0.b, z1.b }, pn8/z, [x0, x1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -43,10 +43,10 @@ define { , } @ld1_x2_i16(target("aarch64.sv ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -59,10 +59,10 @@ define { , } @ld1_x2_i16_scalar(target("aar ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0, x1, lsl #1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -76,10 +76,10 @@ define { , } @ld1_x2_i32(target("aarch64.sv ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1w { z0.s, z1.s }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -92,10 +92,10 @@ define { , } @ld1_x2_i32_scalar(target("aar ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1w { z0.s, z1.s }, pn8/z, [x0, x1, lsl #2] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -109,10 +109,10 @@ define { , } @ld1_x2_i64(target("aarch64.sv ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1d { z0.d, z1.d }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -125,10 +125,10 @@ define { , } @ld1_x2_i64_scalar(target("aar ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1d { z0.d, z1.d }, pn8/z, [x0, x1, lsl #3] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -142,10 +142,10 @@ define { , } @ld1_x2_f16(target("aarch64. ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -158,10 +158,10 @@ define { , } @ld1_x2_f16_scalar(target("a ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0, x1, lsl #1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -175,10 +175,10 @@ define { , } @ld1_x2_bf16(target("aar ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -191,10 +191,10 @@ define { , } @ld1_x2_bf16_scalar(targ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0, x1, lsl #1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -208,10 +208,10 @@ define { , } @ld1_x2_f32(target("aarch6 ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1w { z0.s, z1.s }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -224,10 +224,10 @@ define { , } @ld1_x2_f32_scalar(target( ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1w { z0.s, z1.s }, pn8/z, [x0, x1, lsl #2] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -241,10 +241,10 @@ define { , } @ld1_x2_f64(target("aarc ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1d { z0.d, z1.d }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -257,10 +257,10 @@ define { , } @ld1_x2_f64_scalar(targe ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1d { z0.d, z1.d }, pn8/z, [x0, x1, lsl #3] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -275,12 +275,12 @@ define @ld1_x2_i8_z0_taken(target("aarch64.svcount") %pn, ptr ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1b { z2.b, z3.b }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: add z0.b, z0.b, z2.b ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -297,12 +297,12 @@ define @ld1_x2_i8_z0_taken_scalar(target("aarch64.svcount") % ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1b { z2.b, z3.b }, pn8/z, [x0, x1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: add z0.b, z0.b, z2.b ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -319,10 +319,10 @@ define { , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , @ld1_x4_i16_z0_taken(target("aarch64.svcount") %pn, pt ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1h { z4.h - z7.h }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: add z0.h, z0.h, z4.h ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -606,12 +606,12 @@ define @ld1_x4_i16_z0_taken_scalar(target("aarch64.svcount") ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ld1h { z4.h - z7.h }, pn8/z, [x0, x1, lsl #1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: add z0.h, z0.h, z4.h ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -630,10 +630,10 @@ define { , } @ldnt1_x2_i8(target("aarch64.s ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1b { z0.b, z1.b }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -646,10 +646,10 @@ define { , } @ldnt1_x2_i8_scalar(target("aa ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1b { z0.b, z1.b }, pn8/z, [x0, x1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -663,10 +663,10 @@ define { , } @ldnt1_x2_i16(target("aarch64. ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -679,10 +679,10 @@ define { , } @ldnt1_x2_i16_scalar(target("a ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0, x1, lsl #1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -696,10 +696,10 @@ define { , } @ldnt1_x2_i32(target("aarch64. ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1w { z0.s, z1.s }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -712,10 +712,10 @@ define { , } @ldnt1_x2_i32_scalar(target("a ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1w { z0.s, z1.s }, pn8/z, [x0, x1, lsl #2] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -729,10 +729,10 @@ define { , } @ldnt1_x2_i64(target("aarch64. ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1d { z0.d, z1.d }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -745,10 +745,10 @@ define { , } @ldnt1_x2_i64_scalar(target("a ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1d { z0.d, z1.d }, pn8/z, [x0, x1, lsl #3] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -762,10 +762,10 @@ define { , } @ldnt1_x2_f16(target("aarch6 ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -778,10 +778,10 @@ define { , } @ldnt1_x2_f16_scalar(target( ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0, x1, lsl #1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -795,10 +795,10 @@ define { , } @ldnt1_x2_bf16(target("a ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -811,10 +811,10 @@ define { , } @ldnt1_x2_bf16_scalar(ta ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0, x1, lsl #1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -828,10 +828,10 @@ define { , } @ldnt1_x2_f32(target("aarc ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1w { z0.s, z1.s }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -844,10 +844,10 @@ define { , } @ldnt1_x2_f32_scalar(targe ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1w { z0.s, z1.s }, pn8/z, [x0, x1, lsl #2] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -861,10 +861,10 @@ define { , } @ldnt1_x2_f64(target("aa ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1d { z0.d, z1.d }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -877,10 +877,10 @@ define { , } @ldnt1_x2_f64_scalar(tar ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1d { z0.d, z1.d }, pn8/z, [x0, x1, lsl #3] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -895,12 +895,12 @@ define @ldnt1_x2_i32_z0_taken(target("aarch64.svcount") %pn, ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1w { z2.s, z3.s }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: add z0.s, z0.s, z2.s ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -917,12 +917,12 @@ define @ldnt1_x2_i32_z0_taken_scalar(target("aarch64.svcount" ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1w { z2.s, z3.s }, pn8/z, [x0, x1, lsl #2] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: add z0.s, z0.s, z2.s ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -939,10 +939,10 @@ define { , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , @ldnt1_x4_i64_z0_taken(target("aarch64.svcount") %pn, ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1d { z4.d - z7.d }, pn8/z, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: add z0.d, z0.d, z4.d ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1226,12 +1226,12 @@ define @ldnt1_x4_i64_z0_taken_scalar(target("aarch64.svcount" ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: ldnt1d { z4.d - z7.d }, pn8/z, [x0, x1, lsl #3] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: add z0.d, z0.d, z4.d ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll index ffcc412b50e21..4bf0b09a0cc1d 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll @@ -8,10 +8,10 @@ define @pext_b(target("aarch64.svcount") %x) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: pext p0.b, pn8[2] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -24,10 +24,10 @@ define @pext_h(target("aarch64.svcount") %x) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: pext p0.h, pn8[2] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -40,10 +40,10 @@ define @pext_s(target("aarch64.svcount") %x) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: pext p0.s, pn8[2] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -56,10 +56,10 @@ define @pext_d(target("aarch64.svcount") %x) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: pext p0.d, pn8[2] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -77,10 +77,10 @@ define {,} @pext_x2_b(target("aarch64.svcou ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: pext { p0.b, p1.b }, pn8[1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -93,10 +93,10 @@ define {,} @pext_x2_h(target("aarch64.svcount" ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: pext { p0.h, p1.h }, pn8[1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -109,10 +109,10 @@ define {,} @pext_x2_s(target("aarch64.svcount" ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: pext { p0.s, p1.s }, pn8[1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -125,10 +125,10 @@ define {,} @pext_x2_d(target("aarch64.svcount" ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: pext { p0.d, p1.d }, pn8[1] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll index f30b924fb9e3b..08e1e32021af8 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll @@ -10,12 +10,12 @@ define { , } @sel_x2_i8(target("aarch64.svc ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: sel { z0.b, z1.b }, pn8, { z6.b, z7.b }, { z4.b, z5.b } -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -30,12 +30,12 @@ define { , } @sel_x2_i16(target("aarch64.sv ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: sel { z0.h, z1.h }, pn8, { z6.h, z7.h }, { z4.h, z5.h } -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; 
CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -50,12 +50,12 @@ define { , } @sel_x2_f16(target("aarch64. ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: sel { z0.h, z1.h }, pn8, { z6.h, z7.h }, { z4.h, z5.h } -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -70,12 +70,12 @@ define { , } @sel_x2_bf16(target("aar ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: sel { z0.h, z1.h }, pn8, { z6.h, z7.h }, { z4.h, z5.h } -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -90,12 +90,12 @@ define { , } @sel_x2_i32(target("aarch64.sv ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: sel { z0.s, z1.s }, pn8, { z6.s, z7.s }, { z4.s, z5.s } -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 
8-byte Folded Reload ; CHECK-NEXT: ret @@ -110,12 +110,12 @@ define { , } @sel_x2_f32(target("aarch6 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: sel { z0.s, z1.s }, pn8, { z6.s, z7.s }, { z4.s, z5.s } -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -130,12 +130,12 @@ define { , } @sel_x2_i64(target("aarch64.sv ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: sel { z0.d, z1.d }, pn8, { z6.d, z7.d }, { z4.d, z5.d } -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -150,12 +150,12 @@ define { , } @sel_x2_f64(target("aarc ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: sel { z0.d, z1.d }, pn8, { z6.d, z7.d }, { z4.d, z5.d } -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; 
CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll index 58ba7603a702d..be03cfee1eec3 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll @@ -10,7 +10,7 @@ define { , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -119,11 +119,11 @@ define void @st1_x2_f32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1w { z2.s, z3.s }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -137,11 +137,11 @@ define void @st1_x2_f64( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1d { z2.d, z3.d }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -155,13 +155,13 @@ define void @st1_x4_i8( %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -275,13 +275,13 @@ define void @st1_x4_f32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: st1w { z4.s - z7.s }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -295,13 +295,13 @@ define void @st1_x4_f64( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: st1d { z4.d - z7.d }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -317,11 +317,11 @@ define void @stnt1_x2_i8( %unused, %zn0, %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -353,11 +353,11 @@ define void @stnt1_x2_i32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -371,11 +371,11 @@ define void @stnt1_x2_i64( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -389,11 +389,11 @@ define void @stnt1_x2_f16( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -407,11 +407,11 @@ define void @stnt1_x2_bf16( %unused, %zn ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -425,11 +425,11 @@ define void @stnt1_x2_f32( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -443,11 +443,11 @@ define void @stnt1_x2_f64( %unused, %zn0 ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -461,13 +461,13 @@ define void @stnt1_x4_i8( %unused, %zn0, %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -501,13 +501,13 @@ define void @stnt1_x4_i32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -521,13 +521,13 @@ define void @stnt1_x4_i64( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -541,13 +541,13 @@ define void @stnt1_x4_f16( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -561,13 +561,13 @@ define void @stnt1_x4_bf16( %unused, %zn ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -581,13 +581,13 @@ define void @stnt1_x4_f32( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -601,13 +601,13 @@ define void @stnt1_x4_f64( %unused, %zn0 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/swift-async-win.ll b/llvm/test/CodeGen/AArch64/swift-async-win.ll index 94308979b07f8..69c83072fbc85 100644 --- a/llvm/test/CodeGen/AArch64/swift-async-win.ll +++ b/llvm/test/CodeGen/AArch64/swift-async-win.ll @@ -21,7 +21,7 @@ define hidden swifttailcc void @"$ss23withCheckedContinuation8function_xSS_yScCy ; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: stp x30, x29, [sp, #24] // 16-byte Folded Spill ; CHECK-NEXT: add x29, sp, #24 -; CHECK-NEXT: str x19, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #40] // 8-byte Spill ; CHECK-NEXT: adrp x19, __imp_swift_task_dealloc ; CHECK-NEXT: str xzr, [sp, #16] ; CHECK-NEXT: ldr x8, [x0] @@ -36,7 +36,7 @@ define 
hidden swifttailcc void @"$ss23withCheckedContinuation8function_xSS_yScCy ; CHECK-NEXT: ldp x30, x29, [sp, #24] // 16-byte Folded Reload ; CHECK-NEXT: mov x0, x20 ; CHECK-NEXT: ldr x1, [x20, #8] -; CHECK-NEXT: ldr x19, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #40] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: br x1 entryresume.0: diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll index 07ee87e880aff..867e89748d86d 100644 --- a/llvm/test/CodeGen/AArch64/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/swifterror.ll @@ -112,18 +112,18 @@ define float @caller(ptr %error_ref) { ; CHECK-O0-AARCH64-NEXT: .cfi_offset w21, -24 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w22, -32 ; CHECK-O0-AARCH64-NEXT: ; implicit-def: $x1 -; CHECK-O0-AARCH64-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov x21, xzr ; CHECK-O0-AARCH64-NEXT: bl _foo -; CHECK-O0-AARCH64-NEXT: str x21, [sp, #16] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x21, [sp, #16] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: cbnz x21, LBB1_2 ; CHECK-O0-AARCH64-NEXT: ; %bb.1: ; %cont -; CHECK-O0-AARCH64-NEXT: ldr x9, [sp, #8] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x8, [sp, #16] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x9, [sp, #8] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x8, [sp, #16] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: ldrb w8, [x8, #8] ; CHECK-O0-AARCH64-NEXT: strb w8, [x9] ; CHECK-O0-AARCH64-NEXT: LBB1_2: ; %handler -; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #16] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: bl _free ; CHECK-O0-AARCH64-NEXT: fmov s0, #1.00000000 ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload @@ -173,20 +173,20 @@ define float @caller(ptr %error_ref) { ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w21, -24 ; CHECK-O0-ARM64_32-NEXT: 
.cfi_offset w22, -32 ; CHECK-O0-ARM64_32-NEXT: ; implicit-def: $x1 -; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x21, xzr ; CHECK-O0-ARM64_32-NEXT: bl _foo -; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #16] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #16] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: cmp x21, #0 ; CHECK-O0-ARM64_32-NEXT: b.ne LBB1_2 ; CHECK-O0-ARM64_32-NEXT: ; %bb.1: ; %cont -; CHECK-O0-ARM64_32-NEXT: ldr x9, [sp, #8] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp, #16] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x9, [sp, #8] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp, #16] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: ldrb w8, [x8, #8] ; CHECK-O0-ARM64_32-NEXT: ; kill: def $w0 killed $w8 ; CHECK-O0-ARM64_32-NEXT: strb w8, [x9] ; CHECK-O0-ARM64_32-NEXT: LBB1_2: ; %handler -; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #16] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: bl _free ; CHECK-O0-ARM64_32-NEXT: fmov s0, #1.00000000 ; CHECK-O0-ARM64_32-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload @@ -268,27 +268,27 @@ define float @caller2(ptr %error_ref) { ; CHECK-O0-AARCH64-NEXT: .cfi_offset w21, -24 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w22, -32 ; CHECK-O0-AARCH64-NEXT: ; implicit-def: $x1 -; CHECK-O0-AARCH64-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: LBB2_1: ; %bb_loop ; CHECK-O0-AARCH64-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-O0-AARCH64-NEXT: mov x21, xzr ; CHECK-O0-AARCH64-NEXT: bl _foo -; CHECK-O0-AARCH64-NEXT: str s0, [sp, #4] ; 4-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str s0, [sp, #4] ; 4-byte Spill +; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Spill ; 
CHECK-O0-AARCH64-NEXT: cbnz x21, LBB2_4 ; CHECK-O0-AARCH64-NEXT: ; %bb.2: ; %cont ; CHECK-O0-AARCH64-NEXT: ; in Loop: Header=BB2_1 Depth=1 -; CHECK-O0-AARCH64-NEXT: ldr s0, [sp, #4] ; 4-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr s0, [sp, #4] ; 4-byte Reload ; CHECK-O0-AARCH64-NEXT: fmov s1, #1.00000000 ; CHECK-O0-AARCH64-NEXT: fcmp s0, s1 ; CHECK-O0-AARCH64-NEXT: b.le LBB2_1 ; CHECK-O0-AARCH64-NEXT: ; %bb.3: ; %bb_end -; CHECK-O0-AARCH64-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x8, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x9, [sp, #16] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x8, [sp, #8] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: ldrb w8, [x8, #8] ; CHECK-O0-AARCH64-NEXT: strb w8, [x9] ; CHECK-O0-AARCH64-NEXT: LBB2_4: ; %handler -; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #8] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: bl _free ; CHECK-O0-AARCH64-NEXT: fmov s0, #1.00000000 ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload @@ -349,29 +349,29 @@ define float @caller2(ptr %error_ref) { ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w21, -24 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w22, -32 ; CHECK-O0-ARM64_32-NEXT: ; implicit-def: $x1 -; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: LBB2_1: ; %bb_loop ; CHECK-O0-ARM64_32-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-O0-ARM64_32-NEXT: mov x21, xzr ; CHECK-O0-ARM64_32-NEXT: bl _foo -; CHECK-O0-ARM64_32-NEXT: str s0, [sp, #4] ; 4-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str s0, [sp, #4] ; 4-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: cmp x21, #0 ; CHECK-O0-ARM64_32-NEXT: b.ne LBB2_4 ; CHECK-O0-ARM64_32-NEXT: ; %bb.2: ; %cont ; CHECK-O0-ARM64_32-NEXT: ; in 
Loop: Header=BB2_1 Depth=1 -; CHECK-O0-ARM64_32-NEXT: ldr s0, [sp, #4] ; 4-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr s0, [sp, #4] ; 4-byte Reload ; CHECK-O0-ARM64_32-NEXT: fmov s1, #1.00000000 ; CHECK-O0-ARM64_32-NEXT: fcmp s0, s1 ; CHECK-O0-ARM64_32-NEXT: b.le LBB2_1 ; CHECK-O0-ARM64_32-NEXT: ; %bb.3: ; %bb_end -; CHECK-O0-ARM64_32-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x9, [sp, #16] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp, #8] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: ldrb w8, [x8, #8] ; CHECK-O0-ARM64_32-NEXT: ; kill: def $w0 killed $w8 ; CHECK-O0-ARM64_32-NEXT: strb w8, [x9] ; CHECK-O0-ARM64_32-NEXT: LBB2_4: ; %handler -; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #8] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: bl _free ; CHECK-O0-ARM64_32-NEXT: fmov s0, #1.00000000 ; CHECK-O0-ARM64_32-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload @@ -435,7 +435,7 @@ define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) { ; CHECK-O0-AARCH64-NEXT: .cfi_def_cfa w29, 16 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w30, -8 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w29, -16 -; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: cbz w0, LBB3_2 ; CHECK-O0-AARCH64-NEXT: ; %bb.1: ; %gen_error ; CHECK-O0-AARCH64-NEXT: mov w8, #16 ; =0x10 @@ -449,7 +449,7 @@ define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) { ; CHECK-O0-AARCH64-NEXT: add sp, sp, #32 ; CHECK-O0-AARCH64-NEXT: ret ; CHECK-O0-AARCH64-NEXT: LBB3_2: ; %normal -; CHECK-O0-AARCH64-NEXT: ldr x21, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x21, [sp, #8] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: movi d0, #0000000000000000 ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: add 
sp, sp, #32 @@ -458,10 +458,10 @@ define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) { ; CHECK-O0-ARM64_32-LABEL: foo_if: ; CHECK-O0-ARM64_32: ; %bb.0: ; %entry ; CHECK-O0-ARM64_32-NEXT: sub sp, sp, #32 -; CHECK-O0-ARM64_32-NEXT: str x30, [sp, #16] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x30, [sp, #16] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: .cfi_def_cfa_offset 32 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w30, -16 -; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: cbz w0, LBB3_2 ; CHECK-O0-ARM64_32-NEXT: ; %bb.1: ; %gen_error ; CHECK-O0-ARM64_32-NEXT: mov w8, #16 ; =0x10 @@ -471,13 +471,13 @@ define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) { ; CHECK-O0-ARM64_32-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: strb w8, [x0, #8] ; CHECK-O0-ARM64_32-NEXT: fmov s0, #1.00000000 -; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #16] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #16] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: add sp, sp, #32 ; CHECK-O0-ARM64_32-NEXT: ret ; CHECK-O0-ARM64_32-NEXT: LBB3_2: ; %normal -; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #8] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: movi d0, #0000000000000000 -; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #16] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #16] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: add sp, sp, #32 ; CHECK-O0-ARM64_32-NEXT: ret @@ -549,7 +549,7 @@ define float @foo_loop(ptr swifterror %error_ptr_ref, i32 %cc, float %cc2) { ; CHECK-O0-AARCH64-NEXT: .cfi_def_cfa w29, 16 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w30, -8 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w29, -16 -; CHECK-O0-AARCH64-NEXT: str s0, [sp, #16] ; 4-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str s0, [sp, #16] ; 4-byte Spill ; CHECK-O0-AARCH64-NEXT: stur w0, [x29, #-12] ; 4-byte Folded Spill ; 
CHECK-O0-AARCH64-NEXT: stur x21, [x29, #-8] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: b LBB4_1 @@ -557,7 +557,7 @@ define float @foo_loop(ptr swifterror %error_ptr_ref, i32 %cc, float %cc2) { ; CHECK-O0-AARCH64-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-O0-AARCH64-NEXT: ldur w8, [x29, #-12] ; 4-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: ldur x0, [x29, #-8] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: cbz w8, LBB4_3 ; CHECK-O0-AARCH64-NEXT: ; %bb.2: ; %gen_error ; CHECK-O0-AARCH64-NEXT: ; in Loop: Header=BB4_1 Depth=1 @@ -567,18 +567,18 @@ define float @foo_loop(ptr swifterror %error_ptr_ref, i32 %cc, float %cc2) { ; CHECK-O0-AARCH64-NEXT: mov x9, x0 ; CHECK-O0-AARCH64-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-AARCH64-NEXT: strb w8, [x9, #8] -; CHECK-O0-AARCH64-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: LBB4_3: ; %bb_cont ; CHECK-O0-AARCH64-NEXT: ; in Loop: Header=BB4_1 Depth=1 -; CHECK-O0-AARCH64-NEXT: ldr s0, [sp, #16] ; 4-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: str x0, [sp] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: ldr s0, [sp, #16] ; 4-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #8] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: str x0, [sp] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: fmov s1, #1.00000000 ; CHECK-O0-AARCH64-NEXT: fcmp s0, s1 ; CHECK-O0-AARCH64-NEXT: stur x0, [x29, #-8] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: b.le LBB4_1 ; CHECK-O0-AARCH64-NEXT: ; %bb.4: ; %bb_end -; CHECK-O0-AARCH64-NEXT: ldr x21, [sp] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x21, [sp] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: movi d0, #0000000000000000 ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: add sp, sp, #48 @@ 
-587,18 +587,18 @@ define float @foo_loop(ptr swifterror %error_ptr_ref, i32 %cc, float %cc2) { ; CHECK-O0-ARM64_32-LABEL: foo_loop: ; CHECK-O0-ARM64_32: ; %bb.0: ; %entry ; CHECK-O0-ARM64_32-NEXT: sub sp, sp, #48 -; CHECK-O0-ARM64_32-NEXT: str x30, [sp, #32] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x30, [sp, #32] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: .cfi_def_cfa_offset 48 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w30, -16 -; CHECK-O0-ARM64_32-NEXT: str s0, [sp, #16] ; 4-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str w0, [sp, #20] ; 4-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #24] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str s0, [sp, #16] ; 4-byte Spill +; CHECK-O0-ARM64_32-NEXT: str w0, [sp, #20] ; 4-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #24] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: b LBB4_1 ; CHECK-O0-ARM64_32-NEXT: LBB4_1: ; %bb_loop ; CHECK-O0-ARM64_32-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-O0-ARM64_32-NEXT: ldr w8, [sp, #20] ; 4-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #24] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: ldr w8, [sp, #20] ; 4-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #24] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: cbz w8, LBB4_3 ; CHECK-O0-ARM64_32-NEXT: ; %bb.2: ; %gen_error ; CHECK-O0-ARM64_32-NEXT: ; in Loop: Header=BB4_1 Depth=1 @@ -610,20 +610,20 @@ define float @foo_loop(ptr swifterror %error_ptr_ref, i32 %cc, float %cc2) { ; CHECK-O0-ARM64_32-NEXT: mov x0, x9 ; CHECK-O0-ARM64_32-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: strb w8, [x9, #8] -; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: LBB4_3: ; %bb_cont ; CHECK-O0-ARM64_32-NEXT: ; in Loop: Header=BB4_1 Depth=1 -; CHECK-O0-ARM64_32-NEXT: ldr s0, [sp, #16] ; 4-byte 
Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x0, [sp] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: ldr s0, [sp, #16] ; 4-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #8] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x0, [sp] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: fmov s1, #1.00000000 ; CHECK-O0-ARM64_32-NEXT: fcmp s0, s1 -; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #24] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: b.le LBB4_1 ; CHECK-O0-ARM64_32-NEXT: ; %bb.4: ; %bb_end -; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: movi d0, #0000000000000000 -; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #32] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #32] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: add sp, sp, #48 ; CHECK-O0-ARM64_32-NEXT: ret @@ -690,11 +690,11 @@ define void @foo_sret(ptr sret(%struct.S) %agg.result, i32 %val1, ptr swifterror ; CHECK-O0-AARCH64-NEXT: .cfi_offset w30, -8 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w29, -16 ; CHECK-O0-AARCH64-NEXT: stur w0, [x29, #-4] ; 4-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x8, [sp] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x8, [sp] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-AARCH64-NEXT: mov w0, w8 ; CHECK-O0-AARCH64-NEXT: bl _malloc -; CHECK-O0-AARCH64-NEXT: ldr x8, [sp] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x8, [sp] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: mov x10, x0 ; CHECK-O0-AARCH64-NEXT: ldur w0, [x29, #-4] ; 4-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: mov x21, x10 @@ -708,22 +708,22 @@ define void @foo_sret(ptr sret(%struct.S) %agg.result, i32 %val1, ptr swifterror ; CHECK-O0-ARM64_32-LABEL: foo_sret: ; CHECK-O0-ARM64_32: ; %bb.0: ; %entry ; CHECK-O0-ARM64_32-NEXT: sub sp, sp, #32 -; CHECK-O0-ARM64_32-NEXT: str 
x30, [sp, #16] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x30, [sp, #16] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: .cfi_def_cfa_offset 32 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w30, -16 -; CHECK-O0-ARM64_32-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x8, [sp] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str w0, [sp, #12] ; 4-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x8, [sp] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-ARM64_32-NEXT: mov w0, w8 ; CHECK-O0-ARM64_32-NEXT: bl _malloc -; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: mov x10, x0 -; CHECK-O0-ARM64_32-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr w0, [sp, #12] ; 4-byte Reload ; CHECK-O0-ARM64_32-NEXT: mov x21, x10 ; CHECK-O0-ARM64_32-NEXT: mov w9, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: strb w9, [x10, #8] ; CHECK-O0-ARM64_32-NEXT: str w0, [x8, #4] -; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #16] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #16] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: add sp, sp, #32 ; CHECK-O0-ARM64_32-NEXT: ret @@ -786,20 +786,20 @@ define float @caller3(ptr %error_ref) { ; CHECK-O0-AARCH64-NEXT: .cfi_offset w21, -24 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w22, -32 ; CHECK-O0-AARCH64-NEXT: ; implicit-def: $x1 -; CHECK-O0-AARCH64-NEXT: str x0, [sp] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x0, [sp] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov x21, xzr ; CHECK-O0-AARCH64-NEXT: add x8, sp, #24 ; CHECK-O0-AARCH64-NEXT: mov w0, #1 ; =0x1 ; CHECK-O0-AARCH64-NEXT: bl _foo_sret -; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: cbnz x21, LBB6_2 ; CHECK-O0-AARCH64-NEXT: ; %bb.1: ; %cont -; CHECK-O0-AARCH64-NEXT: ldr x9, [sp] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x8, 
[sp, #8] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x9, [sp] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x8, [sp, #8] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: ldrb w8, [x8, #8] ; CHECK-O0-AARCH64-NEXT: strb w8, [x9] ; CHECK-O0-AARCH64-NEXT: LBB6_2: ; %handler -; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #8] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: bl _free ; CHECK-O0-AARCH64-NEXT: fmov s0, #1.00000000 ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload @@ -851,22 +851,22 @@ define float @caller3(ptr %error_ref) { ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w21, -24 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w22, -32 ; CHECK-O0-ARM64_32-NEXT: ; implicit-def: $x1 -; CHECK-O0-ARM64_32-NEXT: str x0, [sp] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x0, [sp] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x21, xzr ; CHECK-O0-ARM64_32-NEXT: add x8, sp, #24 ; CHECK-O0-ARM64_32-NEXT: mov w0, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: bl _foo_sret -; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: cmp x21, #0 ; CHECK-O0-ARM64_32-NEXT: b.ne LBB6_2 ; CHECK-O0-ARM64_32-NEXT: ; %bb.1: ; %cont -; CHECK-O0-ARM64_32-NEXT: ldr x9, [sp] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x9, [sp] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp, #8] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: ldrb w8, [x8, #8] ; CHECK-O0-ARM64_32-NEXT: ; kill: def $w0 killed $w8 ; CHECK-O0-ARM64_32-NEXT: strb w8, [x9] ; CHECK-O0-ARM64_32-NEXT: LBB6_2: ; %handler -; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #8] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: bl _free ; CHECK-O0-ARM64_32-NEXT: fmov s0, #1.00000000 ; CHECK-O0-ARM64_32-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload @@ -997,7 
+997,7 @@ define float @foo_vararg(ptr swifterror %error_ptr_ref, ...) { ; CHECK-O0-ARM64_32-LABEL: foo_vararg: ; CHECK-O0-ARM64_32: ; %bb.0: ; %entry ; CHECK-O0-ARM64_32-NEXT: sub sp, sp, #48 -; CHECK-O0-ARM64_32-NEXT: str x30, [sp, #32] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x30, [sp, #32] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: .cfi_def_cfa_offset 48 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w30, -16 ; CHECK-O0-ARM64_32-NEXT: mov w8, #16 ; =0x10 @@ -1031,7 +1031,7 @@ define float @foo_vararg(ptr swifterror %error_ptr_ref, ...) { ; CHECK-O0-ARM64_32-NEXT: ldr w8, [x8] ; CHECK-O0-ARM64_32-NEXT: str w8, [sp, #12] ; CHECK-O0-ARM64_32-NEXT: fmov s0, #1.00000000 -; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #32] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #32] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: add sp, sp, #48 ; CHECK-O0-ARM64_32-NEXT: ret @@ -1114,7 +1114,7 @@ define float @caller4(ptr %error_ref) { ; CHECK-O0-AARCH64-NEXT: .cfi_offset w21, -24 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w22, -32 ; CHECK-O0-AARCH64-NEXT: ; implicit-def: $x1 -; CHECK-O0-AARCH64-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x0, [sp, #24] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov x21, xzr ; CHECK-O0-AARCH64-NEXT: mov w8, #10 ; =0xa ; CHECK-O0-AARCH64-NEXT: stur w8, [x29, #-28] @@ -1133,15 +1133,15 @@ define float @caller4(ptr %error_ref) { ; CHECK-O0-AARCH64-NEXT: str x10, [x9, #8] ; CHECK-O0-AARCH64-NEXT: str x8, [x9] ; CHECK-O0-AARCH64-NEXT: bl _foo_vararg -; CHECK-O0-AARCH64-NEXT: str x21, [sp, #32] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x21, [sp, #32] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: cbnz x21, LBB8_2 ; CHECK-O0-AARCH64-NEXT: ; %bb.1: ; %cont -; CHECK-O0-AARCH64-NEXT: ldr x9, [sp, #24] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x8, [sp, #32] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x9, [sp, #24] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x8, [sp, #32] ; 8-byte Reload ; 
CHECK-O0-AARCH64-NEXT: ldrb w8, [x8, #8] ; CHECK-O0-AARCH64-NEXT: strb w8, [x9] ; CHECK-O0-AARCH64-NEXT: LBB8_2: ; %handler -; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #32] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #32] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: bl _free ; CHECK-O0-AARCH64-NEXT: fmov s0, #1.00000000 ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload @@ -1200,7 +1200,7 @@ define float @caller4(ptr %error_ref) { ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w21, -24 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w22, -32 ; CHECK-O0-ARM64_32-NEXT: ; implicit-def: $x1 -; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x21, xzr ; CHECK-O0-ARM64_32-NEXT: mov w8, #10 ; =0xa ; CHECK-O0-ARM64_32-NEXT: str w8, [sp, #40] @@ -1216,17 +1216,17 @@ define float @caller4(ptr %error_ref) { ; CHECK-O0-ARM64_32-NEXT: str w10, [x9, #4] ; CHECK-O0-ARM64_32-NEXT: str w8, [x9] ; CHECK-O0-ARM64_32-NEXT: bl _foo_vararg -; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #24] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #24] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: cmp x21, #0 ; CHECK-O0-ARM64_32-NEXT: b.ne LBB8_2 ; CHECK-O0-ARM64_32-NEXT: ; %bb.1: ; %cont -; CHECK-O0-ARM64_32-NEXT: ldr x9, [sp, #16] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x9, [sp, #16] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp, #24] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: ldrb w8, [x8, #8] ; CHECK-O0-ARM64_32-NEXT: ; kill: def $w0 killed $w8 ; CHECK-O0-ARM64_32-NEXT: strb w8, [x9] ; CHECK-O0-ARM64_32-NEXT: LBB8_2: ; %handler -; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #24] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #24] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: bl _free ; CHECK-O0-ARM64_32-NEXT: fmov s0, #1.00000000 ; CHECK-O0-ARM64_32-NEXT: ldp x29, x30, [sp, #64] ; 
16-byte Folded Reload @@ -1361,11 +1361,11 @@ define swiftcc void @swifterror_clobber(ptr nocapture swifterror %err) { ; CHECK-O0-AARCH64-NEXT: .cfi_def_cfa w29, 16 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w30, -8 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w29, -16 -; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: ; InlineAsm Start ; CHECK-O0-AARCH64-NEXT: nop ; CHECK-O0-AARCH64-NEXT: ; InlineAsm End -; CHECK-O0-AARCH64-NEXT: ldr x21, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x21, [sp, #8] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: add sp, sp, #32 ; CHECK-O0-AARCH64-NEXT: ret @@ -1374,11 +1374,11 @@ define swiftcc void @swifterror_clobber(ptr nocapture swifterror %err) { ; CHECK-O0-ARM64_32: ; %bb.0: ; CHECK-O0-ARM64_32-NEXT: sub sp, sp, #16 ; CHECK-O0-ARM64_32-NEXT: .cfi_def_cfa_offset 16 -; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: ; InlineAsm Start ; CHECK-O0-ARM64_32-NEXT: nop ; CHECK-O0-ARM64_32-NEXT: ; InlineAsm End -; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #8] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: add sp, sp, #16 ; CHECK-O0-ARM64_32-NEXT: ret call void asm sideeffect "nop", "~{x21}"() @@ -1458,7 +1458,7 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr s ; CHECK-APPLE-NEXT: .cfi_offset w26, -72 ; CHECK-APPLE-NEXT: .cfi_offset w27, -80 ; CHECK-APPLE-NEXT: .cfi_offset w28, -96 -; CHECK-APPLE-NEXT: str x20, [sp] ; 8-byte Folded Spill +; CHECK-APPLE-NEXT: str x20, [sp] ; 8-byte Spill ; CHECK-APPLE-NEXT: mov x23, x7 ; CHECK-APPLE-NEXT: mov x24, x6 ; CHECK-APPLE-NEXT: mov x25, x5 @@ -1493,14 +1493,14 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, 
ptr s ; CHECK-APPLE-NEXT: ldp x23, x22, [sp, #64] ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: ldp x25, x24, [sp, #48] ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: ldp x27, x26, [sp, #32] ; 16-byte Folded Reload -; CHECK-APPLE-NEXT: ldr x28, [sp, #16] ; 8-byte Folded Reload +; CHECK-APPLE-NEXT: ldr x28, [sp, #16] ; 8-byte Reload ; CHECK-APPLE-NEXT: add sp, sp, #112 ; CHECK-APPLE-NEXT: ret ; ; CHECK-O0-AARCH64-LABEL: params_in_reg: ; CHECK-O0-AARCH64: ; %bb.0: ; CHECK-O0-AARCH64-NEXT: sub sp, sp, #128 -; CHECK-O0-AARCH64-NEXT: str x20, [sp, #96] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x20, [sp, #96] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: stp x29, x30, [sp, #112] ; 16-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: add x29, sp, #112 ; CHECK-O0-AARCH64-NEXT: .cfi_def_cfa w29, 16 @@ -1508,15 +1508,15 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr s ; CHECK-O0-AARCH64-NEXT: .cfi_offset w29, -16 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w20, -32 ; CHECK-O0-AARCH64-NEXT: stur x21, [x29, #-32] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x20, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x20, [sp, #8] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: stur x7, [x29, #-40] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: stur x6, [x29, #-48] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x5, [sp, #56] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x4, [sp, #48] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x3, [sp, #40] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x2, [sp, #32] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x1, [sp, #24] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x5, [sp, #56] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x4, [sp, #48] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x3, [sp, #40] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x2, [sp, #32] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x1, [sp, #24] ; 
8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: ; implicit-def: $x0 ; CHECK-O0-AARCH64-NEXT: mov x20, xzr ; CHECK-O0-AARCH64-NEXT: mov x21, x20 @@ -1537,20 +1537,20 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr s ; CHECK-O0-AARCH64-NEXT: mov w8, #8 ; =0x8 ; CHECK-O0-AARCH64-NEXT: mov w7, w8 ; CHECK-O0-AARCH64-NEXT: bl _params_in_reg2 -; CHECK-O0-AARCH64-NEXT: ldr x20, [sp, #8] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x1, [sp, #24] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x2, [sp, #32] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x3, [sp, #40] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x4, [sp, #48] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x5, [sp, #56] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x20, [sp, #8] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #16] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x1, [sp, #24] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x2, [sp, #32] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x3, [sp, #40] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x4, [sp, #48] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x5, [sp, #56] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: ldur x6, [x29, #-48] ; 8-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: ldur x7, [x29, #-40] ; 8-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: mov x8, x21 ; CHECK-O0-AARCH64-NEXT: ldur x21, [x29, #-32] ; 8-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: bl _params_in_reg2 ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #112] ; 16-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x20, [sp, #96] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x20, [sp, #96] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: add sp, sp, #128 ; CHECK-O0-AARCH64-NEXT: ret ; @@ -1561,16 +1561,16 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr s ; 
CHECK-O0-ARM64_32-NEXT: .cfi_def_cfa_offset 112 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w30, -8 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w20, -16 -; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #80] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x20, [sp, #8] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x7, [sp, #72] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x6, [sp, #64] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x5, [sp, #56] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x4, [sp, #48] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x3, [sp, #40] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x2, [sp, #32] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x1, [sp, #24] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #80] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x20, [sp, #8] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x7, [sp, #72] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x6, [sp, #64] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x5, [sp, #56] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x4, [sp, #48] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x3, [sp, #40] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x2, [sp, #32] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x1, [sp, #24] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #16] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: ; implicit-def: $x0 ; CHECK-O0-ARM64_32-NEXT: mov x20, xzr ; CHECK-O0-ARM64_32-NEXT: mov x21, x20 @@ -1591,17 +1591,17 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr s ; CHECK-O0-ARM64_32-NEXT: mov w8, #8 ; =0x8 ; CHECK-O0-ARM64_32-NEXT: mov w7, w8 ; CHECK-O0-ARM64_32-NEXT: bl _params_in_reg2 -; CHECK-O0-ARM64_32-NEXT: ldr x20, [sp, #8] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #16] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x1, [sp, #24] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: 
ldr x2, [sp, #32] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x3, [sp, #40] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x4, [sp, #48] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x5, [sp, #56] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x6, [sp, #64] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x7, [sp, #72] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x20, [sp, #8] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #16] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x1, [sp, #24] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x2, [sp, #32] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x3, [sp, #40] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x4, [sp, #48] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x5, [sp, #56] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x6, [sp, #64] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x7, [sp, #72] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: mov x8, x21 -; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #80] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #80] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: bl _params_in_reg2 ; CHECK-O0-ARM64_32-NEXT: ldp x20, x30, [sp, #96] ; 16-byte Folded Reload ; CHECK-O0-ARM64_32-NEXT: add sp, sp, #112 @@ -1638,7 +1638,7 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_ ; CHECK-APPLE-NEXT: .cfi_offset w27, -80 ; CHECK-APPLE-NEXT: .cfi_offset w28, -96 ; CHECK-APPLE-NEXT: mov x23, x21 -; CHECK-APPLE-NEXT: str x7, [sp, #16] ; 8-byte Folded Spill +; CHECK-APPLE-NEXT: str x7, [sp, #16] ; 8-byte Spill ; CHECK-APPLE-NEXT: mov x24, x6 ; CHECK-APPLE-NEXT: mov x25, x5 ; CHECK-APPLE-NEXT: mov x26, x4 @@ -1657,7 +1657,7 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_ ; CHECK-APPLE-NEXT: mov x20, xzr ; CHECK-APPLE-NEXT: mov x21, xzr ; CHECK-APPLE-NEXT: bl _params_in_reg2 -; CHECK-APPLE-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill +; CHECK-APPLE-NEXT: str x21, [sp, #8] ; 
8-byte Spill ; CHECK-APPLE-NEXT: mov x0, x22 ; CHECK-APPLE-NEXT: mov x1, x19 ; CHECK-APPLE-NEXT: mov x2, x28 @@ -1676,7 +1676,7 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_ ; CHECK-APPLE-NEXT: mov x27, x5 ; CHECK-APPLE-NEXT: mov x28, x6 ; CHECK-APPLE-NEXT: mov x23, x7 -; CHECK-APPLE-NEXT: str x21, [sp, #24] ; 8-byte Folded Spill +; CHECK-APPLE-NEXT: str x21, [sp, #24] ; 8-byte Spill ; CHECK-APPLE-NEXT: mov w0, #1 ; =0x1 ; CHECK-APPLE-NEXT: mov w1, #2 ; =0x2 ; CHECK-APPLE-NEXT: mov w2, #3 ; =0x3 @@ -1686,7 +1686,7 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_ ; CHECK-APPLE-NEXT: mov w6, #7 ; =0x7 ; CHECK-APPLE-NEXT: mov w7, #8 ; =0x8 ; CHECK-APPLE-NEXT: mov x20, xzr -; CHECK-APPLE-NEXT: ldr x21, [sp, #8] ; 8-byte Folded Reload +; CHECK-APPLE-NEXT: ldr x21, [sp, #8] ; 8-byte Reload ; CHECK-APPLE-NEXT: bl _params_in_reg2 ; CHECK-APPLE-NEXT: mov x0, x19 ; CHECK-APPLE-NEXT: mov x1, x22 @@ -1716,38 +1716,38 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_ ; CHECK-O0-AARCH64-NEXT: .cfi_offset w29, -16 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w20, -24 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w28, -32 -; CHECK-O0-AARCH64-NEXT: str x21, [sp, #72] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x20, [sp] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x7, [sp, #64] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x6, [sp, #56] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x5, [sp, #48] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x4, [sp, #40] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x3, [sp, #32] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x2, [sp, #24] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x21, [sp, #72] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x20, [sp] ; 8-byte Spill +; 
CHECK-O0-AARCH64-NEXT: str x7, [sp, #64] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x6, [sp, #56] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x5, [sp, #48] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x4, [sp, #40] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x3, [sp, #32] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x2, [sp, #24] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x1, [sp, #16] ; 8-byte Spill +; CHECK-O0-AARCH64-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: ; implicit-def: $x0 ; CHECK-O0-AARCH64-NEXT: mov x20, xzr -; CHECK-O0-AARCH64-NEXT: str x20, [sp, #80] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x20, [sp, #80] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov x21, x20 ; CHECK-O0-AARCH64-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-AARCH64-NEXT: mov w0, w8 -; CHECK-O0-AARCH64-NEXT: str x0, [sp, #88] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x0, [sp, #88] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov w8, #2 ; =0x2 ; CHECK-O0-AARCH64-NEXT: mov w1, w8 -; CHECK-O0-AARCH64-NEXT: str x1, [sp, #96] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x1, [sp, #96] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov w8, #3 ; =0x3 ; CHECK-O0-AARCH64-NEXT: mov w2, w8 -; CHECK-O0-AARCH64-NEXT: str x2, [sp, #104] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x2, [sp, #104] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov w8, #4 ; =0x4 ; CHECK-O0-AARCH64-NEXT: mov w3, w8 -; CHECK-O0-AARCH64-NEXT: str x3, [sp, #112] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x3, [sp, #112] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov w8, #5 ; =0x5 ; CHECK-O0-AARCH64-NEXT: mov w4, w8 -; CHECK-O0-AARCH64-NEXT: str x4, [sp, #120] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x4, [sp, #120] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov w8, #6 ; =0x6 ; CHECK-O0-AARCH64-NEXT: mov w5, w8 -; CHECK-O0-AARCH64-NEXT: str x5, [sp, #128] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x5, [sp, #128] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov w8, #7 ; =0x7 ; 
CHECK-O0-AARCH64-NEXT: mov w6, w8 ; CHECK-O0-AARCH64-NEXT: stur x6, [x29, #-120] ; 8-byte Folded Spill @@ -1755,37 +1755,37 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_ ; CHECK-O0-AARCH64-NEXT: mov w7, w8 ; CHECK-O0-AARCH64-NEXT: stur x7, [x29, #-112] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: bl _params_in_reg2 -; CHECK-O0-AARCH64-NEXT: ldr x20, [sp] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x1, [sp, #16] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x2, [sp, #24] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x3, [sp, #32] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x4, [sp, #40] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x5, [sp, #48] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x6, [sp, #56] ; 8-byte Folded Reload -; CHECK-O0-AARCH64-NEXT: ldr x7, [sp, #64] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x20, [sp] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #8] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x1, [sp, #16] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x2, [sp, #24] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x3, [sp, #32] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x4, [sp, #40] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x5, [sp, #48] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x6, [sp, #56] ; 8-byte Reload +; CHECK-O0-AARCH64-NEXT: ldr x7, [sp, #64] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: mov x8, x21 -; CHECK-O0-AARCH64-NEXT: ldr x21, [sp, #72] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x21, [sp, #72] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: stur x8, [x29, #-104] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: bl _params_and_return_in_reg2 -; CHECK-O0-AARCH64-NEXT: ldr x20, [sp, #80] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x20, [sp, #80] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: mov x8, x0 -; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #88] ; 8-byte 
Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x0, [sp, #88] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: stur x8, [x29, #-96] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: mov x8, x1 -; CHECK-O0-AARCH64-NEXT: ldr x1, [sp, #96] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x1, [sp, #96] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: stur x8, [x29, #-88] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: mov x8, x2 -; CHECK-O0-AARCH64-NEXT: ldr x2, [sp, #104] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x2, [sp, #104] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: stur x8, [x29, #-80] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: mov x8, x3 -; CHECK-O0-AARCH64-NEXT: ldr x3, [sp, #112] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x3, [sp, #112] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: stur x8, [x29, #-72] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: mov x8, x4 -; CHECK-O0-AARCH64-NEXT: ldr x4, [sp, #120] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x4, [sp, #120] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: stur x8, [x29, #-64] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: mov x8, x5 -; CHECK-O0-AARCH64-NEXT: ldr x5, [sp, #128] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x5, [sp, #128] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: stur x8, [x29, #-56] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: mov x8, x6 ; CHECK-O0-AARCH64-NEXT: ldur x6, [x29, #-120] ; 8-byte Folded Reload @@ -1815,105 +1815,105 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_ ; CHECK-O0-ARM64_32-LABEL: params_and_return_in_reg: ; CHECK-O0-ARM64_32: ; %bb.0: ; CHECK-O0-ARM64_32-NEXT: sub sp, sp, #272 -; CHECK-O0-ARM64_32-NEXT: str x28, [sp, #240] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x28, [sp, #240] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: stp x20, x30, [sp, #256] ; 16-byte Folded Spill ; CHECK-O0-ARM64_32-NEXT: .cfi_def_cfa_offset 272 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w30, -8 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w20, -16 ; 
CHECK-O0-ARM64_32-NEXT: .cfi_offset w28, -32 -; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #72] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x20, [sp] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x7, [sp, #64] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x6, [sp, #56] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x5, [sp, #48] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x4, [sp, #40] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x3, [sp, #32] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x2, [sp, #24] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x1, [sp, #16] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #72] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x20, [sp] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x7, [sp, #64] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x6, [sp, #56] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x5, [sp, #48] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x4, [sp, #40] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x3, [sp, #32] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x2, [sp, #24] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x1, [sp, #16] ; 8-byte Spill +; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #8] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: ; implicit-def: $x0 ; CHECK-O0-ARM64_32-NEXT: mov x20, xzr -; CHECK-O0-ARM64_32-NEXT: str x20, [sp, #80] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x20, [sp, #80] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x21, x20 ; CHECK-O0-ARM64_32-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: mov w0, w8 -; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #88] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #88] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov w8, #2 ; =0x2 ; CHECK-O0-ARM64_32-NEXT: mov w1, w8 -; CHECK-O0-ARM64_32-NEXT: str x1, [sp, #96] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x1, [sp, #96] ; 8-byte Spill ; 
CHECK-O0-ARM64_32-NEXT: mov w8, #3 ; =0x3 ; CHECK-O0-ARM64_32-NEXT: mov w2, w8 -; CHECK-O0-ARM64_32-NEXT: str x2, [sp, #104] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x2, [sp, #104] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov w8, #4 ; =0x4 ; CHECK-O0-ARM64_32-NEXT: mov w3, w8 -; CHECK-O0-ARM64_32-NEXT: str x3, [sp, #112] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x3, [sp, #112] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov w8, #5 ; =0x5 ; CHECK-O0-ARM64_32-NEXT: mov w4, w8 -; CHECK-O0-ARM64_32-NEXT: str x4, [sp, #120] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x4, [sp, #120] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov w8, #6 ; =0x6 ; CHECK-O0-ARM64_32-NEXT: mov w5, w8 -; CHECK-O0-ARM64_32-NEXT: str x5, [sp, #128] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x5, [sp, #128] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov w8, #7 ; =0x7 ; CHECK-O0-ARM64_32-NEXT: mov w6, w8 -; CHECK-O0-ARM64_32-NEXT: str x6, [sp, #136] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x6, [sp, #136] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov w8, #8 ; =0x8 ; CHECK-O0-ARM64_32-NEXT: mov w7, w8 -; CHECK-O0-ARM64_32-NEXT: str x7, [sp, #144] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x7, [sp, #144] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: bl _params_in_reg2 -; CHECK-O0-ARM64_32-NEXT: ldr x20, [sp] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x1, [sp, #16] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x2, [sp, #24] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x3, [sp, #32] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x4, [sp, #40] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x5, [sp, #48] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x6, [sp, #56] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x7, [sp, #64] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x20, [sp] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr 
x0, [sp, #8] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x1, [sp, #16] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x2, [sp, #24] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x3, [sp, #32] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x4, [sp, #40] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x5, [sp, #48] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x6, [sp, #56] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x7, [sp, #64] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: mov x8, x21 -; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #72] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #152] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #72] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #152] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: bl _params_and_return_in_reg2 -; CHECK-O0-ARM64_32-NEXT: ldr x20, [sp, #80] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x20, [sp, #80] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: mov x8, x0 -; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #88] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #160] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #88] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #160] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x8, x1 -; CHECK-O0-ARM64_32-NEXT: ldr x1, [sp, #96] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #168] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: ldr x1, [sp, #96] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #168] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x8, x2 -; CHECK-O0-ARM64_32-NEXT: ldr x2, [sp, #104] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #176] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: ldr x2, [sp, #104] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #176] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x8, x3 -; CHECK-O0-ARM64_32-NEXT: ldr x3, [sp, #112] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #184] ; 8-byte Folded 
Spill +; CHECK-O0-ARM64_32-NEXT: ldr x3, [sp, #112] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #184] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x8, x4 -; CHECK-O0-ARM64_32-NEXT: ldr x4, [sp, #120] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #192] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: ldr x4, [sp, #120] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #192] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x8, x5 -; CHECK-O0-ARM64_32-NEXT: ldr x5, [sp, #128] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #200] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: ldr x5, [sp, #128] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #200] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x8, x6 -; CHECK-O0-ARM64_32-NEXT: ldr x6, [sp, #136] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #208] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: ldr x6, [sp, #136] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #208] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x8, x7 -; CHECK-O0-ARM64_32-NEXT: ldr x7, [sp, #144] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #216] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: ldr x7, [sp, #144] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #216] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x8, x21 -; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #152] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #224] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #152] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: str x8, [sp, #224] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: bl _params_in_reg2 -; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #160] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x1, [sp, #168] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x2, [sp, #176] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x3, [sp, #184] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x4, [sp, #192] ; 8-byte Folded 
Reload -; CHECK-O0-ARM64_32-NEXT: ldr x5, [sp, #200] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x6, [sp, #208] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x7, [sp, #216] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x0, [sp, #160] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x1, [sp, #168] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x2, [sp, #176] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x3, [sp, #184] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x4, [sp, #192] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x5, [sp, #200] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x6, [sp, #208] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x7, [sp, #216] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: mov x8, x21 -; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #224] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #224] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: ldp x20, x30, [sp, #256] ; 16-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x28, [sp, #240] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x28, [sp, #240] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: add sp, sp, #272 ; CHECK-O0-ARM64_32-NEXT: ret %error_ptr_ref = alloca swifterror ptr, align 8 @@ -1956,10 +1956,10 @@ define swiftcc void @tailcall_from_swifterror(ptr swifterror %error_ptr_ref) { ; CHECK-O0-AARCH64-NEXT: .cfi_def_cfa w29, 16 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w30, -8 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w29, -16 -; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: mov x0, xzr ; CHECK-O0-AARCH64-NEXT: bl _acallee -; CHECK-O0-AARCH64-NEXT: ldr x21, [sp, #8] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x21, [sp, #8] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: add sp, sp, #32 ; CHECK-O0-AARCH64-NEXT: ret @@ -1967,14 +1967,14 @@ define swiftcc void @tailcall_from_swifterror(ptr 
swifterror %error_ptr_ref) { ; CHECK-O0-ARM64_32-LABEL: tailcall_from_swifterror: ; CHECK-O0-ARM64_32: ; %bb.0: ; %entry ; CHECK-O0-ARM64_32-NEXT: sub sp, sp, #32 -; CHECK-O0-ARM64_32-NEXT: str x30, [sp, #16] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x30, [sp, #16] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: .cfi_def_cfa_offset 32 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w30, -16 -; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: mov x0, xzr ; CHECK-O0-ARM64_32-NEXT: bl _acallee -; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #8] ; 8-byte Folded Reload -; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #16] ; 8-byte Folded Reload +; CHECK-O0-ARM64_32-NEXT: ldr x21, [sp, #8] ; 8-byte Reload +; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #16] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: add sp, sp, #32 ; CHECK-O0-ARM64_32-NEXT: ret entry: @@ -2019,9 +2019,9 @@ define swiftcc ptr @testAssign(ptr %error_ref) { ; CHECK-O0-AARCH64-NEXT: ; implicit-def: $x1 ; CHECK-O0-AARCH64-NEXT: mov x21, xzr ; CHECK-O0-AARCH64-NEXT: bl _foo2 -; CHECK-O0-AARCH64-NEXT: str x21, [sp] ; 8-byte Folded Spill +; CHECK-O0-AARCH64-NEXT: str x21, [sp] ; 8-byte Spill ; CHECK-O0-AARCH64-NEXT: ; %bb.1: ; %a -; CHECK-O0-AARCH64-NEXT: ldr x0, [sp] ; 8-byte Folded Reload +; CHECK-O0-AARCH64-NEXT: ldr x0, [sp] ; 8-byte Reload ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: ldp x22, x21, [sp, #16] ; 16-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: add sp, sp, #48 @@ -2040,9 +2040,9 @@ define swiftcc ptr @testAssign(ptr %error_ref) { ; CHECK-O0-ARM64_32-NEXT: ; implicit-def: $x1 ; CHECK-O0-ARM64_32-NEXT: mov x21, xzr ; CHECK-O0-ARM64_32-NEXT: bl _foo2 -; CHECK-O0-ARM64_32-NEXT: str x21, [sp] ; 8-byte Folded Spill +; CHECK-O0-ARM64_32-NEXT: str x21, [sp] ; 8-byte Spill ; CHECK-O0-ARM64_32-NEXT: ; %bb.1: ; %a -; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp] ; 8-byte Folded Reload +; 
CHECK-O0-ARM64_32-NEXT: ldr x8, [sp] ; 8-byte Reload ; CHECK-O0-ARM64_32-NEXT: and x0, x8, #0xffffffff ; CHECK-O0-ARM64_32-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-O0-ARM64_32-NEXT: ldp x22, x21, [sp, #16] ; 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll index 3e933fadc4fa2..12e3738b13c5b 100644 --- a/llvm/test/CodeGen/AArch64/trampoline.ll +++ b/llvm/test/CodeGen/AArch64/trampoline.ll @@ -72,7 +72,7 @@ define i64 @func1() { ; CHECK-LINUX-LABEL: func1: ; CHECK-LINUX: // %bb.0: ; CHECK-LINUX-NEXT: sub sp, sp, #64 -; CHECK-LINUX-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-LINUX-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-LINUX-NEXT: .cfi_def_cfa_offset 64 ; CHECK-LINUX-NEXT: .cfi_offset w30, -16 ; CHECK-LINUX-NEXT: adrp x8, :got:f @@ -91,7 +91,7 @@ define i64 @func1() { ; CHECK-LINUX-NEXT: add x8, sp, #8 ; CHECK-LINUX-NEXT: add x1, x8, #12 ; CHECK-LINUX-NEXT: bl __clear_cache -; CHECK-LINUX-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-LINUX-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-LINUX-NEXT: mov x0, xzr ; CHECK-LINUX-NEXT: add sp, sp, #64 ; CHECK-LINUX-NEXT: ret @@ -101,7 +101,7 @@ define i64 @func1() { ; CHECK-PC-NEXT: // %bb.0: ; CHECK-PC-NEXT: sub sp, sp, #64 ; CHECK-PC-NEXT: .seh_stackalloc 64 -; CHECK-PC-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-PC-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-PC-NEXT: .seh_save_reg x30, 48 ; CHECK-PC-NEXT: .seh_endprologue ; CHECK-PC-NEXT: adrp x8, f @@ -122,7 +122,7 @@ define i64 @func1() { ; CHECK-PC-NEXT: bl __clear_cache ; CHECK-PC-NEXT: mov x0, xzr ; CHECK-PC-NEXT: .seh_startepilogue -; CHECK-PC-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-PC-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-PC-NEXT: .seh_save_reg x30, 48 ; CHECK-PC-NEXT: add sp, sp, #64 ; CHECK-PC-NEXT: .seh_stackalloc 64 diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll 
b/llvm/test/CodeGen/AArch64/unwind-preserved.ll index 7e1f63d822273..c1062561733c3 100644 --- a/llvm/test/CodeGen/AArch64/unwind-preserved.ll +++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll @@ -14,18 +14,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 16 + 144 * VG -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z22, [sp, #3, mul 
vl] // 16-byte Folded Spill ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -79,18 +79,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 @@ -128,18 +128,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; 
CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 @@ -166,18 +166,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; GISEL-NEXT: .cfi_offset w29, -16 ; GISEL-NEXT: addvl sp, sp, #-18 ; GISEL-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 
// sp + 16 + 144 * VG -; GISEL-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; GISEL-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; GISEL-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; GISEL-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; GISEL-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; GISEL-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; GISEL-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; GISEL-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; GISEL-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; GISEL-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; GISEL-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; GISEL-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; GISEL-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; GISEL-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; GISEL-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; GISEL-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; GISEL-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; GISEL-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; GISEL-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; GISEL-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; GISEL-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; GISEL-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; GISEL-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; GISEL-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; GISEL-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill ; GISEL-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill ; GISEL-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill @@ -231,18 +231,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; GISEL-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; 
GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; GISEL-NEXT: addvl sp, sp, #18 ; GISEL-NEXT: .cfi_def_cfa wsp, 16 ; GISEL-NEXT: .cfi_restore z8 @@ -280,18 +280,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; GISEL-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p12, [sp, #7, mul 
vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; GISEL-NEXT: addvl sp, sp, #18 ; GISEL-NEXT: .cfi_def_cfa wsp, 16 ; GISEL-NEXT: .cfi_restore z8 @@ -354,14 +354,14 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) ; CHECK-NEXT: .cfi_offset b22, -256 ; CHECK-NEXT: .cfi_offset b23, -272 ; CHECK-NEXT: .cfi_remember_state -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: .Ltmp3: // EH_LABEL ; CHECK-NEXT: bl may_throw_neon ; CHECK-NEXT: .Ltmp4: // EH_LABEL -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: b .LBB1_1 ; CHECK-NEXT: .LBB1_1: // %.Lcontinue -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, 
[sp, #16] // 16-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload ; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload ; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload @@ -395,7 +395,7 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) ; CHECK-NEXT: .LBB1_2: // %.Lunwind ; CHECK-NEXT: .cfi_restore_state ; CHECK-NEXT: .Ltmp5: // EH_LABEL -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload ; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload ; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload @@ -461,14 +461,14 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) ; GISEL-NEXT: .cfi_offset b22, -256 ; GISEL-NEXT: .cfi_offset b23, -272 ; GISEL-NEXT: .cfi_remember_state -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp] // 16-byte Spill ; GISEL-NEXT: .Ltmp3: // EH_LABEL ; GISEL-NEXT: bl may_throw_neon -; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; GISEL-NEXT: str q0, [sp, #16] // 16-byte Spill ; GISEL-NEXT: .Ltmp4: // EH_LABEL ; GISEL-NEXT: b .LBB1_1 ; GISEL-NEXT: .LBB1_1: // %.Lcontinue -; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload ; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload ; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload @@ -502,7 +502,7 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) ; GISEL-NEXT: .LBB1_2: // %.Lunwind ; GISEL-NEXT: .cfi_restore_state ; GISEL-NEXT: .Ltmp5: // EH_LABEL -; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; GISEL-NEXT: ldr q0, [sp] // 16-byte Reload ; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload ; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte 
Folded Reload ; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll index 854a2d75d4b86..0b7690e9f74c7 100644 --- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll +++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll @@ -60,18 +60,18 @@ define <2 x float> @sin_v2f32(<2 x float> %x) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl sinf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #48 @@ -84,25 +84,25 @@ define <3 x float> @sin_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: sin_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl sinf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: 
ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl sinf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -115,32 +115,32 @@ define <4 x float> @sin_v4f32(<4 x float> %x) nounwind { ; CHECK-LABEL: sin_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl sinf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill 
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl sinf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[2], v0.s[0] -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[3] -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-NEXT: bl sinf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[3], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -159,7 +159,7 @@ define <5 x float> @sin_v5f32(<5 x float> %x) nounwind { ; CHECK-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill ; CHECK-NEXT: fmov s8, s4 ; CHECK-NEXT: fmov s9, s3 -; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #40] // 8-byte Spill ; CHECK-NEXT: bl sinf ; CHECK-NEXT: fmov s12, s0 ; CHECK-NEXT: fmov s0, s11 @@ -175,7 +175,7 @@ define <5 x float> @sin_v5f32(<5 x float> %x) nounwind { ; CHECK-NEXT: bl sinf ; CHECK-NEXT: fmov s1, s11 ; CHECK-NEXT: fmov s2, s10 -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Reload ; CHECK-NEXT: fmov s3, s9 ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload @@ -197,7 +197,7 @@ define <6 x float> @sin_v6f32(<6 x float> %x) nounwind { ; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: fmov s8, s5 ; CHECK-NEXT: fmov s9, s4 -; CHECK-NEXT: str x30, 
[sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: fmov s12, s1 ; CHECK-NEXT: bl sinf ; CHECK-NEXT: fmov s13, s0 @@ -217,7 +217,7 @@ define <6 x float> @sin_v6f32(<6 x float> %x) nounwind { ; CHECK-NEXT: bl sinf ; CHECK-NEXT: fmov s2, s11 ; CHECK-NEXT: fmov s3, s10 -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: fmov s4, s9 ; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload @@ -237,7 +237,7 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind { ; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill ; CHECK-NEXT: fmov d8, d2 ; CHECK-NEXT: fmov d9, d1 -; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-NEXT: bl sin ; CHECK-NEXT: fmov d10, d0 ; CHECK-NEXT: fmov d0, d9 @@ -247,7 +247,7 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind { ; CHECK-NEXT: bl sin ; CHECK-NEXT: fmov d1, d9 ; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-NEXT: fmov d2, d0 ; CHECK-NEXT: fmov d0, d10 ; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload @@ -278,25 +278,25 @@ define <3 x float> @cos_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: cos_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl cosf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, 
#16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl cosf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl cosf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -309,25 +309,25 @@ define <3 x float> @tan_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: tan_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl tanf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl tanf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; 
CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl tanf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -340,25 +340,25 @@ define <3 x float> @asin_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: asin_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl asinf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl asinf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl asinf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add 
sp, sp, #48 @@ -371,25 +371,25 @@ define <3 x float> @acos_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: acos_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl acosf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl acosf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl acosf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -402,25 +402,25 @@ define <3 x float> @atan_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: atan_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl atanf ; 
CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl atanf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl atanf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -436,25 +436,25 @@ define <3 x float> @atan2_v3f32(<3 x float> %x, <3 x float> %y) nounwind { ; CHECK-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: mov s1, v1.s[1] -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Spill ; CHECK-NEXT: bl atan2f ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-NEXT: bl atan2f -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] 
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: mov s1, v1.s[2] ; CHECK-NEXT: bl atan2f -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #64 @@ -467,25 +467,25 @@ define <3 x float> @sinh_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: sinh_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl sinhf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl sinhf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl sinhf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded 
Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -497,25 +497,25 @@ define <3 x float> @cosh_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: cosh_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl coshf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl coshf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl coshf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -528,25 +528,25 @@ define <3 x float> @tanh_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: tanh_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, 
v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl tanhf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl tanhf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl tanhf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -559,25 +559,25 @@ define <3 x float> @exp_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: exp_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl expf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; 
CHECK-NEXT: bl expf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl expf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -590,25 +590,25 @@ define <3 x float> @exp2_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: exp2_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl exp2f ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl exp2f -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; 
CHECK-NEXT: bl exp2f -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -630,25 +630,25 @@ define <3 x float> @log_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: log_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl logf ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl logf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl logf -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -661,25 +661,25 @@ define <3 x float> @log10_v3f32(<3 x float> %x) 
nounwind { ; CHECK-LABEL: log10_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl log10f ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl log10f -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl log10f -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 @@ -692,25 +692,25 @@ define <3 x float> @log2_v3f32(<3 x float> %x) nounwind { ; CHECK-LABEL: log2_v3f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl log2f ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp] // 
16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl log2f -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl log2f -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #48 diff --git a/llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll b/llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll index 78dbc09a57afd..86442ec4b3039 100644 --- a/llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll +++ b/llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll @@ -108,14 +108,14 @@ define <4 x float> @modf_store_merging_load_before_store(<4 x float> %x, ptr %ou ; SLEEF-LABEL: modf_store_merging_load_before_store: ; SLEEF: // %bb.0: ; SLEEF-NEXT: sub sp, sp, #32 -; SLEEF-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; SLEEF-NEXT: str x30, [sp, #16] // 8-byte Spill ; SLEEF-NEXT: .cfi_def_cfa_offset 32 ; SLEEF-NEXT: .cfi_offset w30, -16 ; SLEEF-NEXT: ldr q1, [x0] -; SLEEF-NEXT: str q1, [sp] // 16-byte Folded Spill +; SLEEF-NEXT: str q1, [sp] // 16-byte Spill ; SLEEF-NEXT: bl _ZGVnN4vl4_modff -; SLEEF-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; SLEEF-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; SLEEF-NEXT: ldr q1, [sp] // 16-byte 
Reload +; SLEEF-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; SLEEF-NEXT: fadd v0.4s, v1.4s, v0.4s ; SLEEF-NEXT: add sp, sp, #32 ; SLEEF-NEXT: ret @@ -123,14 +123,14 @@ define <4 x float> @modf_store_merging_load_before_store(<4 x float> %x, ptr %ou ; ARMPL-LABEL: modf_store_merging_load_before_store: ; ARMPL: // %bb.0: ; ARMPL-NEXT: sub sp, sp, #32 -; ARMPL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; ARMPL-NEXT: str x30, [sp, #16] // 8-byte Spill ; ARMPL-NEXT: .cfi_def_cfa_offset 32 ; ARMPL-NEXT: .cfi_offset w30, -16 ; ARMPL-NEXT: ldr q1, [x0] -; ARMPL-NEXT: str q1, [sp] // 16-byte Folded Spill +; ARMPL-NEXT: str q1, [sp] // 16-byte Spill ; ARMPL-NEXT: bl armpl_vmodfq_f32 -; ARMPL-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; ARMPL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; ARMPL-NEXT: ldr q1, [sp] // 16-byte Reload +; ARMPL-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; ARMPL-NEXT: fadd v0.4s, v1.4s, v0.4s ; ARMPL-NEXT: add sp, sp, #32 ; ARMPL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll index 3fc8d6f78296c..c0418798e51df 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll @@ -170,13 +170,13 @@ define fp128 @test_v2f128(<2 x fp128> %a, fp128 %s) nounwind { ; CHECK-SD-LABEL: test_v2f128: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #32 -; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: mov v0.16b, v2.16b -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-SD-NEXT: bl __addtf3 -; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] 
// 8-byte Reload ; CHECK-SD-NEXT: add sp, sp, #32 ; CHECK-SD-NEXT: b __addtf3 ; @@ -184,14 +184,14 @@ define fp128 @test_v2f128(<2 x fp128> %a, fp128 %s) nounwind { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #32 ; CHECK-GI-NEXT: mov v3.16b, v0.16b -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: mov v1.16b, v3.16b ; CHECK-GI-NEXT: bl __addtf3 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: bl __addtf3 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #32 ; CHECK-GI-NEXT: ret %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 %s, <2 x fp128> %a) @@ -208,14 +208,14 @@ define fp128 @test_v2f128_neutral(<2 x fp128> %a) nounwind { ; CHECK-GI-NEXT: sub sp, sp, #32 ; CHECK-GI-NEXT: mov v2.16b, v0.16b ; CHECK-GI-NEXT: adrp x8, .LCPI13_0 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0] -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b ; CHECK-GI-NEXT: bl __addtf3 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload ; CHECK-GI-NEXT: bl __addtf3 -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-GI-NEXT: add sp, sp, #32 ; CHECK-GI-NEXT: ret %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll 
b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll index a2f4ccd369fb4..d81952087086e 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -626,32 +626,32 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-NOFP-SD-LABEL: test_v2f128: ; CHECK-NOFP-SD: // %bb.0: ; CHECK-NOFP-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NOFP-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-NOFP-SD-NEXT: bl __gttf2 -; CHECK-NOFP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NOFP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NOFP-SD-NEXT: cmp w0, #0 ; CHECK-NOFP-SD-NEXT: b.le .LBB18_2 ; CHECK-NOFP-SD-NEXT: // %bb.1: -; CHECK-NOFP-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP-SD-NEXT: .LBB18_2: -; CHECK-NOFP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-SD-LABEL: test_v2f128: ; CHECK-FP-SD: // %bb.0: ; CHECK-FP-SD-NEXT: sub sp, sp, #48 -; CHECK-FP-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-FP-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-FP-SD-NEXT: bl __gttf2 -; CHECK-FP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-FP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-FP-SD-NEXT: cmp w0, #0 ; CHECK-FP-SD-NEXT: b.le .LBB18_2 ; CHECK-FP-SD-NEXT: // %bb.1: -; CHECK-FP-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP-SD-NEXT: .LBB18_2: -; CHECK-FP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-FP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP-SD-NEXT: add sp, 
sp, #48 ; CHECK-FP-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll index c5109c8e63497..039628c37049c 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -626,32 +626,32 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind { ; CHECK-NOFP-SD-LABEL: test_v2f128: ; CHECK-NOFP-SD: // %bb.0: ; CHECK-NOFP-SD-NEXT: sub sp, sp, #48 -; CHECK-NOFP-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NOFP-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NOFP-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-NOFP-SD-NEXT: bl __lttf2 -; CHECK-NOFP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NOFP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NOFP-SD-NEXT: cmp w0, #0 ; CHECK-NOFP-SD-NEXT: b.pl .LBB18_2 ; CHECK-NOFP-SD-NEXT: // %bb.1: -; CHECK-NOFP-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NOFP-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NOFP-SD-NEXT: .LBB18_2: -; CHECK-NOFP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NOFP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NOFP-SD-NEXT: add sp, sp, #48 ; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-SD-LABEL: test_v2f128: ; CHECK-FP-SD: // %bb.0: ; CHECK-FP-SD-NEXT: sub sp, sp, #48 -; CHECK-FP-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-FP-SD-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-FP-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-FP-SD-NEXT: bl __lttf2 -; CHECK-FP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-FP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-FP-SD-NEXT: cmp w0, #0 ; CHECK-FP-SD-NEXT: b.pl .LBB18_2 ; CHECK-FP-SD-NEXT: // %bb.1: -; CHECK-FP-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-FP-SD-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-FP-SD-NEXT: .LBB18_2: -; CHECK-FP-SD-NEXT: ldr x30, [sp, #32] // 
8-byte Folded Reload +; CHECK-FP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-FP-SD-NEXT: add sp, sp, #48 ; CHECK-FP-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll index 3547c6f092b41..45c9e4c9c7194 100644 --- a/llvm/test/CodeGen/AArch64/vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll @@ -692,17 +692,17 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v2i64_v2f128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret @@ -715,28 +715,28 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v4i64_v4f128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-NEXT: stp q3, q2, [sp, #32] // 32-byte Folded Spill ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, 
[sp] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d1, x0 ; CHECK-NEXT: ldp q0, q4, [sp, #16] // 32-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-NEXT: mov v1.d[1], v4.d[0] ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret @@ -749,53 +749,53 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v8i64_v8f128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #144 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #128] // 8-byte Spill ; CHECK-NEXT: stp q3, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: stp q5, q4, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: stp q7, q6, [sp, #96] // 32-byte Folded Spill ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: 
fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; 
CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d3, x0 ; CHECK-NEXT: ldp q0, q1, [sp, #80] // 32-byte Folded Reload -; CHECK-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-NEXT: ldr q2, [sp, #64] // 16-byte Reload +; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Reload ; CHECK-NEXT: mov v3.d[1], v1.d[0] -; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-NEXT: add sp, sp, #144 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x) @@ -807,108 +807,108 @@ define <16 x i64> @llrint_v16f128(<16 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v16f128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #272 -; CHECK-NEXT: str q2, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: str q2, [sp, #160] // 16-byte Spill ; CHECK-NEXT: ldr q2, [sp, #368] ; CHECK-NEXT: stp q0, q3, [sp] // 32-byte Folded Spill ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: str q2, [sp, #240] // 16-byte Folded Spill +; CHECK-NEXT: str q2, [sp, #240] // 16-byte Spill ; CHECK-NEXT: ldr q2, [sp, #384] ; CHECK-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill -; CHECK-NEXT: str q2, [sp, #224] // 16-byte Folded Spill +; CHECK-NEXT: str q2, [sp, #224] // 16-byte Spill ; CHECK-NEXT: ldr q2, [sp, #336] ; CHECK-NEXT: stp q5, q7, [sp, #32] // 32-byte Folded Spill -; CHECK-NEXT: str q2, [sp, #192] // 16-byte Folded Spill +; CHECK-NEXT: str q2, [sp, #192] // 16-byte Spill ; CHECK-NEXT: ldr q2, [sp, #352] -; CHECK-NEXT: str q2, [sp, #176] // 16-byte Folded Spill +; CHECK-NEXT: str q2, [sp, #176] // 16-byte Spill ; CHECK-NEXT: ldr q2, [sp, #304] -; CHECK-NEXT: str q2, [sp, #144] // 16-byte Folded Spill +; CHECK-NEXT: str q2, [sp, #144] // 16-byte Spill ; CHECK-NEXT: ldr q2, [sp, #320] ; CHECK-NEXT: stp q4, q2, [sp, #112] // 32-byte Folded Spill ; CHECK-NEXT: ldr q2, [sp, #272] ; CHECK-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill ; CHECK-NEXT: ldr q2, [sp, 
#288] -; CHECK-NEXT: str q2, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str q2, [sp, #64] // 16-byte Spill ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #208] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #208] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #208] // 16-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #208] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #160] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; 
CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #112] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #128] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: 
str q0, [sp, #144] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #144] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #176] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #176] // 16-byte Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #192] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #224] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Reload ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d7, x0 ; CHECK-NEXT: ldp q0, q1, [sp, #208] // 32-byte Folded Reload ; CHECK-NEXT: ldp q4, q2, [sp, #96] // 32-byte Folded Reload -; CHECK-NEXT: ldr q3, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr q3, [sp, #80] // 16-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload -; CHECK-NEXT: ldr q6, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: ldr q6, [sp, #192] // 16-byte Reload ; CHECK-NEXT: mov v7.d[1], v1.d[0] ; CHECK-NEXT: ldp q5, q1, [sp, #144] // 32-byte Folded Reload ; CHECK-NEXT: add sp, sp, #272 diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index 65839b21b5356..c226ec3c3e25c 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -1371,15 
+1371,15 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v2fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #48 -; CHECK-i32-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: str x30, [sp, #32] // 8-byte Spill +; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Spill ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-i32-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Reload +; CHECK-i32-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-i32-NEXT: mov v0.s[1], w0 ; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-i32-NEXT: add sp, sp, #48 @@ -1388,17 +1388,17 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind { ; CHECK-i64-LABEL: lrint_v2fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: sub sp, sp, #48 -; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-i64-NEXT: mov v0.16b, v1.16b -; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-i64-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte 
Reload +; CHECK-i64-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] ; CHECK-i64-NEXT: add sp, sp, #48 ; CHECK-i64-NEXT: ret @@ -1411,26 +1411,26 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v4fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #80 -; CHECK-i32-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-i32-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp] // 16-byte Spill ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-i32-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Reload +; CHECK-i32-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-i32-NEXT: 
mov v0.s[3], w0 ; CHECK-i32-NEXT: add sp, sp, #80 ; CHECK-i32-NEXT: ret @@ -1438,28 +1438,28 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind { ; CHECK-i64-LABEL: lrint_v4fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: sub sp, sp, #80 -; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-i64-NEXT: mov v0.16b, v1.16b -; CHECK-i64-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-i64-NEXT: str x30, [sp, #64] // 8-byte Spill ; CHECK-i64-NEXT: stp q3, q2, [sp, #32] // 32-byte Folded Spill ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d1, x0 ; CHECK-i64-NEXT: ldp q0, q4, [sp, #16] // 32-byte Folded Reload -; CHECK-i64-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-i64-NEXT: ldr x30, [sp, #64] // 8-byte Reload ; CHECK-i64-NEXT: mov v1.d[1], v4.d[0] ; CHECK-i64-NEXT: add sp, sp, #80 ; CHECK-i64-NEXT: ret @@ -1472,47 +1472,47 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) 
nounwind { ; CHECK-i32-LABEL: lrint_v8fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #144 -; CHECK-i32-NEXT: str x30, [sp, #128] // 8-byte Folded Spill -; CHECK-i32-NEXT: str q4, [sp, #96] // 16-byte Folded Spill +; CHECK-i32-NEXT: str x30, [sp, #128] // 8-byte Spill +; CHECK-i32-NEXT: str q4, [sp, #96] // 16-byte Spill ; CHECK-i32-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill ; CHECK-i32-NEXT: stp q3, q5, [sp, #32] // 32-byte Folded Spill ; CHECK-i32-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[3], w0 -; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Spill +; 
CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: ldp q1, q0, [sp, #96] // 32-byte Folded Reload -; CHECK-i32-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-i32-NEXT: ldr x30, [sp, #128] // 8-byte Reload ; CHECK-i32-NEXT: mov v1.s[3], w0 ; CHECK-i32-NEXT: add sp, sp, #144 ; CHECK-i32-NEXT: ret @@ -1520,53 +1520,53 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind { ; CHECK-i64-LABEL: lrint_v8fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: sub sp, sp, #144 -; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-i64-NEXT: mov v0.16b, v1.16b -; CHECK-i64-NEXT: str x30, [sp, #128] // 8-byte Folded Spill +; CHECK-i64-NEXT: str x30, [sp, #128] // 8-byte Spill ; CHECK-i64-NEXT: stp q3, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-i64-NEXT: stp q5, q4, 
[sp, #48] // 32-byte Folded Spill ; CHECK-i64-NEXT: stp q7, q6, [sp, #96] // 32-byte Folded Spill ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #80] // 16-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, 
[sp, #48] // 16-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d3, x0 ; CHECK-i64-NEXT: ldp q0, q1, [sp, #80] // 32-byte Folded Reload -; CHECK-i64-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload -; CHECK-i64-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-i64-NEXT: ldr q2, [sp, #64] // 16-byte Reload +; CHECK-i64-NEXT: ldr x30, [sp, #128] // 8-byte Reload ; CHECK-i64-NEXT: mov v3.d[1], v1.d[0] -; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-i64-NEXT: add sp, sp, #144 ; CHECK-i64-NEXT: ret %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x) @@ -1581,94 +1581,94 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind { ; CHECK-i32-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #384] ; CHECK-i32-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #368] ; CHECK-i32-NEXT: stp q3, q5, [sp, #32] // 32-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #352] ; CHECK-i32-NEXT: stp q7, q4, [sp, #208] // 32-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #144] // 16-byte Folded Spill +; CHECK-i32-NEXT: 
str q1, [sp, #144] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #336] -; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #320] -; CHECK-i32-NEXT: str q1, [sp, #128] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #128] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #304] -; CHECK-i32-NEXT: str q1, [sp, #112] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #112] // 16-byte Spill ; CHECK-i32-NEXT: ldr q1, [sp, #288] ; CHECK-i32-NEXT: stp q6, q1, [sp, #80] // 32-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #272] -; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Spill ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #240] // 
16-byte Reload ; CHECK-i32-NEXT: mov v0.s[3], w0 -; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[3], w0 -; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded 
Reload +; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[3], w0 -; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #192] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, 
#192] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Reload ; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i32-NEXT: str q0, [sp, #192] // 16-byte Spill +; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Reload ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: ldp q3, q2, [sp, #192] // 32-byte Folded Reload ; CHECK-i32-NEXT: ldp q1, q0, [sp, #224] // 32-byte Folded Reload @@ -1680,108 +1680,108 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind { ; CHECK-i64-LABEL: lrint_v16fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: sub sp, sp, #272 -; CHECK-i64-NEXT: str q2, [sp, #160] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q2, [sp, #160] // 16-byte Spill ; CHECK-i64-NEXT: ldr q2, [sp, #368] ; CHECK-i64-NEXT: stp q0, q3, [sp] // 32-byte Folded Spill ; CHECK-i64-NEXT: mov v0.16b, v1.16b -; CHECK-i64-NEXT: str q2, [sp, #240] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q2, [sp, #240] // 16-byte Spill ; CHECK-i64-NEXT: ldr q2, [sp, #384] ; CHECK-i64-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill -; CHECK-i64-NEXT: str q2, [sp, #224] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q2, [sp, #224] // 16-byte Spill ; CHECK-i64-NEXT: ldr q2, [sp, #336] ; CHECK-i64-NEXT: stp q5, q7, [sp, #32] // 32-byte Folded Spill -; CHECK-i64-NEXT: str q2, [sp, #192] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q2, [sp, #192] // 16-byte Spill ; CHECK-i64-NEXT: ldr q2, [sp, #352] -; CHECK-i64-NEXT: str q2, [sp, #176] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q2, [sp, #176] // 16-byte Spill ; CHECK-i64-NEXT: ldr q2, [sp, #304] -; CHECK-i64-NEXT: str q2, [sp, #144] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q2, [sp, #144] // 16-byte Spill ; CHECK-i64-NEXT: ldr q2, [sp, #320] ; 
CHECK-i64-NEXT: stp q4, q2, [sp, #112] // 32-byte Folded Spill ; CHECK-i64-NEXT: ldr q2, [sp, #272] ; CHECK-i64-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill ; CHECK-i64-NEXT: ldr q2, [sp, #288] -; CHECK-i64-NEXT: str q2, [sp, #64] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q2, [sp, #64] // 16-byte Spill ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #208] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #208] // 16-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, 
[sp, #32] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #48] // 16-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte 
Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #176] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q1, [sp, #176] // 16-byte Reload ; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Spill +; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Reload ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d7, x0 ; CHECK-i64-NEXT: ldp q0, q1, [sp, #208] // 32-byte Folded Reload ; CHECK-i64-NEXT: ldp q4, q2, [sp, #96] // 32-byte Folded Reload -; 
CHECK-i64-NEXT: ldr q3, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q3, [sp, #80] // 16-byte Reload ; CHECK-i64-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload -; CHECK-i64-NEXT: ldr q6, [sp, #192] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q6, [sp, #192] // 16-byte Reload ; CHECK-i64-NEXT: mov v7.d[1], v1.d[0] ; CHECK-i64-NEXT: ldp q5, q1, [sp, #144] // 32-byte Folded Reload ; CHECK-i64-NEXT: add sp, sp, #272 diff --git a/llvm/test/CodeGen/AArch64/win-sve.ll b/llvm/test/CodeGen/AArch64/win-sve.ll index 3ba4a1c10a024..dea7781ba16e2 100644 --- a/llvm/test/CodeGen/AArch64/win-sve.ll +++ b/llvm/test/CodeGen/AArch64/win-sve.ll @@ -9,29 +9,29 @@ define i32 @f( %x) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: .seh_allocz 18 -; CHECK-NEXT: str p4, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p11, 7 -; 
CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_zreg z8, 2 @@ -67,13 +67,13 @@ define i32 @f( %x) { ; CHECK-NEXT: .seh_save_zreg z23, 17 ; CHECK-NEXT: str x28, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .seh_save_reg_x x28, 16 -; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: bl g ; CHECK-NEXT: mov w0, #3 // =0x3 ; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg_x x28, 16 @@ -109,29 +109,29 @@ define i32 @f( %x) { ; CHECK-NEXT: .seh_save_zreg z22, 16 ; CHECK-NEXT: ldr z23, [sp, #17, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_zreg z23, 17 -; CHECK-NEXT: ldr p4, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Reload ; 
CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .seh_allocz 18 @@ -150,29 +150,29 @@ define void @f2(i64 %n, %x) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: .seh_allocz 18 -; CHECK-NEXT: str p4, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte 
Folded Spill +; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_zreg z8, 2 @@ -208,7 +208,7 @@ define void @f2(i64 %n, %x) { ; CHECK-NEXT: .seh_save_zreg z23, 17 ; CHECK-NEXT: str x19, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: .seh_save_reg_x x19, 32 -; CHECK-NEXT: str x28, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x28, [sp, #8] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x28, 8 ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_fplr 16 @@ -230,7 +230,7 @@ define void @f2(i64 %n, %x) { ; CHECK-NEXT: .seh_add_fp 16 ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_fplr 16 -; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x28, 8 ; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg_x x19, 32 @@ -266,29 +266,29 @@ define void @f2(i64 %n, %x) { ; CHECK-NEXT: .seh_save_zreg z22, 16 ; CHECK-NEXT: ldr z23, [sp, #17, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_zreg z23, 17 -; CHECK-NEXT: ldr p4, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: ldr p11, [sp, 
#7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .seh_allocz 18 @@ -309,29 +309,29 @@ define void @f3(i64 %n, %x) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: .seh_allocz 18 -; CHECK-NEXT: str p4, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Spill ; 
CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_zreg z8, 2 @@ -367,7 +367,7 @@ define void @f3(i64 %n, %x) { ; CHECK-NEXT: .seh_save_zreg z23, 17 ; CHECK-NEXT: str x28, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .seh_save_reg_x x28, 16 -; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .seh_stackalloc 16 @@ -378,7 +378,7 @@ define void @f3(i64 %n, %x) { ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: .seh_stackalloc 16 -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg_x x28, 16 @@ -414,29 +414,29 @@ define void @f3(i64 %n, %x) { ; CHECK-NEXT: .seh_save_zreg z22, 16 ; CHECK-NEXT: ldr z23, [sp, #17, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_zreg z23, 17 -; CHECK-NEXT: ldr p4, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, 
[sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .seh_allocz 18 @@ -457,29 +457,29 @@ define void @f4(i64 %n, %x) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: .seh_allocz 18 -; CHECK-NEXT: str p4, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: str p11, [sp, 
#7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_zreg z8, 2 @@ -515,7 +515,7 @@ define void @f4(i64 %n, %x) { ; CHECK-NEXT: .seh_save_zreg z23, 17 ; CHECK-NEXT: str x28, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .seh_save_reg_x x28, 16 -; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .seh_stackalloc 16 @@ -530,7 +530,7 @@ define void @f4(i64 %n, %x) { ; CHECK-NEXT: .seh_allocz 1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: .seh_stackalloc 16 -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg_x x28, 16 @@ -566,29 +566,29 @@ define void @f4(i64 %n, %x) { ; CHECK-NEXT: .seh_save_zreg z22, 16 ; CHECK-NEXT: ldr z23, [sp, #17, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_zreg z23, 17 -; CHECK-NEXT: ldr p4, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte 
Folded Reload +; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .seh_allocz 18 @@ -609,29 +609,29 @@ define void @f5(i64 %n, %x) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: .seh_allocz 18 -; CHECK-NEXT: str p4, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp] // 2-byte Spill ; CHECK-NEXT: 
.seh_save_preg p4, 0 -; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_zreg z8, 2 @@ -667,7 +667,7 @@ define void @f5(i64 %n, %x) { ; CHECK-NEXT: .seh_save_zreg z23, 17 ; CHECK-NEXT: str x19, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: .seh_save_reg_x x19, 32 -; CHECK-NEXT: str x28, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x28, [sp, #8] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x28, 8 ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_fplr 16 @@ -692,7 +692,7 @@ define void @f5(i64 %n, %x) { ; CHECK-NEXT: .seh_add_fp 16 ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_fplr 16 -; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x28, 8 ; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg_x x19, 32 @@ -728,29 +728,29 @@ define void @f5(i64 %n, %x) { ; CHECK-NEXT: .seh_save_zreg z22, 16 ; CHECK-NEXT: ldr z23, [sp, #17, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_zreg z23, 17 -; CHECK-NEXT: ldr p4, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: ldr p11, [sp, 
#7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .seh_allocz 18 @@ -778,29 +778,29 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr ; CHECK-NEXT: .seh_stackalloc 16 ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: .seh_allocz 18 -; CHECK-NEXT: str p4, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, 
[sp, #6, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_zreg z8, 2 @@ -836,7 +836,7 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr ; CHECK-NEXT: .seh_save_zreg z23, 17 ; CHECK-NEXT: str x19, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: .seh_save_reg_x x19, 32 -; CHECK-NEXT: str x28, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x28, [sp, #8] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x28, 8 ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_fplr 16 @@ -860,7 +860,7 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr ; CHECK-NEXT: .seh_stackalloc 64 ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_fplr 16 -; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x28, 8 ; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg_x x19, 32 @@ -896,29 +896,29 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr ; CHECK-NEXT: .seh_save_zreg z22, 16 ; CHECK-NEXT: ldr z23, [sp, #17, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_zreg z23, 17 -; CHECK-NEXT: ldr p4, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte 
Reload ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .seh_allocz 18 @@ -941,29 +941,29 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr ; CHECK-NEXT: .LBB5_2: // %ehcleanup ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: .seh_allocz 18 -; CHECK-NEXT: str p4, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p9, 5 -; 
CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_zreg z8, 2 @@ -999,7 +999,7 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr ; CHECK-NEXT: .seh_save_zreg z23, 17 ; CHECK-NEXT: str x19, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: .seh_save_reg_x x19, 32 -; CHECK-NEXT: str x28, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x28, [sp, #8] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x28, 8 ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_fplr 16 @@ -1008,7 +1008,7 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_fplr 16 -; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x28, 8 ; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg_x x19, 32 @@ -1044,29 +1044,29 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr ; CHECK-NEXT: .seh_save_zreg z22, 16 ; CHECK-NEXT: ldr z23, [sp, #17, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_zreg z23, 17 -; CHECK-NEXT: ldr p4, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte 
Reload ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .seh_allocz 18 @@ -1091,7 +1091,7 @@ define void @f7(i64 %n) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: str x28, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .seh_save_reg_x x28, 16 -; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .seh_allocz 1 @@ -1101,7 +1101,7 @@ define void @f7(i64 %n) { ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: .seh_allocz 1 -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg_x x28, 16 @@ -1203,29 +1203,29 @@ define void @f10(i64 %n, %x) "frame-pointer"="all" { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: .seh_allocz 18 -; CHECK-NEXT: str p4, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #1, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #2, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #3, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #4, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #5, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #6, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p11, [sp, #7, mul vl] // 2-byte Spill ; CHECK-NEXT: 
.seh_save_preg p11, 7 -; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #8, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p13, [sp, #9, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p14, [sp, #10, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #11, mul vl] // 2-byte Spill ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_zreg z8, 2 @@ -1313,29 +1313,29 @@ define void @f10(i64 %n, %x) "frame-pointer"="all" { ; CHECK-NEXT: .seh_save_zreg z22, 16 ; CHECK-NEXT: ldr z23, [sp, #17, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_zreg z23, 17 -; CHECK-NEXT: ldr p4, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p4, 0 -; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #1, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p5, 1 -; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #2, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p6, 2 -; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p7, 3 -; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #4, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p8, 4 -; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p9, 5 -; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte 
Reload ; CHECK-NEXT: .seh_save_preg p10, 6 -; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #7, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p11, 7 -; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #8, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p12, 8 -; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #9, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p13, 9 -; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #10, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p14, 10 -; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Reload ; CHECK-NEXT: .seh_save_preg p15, 11 ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .seh_allocz 18 @@ -1520,7 +1520,7 @@ define tailcc void @f15(double %d, %vs, [9 x i64], i32 %i) { ; CHECK-NEXT: .seh_save_zreg z8, 0 ; CHECK-NEXT: str x28, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .seh_save_reg_x x28, 16 -; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .seh_stackalloc 16 @@ -1538,7 +1538,7 @@ define tailcc void @f15(double %d, %vs, [9 x i64], i32 %i) { ; CHECK-NEXT: .seh_allocz 1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: .seh_stackalloc 16 -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 8 ; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg_x x28, 16 diff --git a/llvm/test/CodeGen/AArch64/win64-fpowi.ll b/llvm/test/CodeGen/AArch64/win64-fpowi.ll index 3eb74f8394ec4..5819e0cce3969 100644 --- a/llvm/test/CodeGen/AArch64/win64-fpowi.ll +++ b/llvm/test/CodeGen/AArch64/win64-fpowi.ll @@ -51,29 +51,29 @@ define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) { ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: .seh_stackalloc 48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 32 -; CHECK-NEXT: str d8, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: str d8, [sp, #40] // 8-byte Spill ; CHECK-NEXT: .seh_save_freg d8, 40 ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: scvtf d8, w0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov d0, v0.d[1] ; CHECK-NEXT: fmov d1, d8 ; CHECK-NEXT: bl pow ; CHECK-NEXT: fmov d1, d8 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl pow -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte 
Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: ldr d8, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: ldr d8, [sp, #40] // 8-byte Reload ; CHECK-NEXT: .seh_save_freg d8, 40 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 32 ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: .seh_stackalloc 48 @@ -92,31 +92,31 @@ define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) { ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #48 ; CHECK-NEXT: .seh_stackalloc 48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 32 -; CHECK-NEXT: str d8, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: str d8, [sp, #40] // 8-byte Spill ; CHECK-NEXT: .seh_save_freg d8, 40 ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: scvtf s8, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp] // 16-byte Spill ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: fmov s1, s8 ; CHECK-NEXT: bl powf ; CHECK-NEXT: fmov s1, s8 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl powf -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: ldr d8, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: ldr d8, [sp, 
#40] // 8-byte Reload ; CHECK-NEXT: .seh_save_freg d8, 40 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 32 ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: .seh_stackalloc 48 diff --git a/llvm/test/CodeGen/AArch64/win64_vararg.ll b/llvm/test/CodeGen/AArch64/win64_vararg.ll index d72dee9021251..0f4cb44a6f73b 100644 --- a/llvm/test/CodeGen/AArch64/win64_vararg.ll +++ b/llvm/test/CodeGen/AArch64/win64_vararg.ll @@ -109,7 +109,7 @@ define i32 @fp(ptr, i64, ptr, ...) local_unnamed_addr #6 { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: stp x19, x20, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_regp_x x19, 96 -; CHECK-NEXT: str x21, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x21, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x21, 16 ; CHECK-NEXT: stp x29, x30, [sp, #24] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_fplr 24 @@ -138,7 +138,7 @@ define i32 @fp(ptr, i64, ptr, ...) local_unnamed_addr #6 { ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldp x29, x30, [sp, #24] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_fplr 24 -; CHECK-NEXT: ldr x21, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x21, [sp, #16] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x21, 16 ; CHECK-NEXT: ldp x19, x20, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_regp_x x19, 96 @@ -171,7 +171,7 @@ define void @vla(i32, ptr, ...) local_unnamed_addr { ; CHECK-NEXT: .seh_save_regp_x x19, 112 ; CHECK-NEXT: stp x21, x22, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_regp x21, 16 -; CHECK-NEXT: str x23, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x23, [sp, #32] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x23, 32 ; CHECK-NEXT: stp x29, x30, [sp, #40] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_fplr 40 @@ -209,7 +209,7 @@ define void @vla(i32, ptr, ...) 
local_unnamed_addr { ; CHECK-NEXT: .seh_add_fp 40 ; CHECK-NEXT: ldp x29, x30, [sp, #40] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_fplr 40 -; CHECK-NEXT: ldr x23, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x23, [sp, #32] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x23, 32 ; CHECK-NEXT: ldp x21, x22, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: .seh_save_regp x21, 16 @@ -308,11 +308,11 @@ define void @fixed_params(i32, double, i32, double, i32, double, i32, double, i3 ; CHECK-NEXT: fmov x3, d1 ; CHECK-NEXT: fmov x5, d2 ; CHECK-NEXT: fmov x7, d3 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Spill ; CHECK-NEXT: str d4, [sp, #8] ; CHECK-NEXT: str w8, [sp] ; CHECK-NEXT: bl varargs -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret tail call void (i32, ...) @varargs(i32 %0, double %1, i32 %2, double %3, i32 %4, double %5, i32 %6, double %7, i32 %8, double %9) diff --git a/llvm/test/CodeGen/AArch64/win64_vararg2.ll b/llvm/test/CodeGen/AArch64/win64_vararg2.ll index 2d3156a3aadac..548e6ac5fc0fe 100644 --- a/llvm/test/CodeGen/AArch64/win64_vararg2.ll +++ b/llvm/test/CodeGen/AArch64/win64_vararg2.ll @@ -9,9 +9,9 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: sub sp, sp, #80 ; CHECK-NEXT: .seh_stackalloc 80 -; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x19, 16 -; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 24 ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: add x8, sp, #40 @@ -27,9 +27,9 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) 
{ ; CHECK-NEXT: cmp w19, w0 ; CHECK-NEXT: cset w0, ls ; CHECK-NEXT: .seh_startepilogue -; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x30, 24 -; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; CHECK-NEXT: .seh_save_reg x19, 16 ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: .seh_stackalloc 80 @@ -43,9 +43,9 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) { ; GISEL-NEXT: // %bb.0: ; GISEL-NEXT: sub sp, sp, #80 ; GISEL-NEXT: .seh_stackalloc 80 -; GISEL-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; GISEL-NEXT: str x19, [sp, #16] // 8-byte Spill ; GISEL-NEXT: .seh_save_reg x19, 16 -; GISEL-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; GISEL-NEXT: str x30, [sp, #24] // 8-byte Spill ; GISEL-NEXT: .seh_save_reg x30, 24 ; GISEL-NEXT: .seh_endprologue ; GISEL-NEXT: add x8, sp, #40 @@ -61,9 +61,9 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) { ; GISEL-NEXT: cmp w19, w0 ; GISEL-NEXT: cset w0, ls ; GISEL-NEXT: .seh_startepilogue -; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Reload ; GISEL-NEXT: .seh_save_reg x30, 24 -; GISEL-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; GISEL-NEXT: ldr x19, [sp, #16] // 8-byte Reload ; GISEL-NEXT: .seh_save_reg x19, 16 ; GISEL-NEXT: add sp, sp, #80 ; GISEL-NEXT: .seh_stackalloc 80 diff --git a/llvm/test/CodeGen/AArch64/win64_vararg_float.ll b/llvm/test/CodeGen/AArch64/win64_vararg_float.ll index 842f48941421d..a99b29fa3bef9 100644 --- a/llvm/test/CodeGen/AArch64/win64_vararg_float.ll +++ b/llvm/test/CodeGen/AArch64/win64_vararg_float.ll @@ -21,7 +21,7 @@ define void @float_va_fn(float %a, i32 %b, ...) 
nounwind { ; O0-LABEL: float_va_fn: ; O0: // %bb.0: // %entry ; O0-NEXT: sub sp, sp, #80 -; O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; O0-NEXT: str x7, [sp, #72] ; O0-NEXT: str x6, [sp, #64] ; O0-NEXT: str x5, [sp, #56] @@ -33,7 +33,7 @@ define void @float_va_fn(float %a, i32 %b, ...) nounwind { ; O0-NEXT: str x8, [sp, #8] ; O0-NEXT: ldr x0, [sp, #8] ; O0-NEXT: bl f_va_list -; O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; O0-NEXT: add sp, sp, #80 ; O0-NEXT: ret entry: @@ -71,7 +71,7 @@ define void @double_va_fn(double %a, i32 %b, ...) nounwind { ; O0-LABEL: double_va_fn: ; O0: // %bb.0: // %entry ; O0-NEXT: sub sp, sp, #80 -; O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; O0-NEXT: str x7, [sp, #72] ; O0-NEXT: str x6, [sp, #64] ; O0-NEXT: str x5, [sp, #56] @@ -83,7 +83,7 @@ define void @double_va_fn(double %a, i32 %b, ...) nounwind { ; O0-NEXT: str x8, [sp, #8] ; O0-NEXT: ldr x0, [sp, #8] ; O0-NEXT: bl d_va_list -; O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; O0-NEXT: add sp, sp, #80 ; O0-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll b/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll index 90f878327abc8..029d2da43b691 100644 --- a/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll +++ b/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll @@ -34,7 +34,7 @@ define win64cc void @float_va_fn(float %a, i32 %b, ...) nounwind { ; O0-LABEL: float_va_fn: ; O0: // %bb.0: // %entry ; O0-NEXT: sub sp, sp, #80 -; O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; O0-NEXT: str x7, [sp, #72] ; O0-NEXT: str x6, [sp, #64] ; O0-NEXT: str x5, [sp, #56] @@ -46,7 +46,7 @@ define win64cc void @float_va_fn(float %a, i32 %b, ...) 
nounwind { ; O0-NEXT: str x8, [sp, #8] ; O0-NEXT: ldr x0, [sp, #8] ; O0-NEXT: bl f_va_list -; O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; O0-NEXT: add sp, sp, #80 ; O0-NEXT: ret entry: @@ -84,7 +84,7 @@ define win64cc void @double_va_fn(double %a, i32 %b, ...) nounwind { ; O0-LABEL: double_va_fn: ; O0: // %bb.0: // %entry ; O0-NEXT: sub sp, sp, #80 -; O0-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; O0-NEXT: str x30, [sp, #16] // 8-byte Spill ; O0-NEXT: str x7, [sp, #72] ; O0-NEXT: str x6, [sp, #64] ; O0-NEXT: str x5, [sp, #56] @@ -96,7 +96,7 @@ define win64cc void @double_va_fn(double %a, i32 %b, ...) nounwind { ; O0-NEXT: str x8, [sp, #8] ; O0-NEXT: ldr x0, [sp, #8] ; O0-NEXT: bl d_va_list -; O0-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; O0-NEXT: ldr x30, [sp, #16] // 8-byte Reload ; O0-NEXT: add sp, sp, #80 ; O0-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/win64cc-backup-x18.ll b/llvm/test/CodeGen/AArch64/win64cc-backup-x18.ll index 936ee3ca9e392..91c7bfb091b45 100644 --- a/llvm/test/CodeGen/AArch64/win64cc-backup-x18.ll +++ b/llvm/test/CodeGen/AArch64/win64cc-backup-x18.ll @@ -12,10 +12,10 @@ define dso_local win64cc void @func() #0 { ; CHECK-LABEL: func: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEXT: str x18, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x18, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: bl other -; CHECK-NEXT: ldr x18, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x18, [sp, #16] // 8-byte Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/wincfi-missing-seh-directives.ll b/llvm/test/CodeGen/AArch64/wincfi-missing-seh-directives.ll index 6d14abdc2ed75..a7accf5a5a0e4 100644 --- a/llvm/test/CodeGen/AArch64/wincfi-missing-seh-directives.ll +++ b/llvm/test/CodeGen/AArch64/wincfi-missing-seh-directives.ll @@ -9,9 +9,9 @@ ; CHECK-NEXT: .seh_proc foo ; CHECK: sub sp, sp, #496 ; CHECK-NEXT: .seh_stackalloc 496 -; CHECK-NEXT: str x19, [sp, #208] // 8-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #208] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x19, 208 -; CHECK-NEXT: str x21, [sp, #216] // 8-byte Folded Spill +; CHECK-NEXT: str x21, [sp, #216] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x21, 216 ; CHECK-NEXT: stp x23, x24, [sp, #224] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_regp x23, 224 @@ -19,7 +19,7 @@ ; CHECK-NEXT: .seh_save_regp x25, 240 ; CHECK-NEXT: stp x27, x28, [sp, #256] // 16-byte Folded Spill ; CHECK-NEXT: .seh_save_regp x27, 256 -; CHECK-NEXT: str x30, [sp, #272] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #272] // 8-byte Spill ; CHECK-NEXT: .seh_save_reg x30, 272 ; CHECK-NEXT: .seh_endprologue diff --git a/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir b/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir new file mode 100644 index 0000000000000..4b3e8eab3e1e4 --- /dev/null +++ b/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir @@ -0,0 +1,190 @@ +# Test to ensure that variable "__last" is properly recovered at the end of the livedebugvalues pass when Instruction Referencing-based LiveDebugValues is used. 
+# This testcase was obtained by looking at FileCheck.cpp and reducing it down via llvm-reduce. +# This test is the same as llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll, however, the output is taken just before the livedebugvalues pass, and then a clobber +# to the stack slot has been added after the first LDRXui in bb.2.if.then13, the livedebugvalues pass should still recover the value, as it was loaded into $x8 before the clobber. + +# REQUIRES: system-darwin + +# RUN: llc -o - %s -run-pass=livedebugvalues | FileCheck %s + +# CHECK: ![[LOC:[0-9]+]] = !DILocalVariable(name: "__last", +# CHECK-LABEL: bb.2.if.then13 +# CHECK: STRXui $xzr, $sp, 1 +# CHECK-NEXT: DBG_VALUE_LIST ![[LOC]], !DIExpression(DW_OP_LLVM_arg, 0), $x8 + +--- | + ; ModuleID = '/Users/srastogi/Development/llvm-project/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll' + source_filename = "/Users/srastogi/Development/llvm-project/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" + + declare void @_ZdlPvm() + + define fastcc void @"_ZNSt3__111__introsortINS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_Lb0EEEvT1_SJ_T0_NS_15iterator_traitsISJ_E15difference_typeEb"(ptr %__first, ptr %__last, i1 %cmp, ptr %__first.addr.0, ptr %Label3.i.i.i241, ptr %__pivot.sroa.9113.8.copyload.i, ptr %0, ptr %1) !dbg !4 { + br label %while.cond + + while.cond: ; preds = %if.end16, %2 + br i1 %cmp, label %if.then13, label %if.end16 + + if.then13: ; preds = %while.cond + %cmp.i = icmp eq ptr %__first, %__last + %or.cond.i = select i1 %cmp.i, i1 false, i1 false + #dbg_value(ptr %__last, !10, !DIExpression(), !16) + br i1 %or.cond.i, label %common.ret, label %for.body.i, !dbg !20 + + common.ret: ; preds = %for.body.i, %if.then13 + ret void + + for.body.i: ; preds = 
%if.then13 + %InputLine.i.i = getelementptr i8, ptr %__first.addr.0, i64 132 + br label %common.ret + + if.end16: ; preds = %while.cond + %__pivot.sroa.13.8.copyload.i = load i64, ptr null, align 8 + call void @_ZdlPvm() + store ptr %__pivot.sroa.9113.8.copyload.i, ptr %0, align 8 + store i64 %__pivot.sroa.13.8.copyload.i, ptr %1, align 8 + store i64 0, ptr %__first, align 8 + store i32 0, ptr %__first.addr.0, align 8 + store i32 1, ptr %Label3.i.i.i241, align 4 + br label %while.cond + } + + !llvm.module.flags = !{!0} + !llvm.dbg.cu = !{!1} + + !0 = !{i32 2, !"Debug Info Version", i32 3} + !1 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !2, producer: "clang version 22.0.0git (git@github.com:llvm/llvm-project.git 46a3b4d5dc6dd9449ec7c0c9065552368cdf41d6)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, retainedTypes: !3, globals: !3, imports: !3, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/Library/Developer/CommandLineTools/SDKs/MacOSX15.3.sdk", sdk: "MacOSX15.3.sdk") + !2 = !DIFile(filename: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project/llvm/utils/FileCheck/FileCheck.cpp", directory: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project/build-instr-ref-stage2", checksumkind: CSK_MD5, checksum: "fa5f53f1b5782eb8b92fadec416b8941") + !3 = !{} + !4 = distinct !DISubprogram(name: "__introsort", linkageName: "_ZNSt3__111__introsortINS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_Lb0EEEvT1_SJ_T0_NS_15iterator_traitsISJ_E15difference_typeEb", scope: !6, file: !5, line: 758, type: !8, scopeLine: 762, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3, keyInstructions: true) + !5 = !DIFile(filename: 
"/Library/Developer/CommandLineTools/SDKs/MacOSX15.3.sdk/usr/include/c++/v1/__algorithm/sort.h", directory: "") + !6 = !DINamespace(name: "__1", scope: !7, exportSymbols: true) + !7 = !DINamespace(name: "std", scope: null) + !8 = !DISubroutineType(cc: DW_CC_nocall, types: !9) + !9 = !{null} + !10 = !DILocalVariable(name: "__last", arg: 2, scope: !11, file: !5, line: 284, type: !13) + !11 = distinct !DISubprogram(name: "__insertion_sort", linkageName: "_ZNSt3__116__insertion_sortB8nn180100INS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_EEvT1_SJ_T0_", scope: !6, file: !5, line: 284, type: !12, scopeLine: 284, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3, keyInstructions: true) + !12 = distinct !DISubroutineType(types: !9) + !13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64) + !14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InputAnnotation", file: !15, line: 323, size: 768, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !3, identifier: "_ZTS15InputAnnotation") + !15 = !DIFile(filename: "llvm/utils/FileCheck/FileCheck.cpp", directory: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project", checksumkind: CSK_MD5, checksum: "fa5f53f1b5782eb8b92fadec416b8941") + !16 = !DILocation(line: 0, scope: !11, inlinedAt: !17) + !17 = distinct !DILocation(line: 800, column: 9, scope: !18) + !18 = distinct !DILexicalBlock(scope: !19, file: !5, line: 799, column: 23) + !19 = distinct !DILexicalBlock(scope: !4, file: !5, line: 770, column: 16) + !20 = !DILocation(line: 288, column: 15, scope: !21, inlinedAt: !17, atomGroup: 1, atomRank: 1) + !21 = distinct !DILexicalBlock(scope: !11, file: !5, line: 288, column: 7) +... 
+--- +name: '_ZNSt3__111__introsortINS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_Lb0EEEvT1_SJ_T0_NS_15iterator_traitsISJ_E15difference_typeEb' +alignment: 4 +tracksRegLiveness: true +noPhis: true +isSSA: false +noVRegs: true +hasFakeUses: false +debugInstrRef: true +tracksDebugUserValues: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$w2' } + - { reg: '$x3' } + - { reg: '$x4' } + - { reg: '$x5' } + - { reg: '$x6' } + - { reg: '$x7' } +frameInfo: + stackSize: 112 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 + isCalleeSavedInfoValid: true +stack: + - { id: 0, type: spill-slot, offset: -104, size: 8, alignment: 8 } + - { id: 1, type: spill-slot, offset: -8, size: 8, alignment: 8, callee-saved-register: '$lr' } + - { id: 2, type: spill-slot, offset: -16, size: 8, alignment: 8, callee-saved-register: '$fp' } + - { id: 3, type: spill-slot, offset: -24, size: 8, alignment: 8, callee-saved-register: '$x19' } + - { id: 4, type: spill-slot, offset: -32, size: 8, alignment: 8, callee-saved-register: '$x20' } + - { id: 5, type: spill-slot, offset: -40, size: 8, alignment: 8, callee-saved-register: '$x21' } + - { id: 6, type: spill-slot, offset: -48, size: 8, alignment: 8, callee-saved-register: '$x22' } + - { id: 7, type: spill-slot, offset: -56, size: 8, alignment: 8, callee-saved-register: '$x23' } + - { id: 8, type: spill-slot, offset: -64, size: 8, alignment: 8, callee-saved-register: '$x24' } + - { id: 9, type: spill-slot, offset: -72, size: 8, alignment: 8, callee-saved-register: '$x25' } + - { id: 10, type: spill-slot, offset: -80, size: 8, alignment: 8, callee-saved-register: '$x26' } + - { id: 11, type: spill-slot, offset: -88, size: 8, alignment: 8, callee-saved-register: '$x27' } + - { id: 12, type: spill-slot, offset: -96, size: 8, alignment: 8, callee-saved-register: 
'$x28' } +machineFunctionInfo: + hasRedZone: false + stackSizeZPR: 0 + stackSizePPR: 0 + hasStackFrame: true +body: | + bb.0 (%ir-block.2): + successors: %bb.2(0x04000000), %bb.3(0x7c000000) + liveins: $w2, $x0, $x1, $x3, $x4, $x5, $x6, $x7, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20, $lr + + $sp = frame-setup SUBXri $sp, 112, 0 + frame-setup STPXi killed $x28, killed $x27, $sp, 2 :: (store (s64) into %stack.12), (store (s64) into %stack.11) + frame-setup STPXi killed $x26, killed $x25, $sp, 4 :: (store (s64) into %stack.10), (store (s64) into %stack.9) + frame-setup STPXi killed $x24, killed $x23, $sp, 6 :: (store (s64) into %stack.8), (store (s64) into %stack.7) + frame-setup STPXi killed $x22, killed $x21, $sp, 8 :: (store (s64) into %stack.6), (store (s64) into %stack.5) + frame-setup STPXi killed $x20, killed $x19, $sp, 10 :: (store (s64) into %stack.4), (store (s64) into %stack.3) + frame-setup STPXi $fp, killed $lr, $sp, 12 :: (store (s64) into %stack.2), (store (s64) into %stack.1) + frame-setup CFI_INSTRUCTION def_cfa_offset 112 + frame-setup CFI_INSTRUCTION offset $w30, -8 + frame-setup CFI_INSTRUCTION offset $w29, -16 + frame-setup CFI_INSTRUCTION offset $w19, -24 + frame-setup CFI_INSTRUCTION offset $w20, -32 + frame-setup CFI_INSTRUCTION offset $w21, -40 + frame-setup CFI_INSTRUCTION offset $w22, -48 + frame-setup CFI_INSTRUCTION offset $w23, -56 + frame-setup CFI_INSTRUCTION offset $w24, -64 + frame-setup CFI_INSTRUCTION offset $w25, -72 + frame-setup CFI_INSTRUCTION offset $w26, -80 + frame-setup CFI_INSTRUCTION offset $w27, -88 + frame-setup CFI_INSTRUCTION offset $w28, -96 + DBG_PHI $x1, 1 + $x19 = ORRXrs $xzr, killed $x7, 0 + $x20 = ORRXrs $xzr, killed $x6, 0 + $x21 = ORRXrs $xzr, killed $x5, 0 + $x22 = ORRXrs $xzr, killed $x4, 0 + $x23 = ORRXrs $xzr, killed $x3, 0 + $w25 = ORRWrs $wzr, killed $w2, 0 + $x26 = ORRXrs $xzr, killed $x0, 0 + renamable $w27 = MOVZWi 1, 0 + STRXui killed $x1, $sp, 1 :: (store (s64) into %stack.0) + TBNZW 
renamable $w25, 0, %bb.2 + + bb.3.if.end16: + successors: %bb.2(0x04000000), %bb.3(0x7c000000) + liveins: $w25, $w27, $x19, $x20, $x21, $x22, $x23, $x26 + + $x28 = ORRXrs $xzr, $xzr, 0 + renamable $x24 = LDRXui killed renamable $x28, 0 :: (load (s64) from `ptr null`) + BL @_ZdlPvm, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + STRXui renamable $x21, renamable $x20, 0 :: (store (s64) into %ir.0) + STRXui killed renamable $x24, renamable $x19, 0 :: (store (s64) into %ir.1) + STRXui $xzr, renamable $x26, 0 :: (store (s64) into %ir.__first) + STRWui $wzr, renamable $x23, 0 :: (store (s32) into %ir.__first.addr.0, align 8) + STRWui renamable $w27, renamable $x22, 0 :: (store (s32) into %ir.Label3.i.i.i241) + TBZW renamable $w25, 0, %bb.3 + + bb.2.if.then13: + liveins: $x26 + + DBG_INSTR_REF !10, !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref(1, 0), debug-location !16 + renamable $x8 = LDRXui $sp, 1 :: (load (s64) from %stack.0) + ; Clobber the stack slot that contains the value we care about, to ensure that LDV can still recover it from $x8 above + STRXui $xzr, $sp, 1 :: (store (s64) into %stack.0) + $fp, $lr = frame-destroy LDPXi $sp, 12 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + $x20, $x19 = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.4), (load (s64) from %stack.3) + $xzr = SUBSXrs killed renamable $x26, killed renamable $x8, 0, implicit-def $nzcv, debug-location !20 + $x22, $x21 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.6), (load (s64) from %stack.5) + $x24, $x23 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.8), (load (s64) from %stack.7) + $x26, $x25 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.10), (load (s64) from %stack.9) + $x28, $x27 = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.12), (load (s64) from %stack.11) + $sp = frame-destroy ADDXri $sp, 112, 0 + RET undef $lr +... 
diff --git a/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll b/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll new file mode 100644 index 0000000000000..78c328deff697 --- /dev/null +++ b/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll @@ -0,0 +1,58 @@ +; Test to ensure that variable "__last" is properly recovered at the end of the livedebugvalues pass when Instruction Referencing-based LiveDebugValues is used. +; This testcase was obtained by looking at FileCheck.cpp and reducing it down via llvm-reduce. + +; RUN: llc -mtriple=aarch64-apple-darwin -o - %s -stop-after=livedebugvalues -O2 -experimental-debug-variable-locations | FileCheck %s + +; CHECK: ![[LOC:[0-9]+]] = !DILocalVariable(name: "__last", +; CHECK: DBG_VALUE_LIST ![[LOC]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 8, DW_OP_deref), $sp + + +declare void @_ZdlPvm() +define fastcc void @"_ZNSt3__111__introsortINS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_Lb0EEEvT1_SJ_T0_NS_15iterator_traitsISJ_E15difference_typeEb"(ptr %__first, ptr %__last, i1 %cmp, ptr %__first.addr.0, ptr %Label3.i.i.i241, ptr %__pivot.sroa.9113.8.copyload.i, ptr %0, ptr %1) !dbg !4 { + br label %while.cond +while.cond: ; preds = %if.end16, %entry + br i1 %cmp, label %if.then13, label %if.end16 +if.then13: ; preds = %while.cond + %cmp.i = icmp eq ptr %__first, %__last + %or.cond.i = select i1 %cmp.i, i1 false, i1 false + #dbg_value(ptr %__last, !10, !DIExpression(), !16) + br i1 %or.cond.i, label %common.ret, label %for.body.i, !dbg !23 +common.ret: ; preds = %for.body.i, %if.then13 + ret void +for.body.i: ; preds = %if.then13 + %InputLine.i.i = getelementptr i8, ptr %__first.addr.0, i64 132 + br label %common.ret +if.end16: ; preds = %while.cond + %__pivot.sroa.13.8.copyload.i = load i64, ptr null, align 8 + call void @_ZdlPvm() + store ptr 
%__pivot.sroa.9113.8.copyload.i, ptr %0, align 8 + store i64 %__pivot.sroa.13.8.copyload.i, ptr %1, align 8 + store i64 0, ptr %__first, align 8 + store i32 0, ptr %__first.addr.0, align 8 + store i32 1, ptr %Label3.i.i.i241, align 4 + br label %while.cond +} +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!1} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !2, producer: "clang version 22.0.0git (git@github.com:llvm/llvm-project.git 46a3b4d5dc6dd9449ec7c0c9065552368cdf41d6)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, retainedTypes: !3, globals: !3, imports: !3, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/Library/Developer/CommandLineTools/SDKs/MacOSX15.3.sdk", sdk: "MacOSX15.3.sdk") +!2 = !DIFile(filename: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project/llvm/utils/FileCheck/FileCheck.cpp", directory: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project/build-instr-ref-stage2", checksumkind: CSK_MD5, checksum: "fa5f53f1b5782eb8b92fadec416b8941") +!3 = !{} +!4 = distinct !DISubprogram(name: "__introsort", linkageName: "_ZNSt3__111__introsortINS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_Lb0EEEvT1_SJ_T0_NS_15iterator_traitsISJ_E15difference_typeEb", scope: !6, file: !5, line: 758, type: !8, scopeLine: 762, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3, keyInstructions: true) +!5 = !DIFile(filename: "/Library/Developer/CommandLineTools/SDKs/MacOSX15.3.sdk/usr/include/c++/v1/__algorithm/sort.h", directory: "") +!6 = !DINamespace(name: "__1", scope: !7, exportSymbols: true) +!7 = !DINamespace(name: "std", scope: null) +!8 = !DISubroutineType(cc: 
DW_CC_nocall, types: !9) +!9 = !{null} +!10 = !DILocalVariable(name: "__last", arg: 2, scope: !11, file: !5, line: 284, type: !13) +!11 = distinct !DISubprogram(name: "__insertion_sort", linkageName: "_ZNSt3__116__insertion_sortB8nn180100INS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_EEvT1_SJ_T0_", scope: !6, file: !5, line: 284, type: !12, scopeLine: 284, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3, keyInstructions: true) +!12 = distinct !DISubroutineType(types: !9) +!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64) +!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InputAnnotation", file: !15, line: 323, size: 768, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !3, identifier: "_ZTS15InputAnnotation") +!15 = !DIFile(filename: "llvm/utils/FileCheck/FileCheck.cpp", directory: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project", checksumkind: CSK_MD5, checksum: "fa5f53f1b5782eb8b92fadec416b8941") +!16 = !DILocation(line: 0, scope: !11, inlinedAt: !17) +!17 = distinct !DILocation(line: 800, column: 9, scope: !18) +!18 = distinct !DILexicalBlock(scope: !22, file: !5, line: 799, column: 23) +!22 = distinct !DILexicalBlock(scope: !4, file: !5, line: 770, column: 16) +!23 = !DILocation(line: 288, column: 15, scope: !24, inlinedAt: !17, atomGroup: 1, atomRank: 1) +!24 = distinct !DILexicalBlock(scope: !11, file: !5, line: 288, column: 7) From 17789e9fa8e1417dacd4cf4fe1ef732a78530730 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 14 Nov 2025 10:36:50 -0800 Subject: [PATCH 18/56] [MemCpyOpt][profcheck] Set `unknown` branch weights for certain selects (#167597) Issue #147390 --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 
5 +++++ llvm/test/Transforms/MemCpyOpt/memset-memcpy-dbgloc.ll | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 08be5df9872b7..db2afe26bc65a 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -47,6 +47,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -1366,6 +1367,10 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize); Value *MemsetLen = Builder.CreateSelect( Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff); + // FIXME (#167968): we could explore estimating the branch_weights based on + // value profiling data about the 2 sizes. + if (auto *SI = dyn_cast<SelectInst>(MemsetLen)) + setExplicitlyUnknownBranchWeightsIfProfiled(*SI, DEBUG_TYPE); Instruction *NewMemSet = Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize), MemSet->getOperand(1), MemsetLen, Alignment); diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-dbgloc.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-dbgloc.ll index 3f577f09ada1e..edd64e5efe1aa 100644 --- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-dbgloc.ll +++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-dbgloc.ll @@ -7,14 +7,14 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) -define void @test_constant(i64 %src_size, ptr %dst, i64 %dst_size, i8 %c) !dbg !5 { +define void @test_constant(i64 %src_size, ptr %dst, i64 %dst_size, i8 %c) !dbg !5 !prof !14 { ; CHECK-LABEL: define void @test_constant( -; CHECK-SAME: i64 [[SRC_SIZE:%.*]], ptr 
[[DST:%.*]], i64 [[DST_SIZE:%.*]], i8 [[C:%.*]]) !dbg [[DBG5:![0-9]+]] { +; CHECK-SAME: i64 [[SRC_SIZE:%.*]], ptr [[DST:%.*]], i64 [[DST_SIZE:%.*]], i8 [[C:%.*]]) !dbg [[DBG5:![0-9]+]] !prof {{.*}} { ; CHECK-NEXT: [[NON_ZERO:%.*]] = icmp ne i64 [[SRC_SIZE]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[NON_ZERO]]) ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE]], [[SRC_SIZE]], !dbg [[DBG11:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], [[SRC_SIZE]], !dbg [[DBG11]] -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]], !dbg [[DBG11]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]], !dbg [[DBG11]], !prof [[SELPROF:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SRC_SIZE]], !dbg [[DBG11]] ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP4]], i8 [[C]], i64 [[TMP3]], i1 false), !dbg [[DBG11]] ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr @C, i64 [[SRC_SIZE]], i1 false), !dbg [[DBG12:![0-9]+]] @@ -29,6 +29,7 @@ define void @test_constant(i64 %src_size, ptr %dst, i64 %dst_size, i8 %c) !dbg ! ; Validate that the memset is mapped to DILocation for the original memset. ; CHECK: [[DBG11]] = !DILocation(line: 1, +; CHECK: [[SELPROF]] = !{!"unknown", !"memcpyopt"} ; CHECK: [[DBG12]] = !DILocation(line: 2, ; CHECK: [[DBG13]] = !DILocation(line: 3, @@ -50,3 +51,4 @@ define void @test_constant(i64 %src_size, ptr %dst, i64 %dst_size, i8 %c) !dbg ! 
!11 = !DILocation(line: 1, column: 1, scope: !5) !12 = !DILocation(line: 2, column: 1, scope: !5) !13 = !DILocation(line: 3, column: 1, scope: !5) +!14 = !{!"function_entry_count", i32 10} \ No newline at end of file From b9c769bae5af7e8087835c3a0587daf725a17a39 Mon Sep 17 00:00:00 2001 From: Shreeyash Pandey Date: Sat, 15 Nov 2025 00:07:37 +0530 Subject: [PATCH 19/56] [libc] fix EXPECT_EXIT suspend/timeout for darwin (#166065) Fixes: https://github.com/llvm/llvm-project/issues/166059 --------- Signed-off-by: Shreeyash Pandey --- libc/test/UnitTest/ExecuteFunctionUnix.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libc/test/UnitTest/ExecuteFunctionUnix.cpp b/libc/test/UnitTest/ExecuteFunctionUnix.cpp index c0e85c2144005..7c2eb7c6e887c 100644 --- a/libc/test/UnitTest/ExecuteFunctionUnix.cpp +++ b/libc/test/UnitTest/ExecuteFunctionUnix.cpp @@ -57,9 +57,7 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, int timeout_ms) { } ::close(pipe_fds[1]); - struct pollfd poll_fd { - pipe_fds[0], 0, 0 - }; + struct pollfd poll_fd{pipe_fds[0], POLLIN, 0}; // No events requested so this call will only return after the timeout or if // the pipes peer was closed, signaling the process exited. if (::poll(&poll_fd, 1, timeout_ms) == -1) { From c29b29bb6a7f8b1af00f65aadc9b666764e52154 Mon Sep 17 00:00:00 2001 From: Konstantin Varlamov Date: Fri, 14 Nov 2025 10:39:06 -0800 Subject: [PATCH 20/56] [libc++][hardening] Allow setting the assertion semantic via CMake. (#167636) Add a new CMake variable, `LIBCXX_ASSERTION_SEMANTIC`, that largely mirrors `LIBCXX_HARDENING_MODE`, except that it also supports a special value `hardening_dependent` that indicates the semantic will be selected based on the hardening mode in effect: - `fast` and `extensive` map to `quick_enforce`; - `debug` maps to `enforce`. 
--- libcxx/CMakeLists.txt | 24 +++++++ libcxx/docs/Hardening.rst | 21 ++++++ libcxx/include/__config | 14 +++- libcxx/include/__config_site.in | 1 + libcxx/include/__configuration/hardening.h | 72 ++++++++++++++----- .../odr_signature.assertion_semantics.sh.cpp | 71 ++++++++++++++++++ .../assertion_semantic_incorrect_value.sh.cpp | 29 ++++++++ .../override_with_enforce_semantic.pass.cpp | 29 ++++++++ .../override_with_ignore_semantic.pass.cpp | 26 +++++++ .../override_with_observe_semantic.pass.cpp | 27 +++++++ ...rride_with_quick_enforce_semantic.pass.cpp | 28 ++++++++ .../test_check_assertion.pass.cpp | 10 +-- 12 files changed, 327 insertions(+), 25 deletions(-) create mode 100644 libcxx/test/extensions/libcxx/odr_signature.assertion_semantics.sh.cpp create mode 100644 libcxx/test/libcxx/assertions/semantics/assertion_semantic_incorrect_value.sh.cpp create mode 100644 libcxx/test/libcxx/assertions/semantics/override_with_enforce_semantic.pass.cpp create mode 100644 libcxx/test/libcxx/assertions/semantics/override_with_ignore_semantic.pass.cpp create mode 100644 libcxx/test/libcxx/assertions/semantics/override_with_observe_semantic.pass.cpp create mode 100644 libcxx/test/libcxx/assertions/semantics/override_with_quick_enforce_semantic.pass.cpp diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index a119850cd808e..1423b6713fd35 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -66,6 +66,19 @@ if (NOT "${LIBCXX_HARDENING_MODE}" IN_LIST LIBCXX_SUPPORTED_HARDENING_MODES) message(FATAL_ERROR "Unsupported hardening mode: '${LIBCXX_HARDENING_MODE}'. Supported values are ${LIBCXX_SUPPORTED_HARDENING_MODES}.") endif() +set(LIBCXX_SUPPORTED_ASSERTION_SEMANTICS hardening_dependent ignore observe quick_enforce enforce) +set(LIBCXX_ASSERTION_SEMANTIC "hardening_dependent" CACHE STRING + "Specify the default assertion semantic to use. This semantic will be used + inside the compiled library and will be the default when compiling user code. 
+ Note that users can override this setting in their own code. This does not + affect the ABI. Supported values are ${LIBCXX_SUPPORTED_ASSERTION_SEMANTICS}. + `hardening_dependent` is a special value that instructs the library to select + the assertion semantic based on the hardening mode in effect.") + +if (NOT "${LIBCXX_ASSERTION_SEMANTIC}" IN_LIST LIBCXX_SUPPORTED_ASSERTION_SEMANTICS) + message(FATAL_ERROR + "Unsupported assertion semantic: '${LIBCXX_ASSERTION_SEMANTIC}'. Supported values are ${LIBCXX_SUPPORTED_ASSERTION_SEMANTICS}.") +endif() set(LIBCXX_ASSERTION_HANDLER_FILE "vendor/llvm/default_assertion_handler.in" CACHE STRING @@ -763,6 +776,17 @@ elseif (LIBCXX_HARDENING_MODE STREQUAL "extensive") elseif (LIBCXX_HARDENING_MODE STREQUAL "debug") config_define(8 _LIBCPP_HARDENING_MODE_DEFAULT) endif() +if (LIBCXX_ASSERTION_SEMANTIC STREQUAL "hardening_dependent") + config_define(2 _LIBCPP_ASSERTION_SEMANTIC_DEFAULT) +elseif (LIBCXX_ASSERTION_SEMANTIC STREQUAL "ignore") + config_define(4 _LIBCPP_ASSERTION_SEMANTIC_DEFAULT) +elseif (LIBCXX_ASSERTION_SEMANTIC STREQUAL "observe") + config_define(8 _LIBCPP_ASSERTION_SEMANTIC_DEFAULT) +elseif (LIBCXX_ASSERTION_SEMANTIC STREQUAL "quick_enforce") + config_define(16 _LIBCPP_ASSERTION_SEMANTIC_DEFAULT) +elseif (LIBCXX_ASSERTION_SEMANTIC STREQUAL "enforce") + config_define(32 _LIBCPP_ASSERTION_SEMANTIC_DEFAULT) +endif() if (LIBCXX_PSTL_BACKEND STREQUAL "serial") config_define(1 _LIBCPP_PSTL_BACKEND_SERIAL) diff --git a/libcxx/docs/Hardening.rst b/libcxx/docs/Hardening.rst index 1cdb3605c38ab..1360518379aef 100644 --- a/libcxx/docs/Hardening.rst +++ b/libcxx/docs/Hardening.rst @@ -328,6 +328,20 @@ following options to the compiler: All the :ref:`same notes ` apply to setting this macro as for setting ``_LIBCPP_HARDENING_MODE``. 
+Notes for vendors +----------------- + +Similarly to hardening modes, vendors can set the default assertion semantic by +providing ``LIBCXX_ASSERTION_SEMANTIC`` as a configuration option, with the +possible values of ``hardening_dependent``, ``ignore``, ``observe``, +``quick_enforce`` and ``enforce``. The default value is ``hardening_dependent`` +which is a special value that instructs the library to select the semantic based +on the hardening mode in effect (the mapping is described in +:ref:`the main section on assertion semantics `). + +This option controls both the assertion semantic that the precompiled library is +built with and the default assertion semantic that users will build with. + .. _override-assertion-handler: Overriding the assertion failure handler @@ -447,6 +461,13 @@ The first character of an ABI tag encodes the hardening mode: - ``d`` -- [d]ebug mode; - ``n`` -- [n]one mode. +The second character of an ABI tag encodes the assertion semantic: + +- ``i`` -- [i]gnore semantic; +- ``o`` -- [o]bserve semantic; +- ``q`` -- [q]uick-enforce semantic; +- ``e`` -- [e]nforce semantic. 
+ Hardened containers status ========================== diff --git a/libcxx/include/__config b/libcxx/include/__config index d079bf8b500b6..8f461599ffd5b 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -291,6 +291,16 @@ typedef __char32_t char32_t; # define _LIBCPP_HARDENING_SIG n // "none" # endif +# if _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_OBSERVE +# define _LIBCPP_ASSERTION_SEMANTIC_SIG o +# elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE +# define _LIBCPP_ASSERTION_SEMANTIC_SIG q +# elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_ENFORCE +# define _LIBCPP_ASSERTION_SEMANTIC_SIG e +# else +# define _LIBCPP_ASSERTION_SEMANTIC_SIG i // `ignore` +# endif + # if !_LIBCPP_HAS_EXCEPTIONS # define _LIBCPP_EXCEPTIONS_SIG n # else @@ -298,7 +308,9 @@ typedef __char32_t char32_t; # endif # define _LIBCPP_ODR_SIGNATURE \ - _LIBCPP_CONCAT(_LIBCPP_CONCAT(_LIBCPP_HARDENING_SIG, _LIBCPP_EXCEPTIONS_SIG), _LIBCPP_VERSION) + _LIBCPP_CONCAT( \ + _LIBCPP_CONCAT(_LIBCPP_CONCAT(_LIBCPP_HARDENING_SIG, _LIBCPP_ASSERTION_SEMANTIC_SIG), _LIBCPP_EXCEPTIONS_SIG), \ + _LIBCPP_VERSION) // This macro marks a symbol as being hidden from libc++'s ABI. This is achieved // on two levels: diff --git a/libcxx/include/__config_site.in b/libcxx/include/__config_site.in index b68c0c8258366..6dcca1849a96c 100644 --- a/libcxx/include/__config_site.in +++ b/libcxx/include/__config_site.in @@ -40,6 +40,7 @@ // Hardening. 
#cmakedefine _LIBCPP_HARDENING_MODE_DEFAULT @_LIBCPP_HARDENING_MODE_DEFAULT@ +#cmakedefine _LIBCPP_ASSERTION_SEMANTIC_DEFAULT @_LIBCPP_ASSERTION_SEMANTIC_DEFAULT@ // __USE_MINGW_ANSI_STDIO gets redefined on MinGW #ifdef __clang__ diff --git a/libcxx/include/__configuration/hardening.h b/libcxx/include/__configuration/hardening.h index bc2a8d078fa77..5723f5a65e1bf 100644 --- a/libcxx/include/__configuration/hardening.h +++ b/libcxx/include/__configuration/hardening.h @@ -135,13 +135,26 @@ _LIBCPP_HARDENING_MODE_EXTENSIVE, \ _LIBCPP_HARDENING_MODE_DEBUG #endif -// Hardening assertion semantics generally mirror the evaluation semantics of C++26 Contracts: +// The library provides the macro `_LIBCPP_ASSERTION_SEMANTIC` for configuring the assertion semantic used by hardening; +// it can be set to one of the following values: +// +// - `_LIBCPP_ASSERTION_SEMANTIC_IGNORE`; +// - `_LIBCPP_ASSERTION_SEMANTIC_OBSERVE`; +// - `_LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE`; +// - `_LIBCPP_ASSERTION_SEMANTIC_ENFORCE`. +// +// libc++ assertion semantics generally mirror the evaluation semantics of C++26 Contracts: // - `ignore` evaluates the assertion but doesn't do anything if it fails (note that it differs from the Contracts // `ignore` semantic which wouldn't evaluate the assertion at all); // - `observe` logs an error (indicating, if possible, that the error is fatal) and continues execution; // - `quick-enforce` terminates the program as fast as possible (via trapping); // - `enforce` logs an error and then terminates the program. // +// Additionally, a special `hardening-dependent` value selects the assertion semantic based on the hardening mode in +// effect: the production-capable modes (`fast` and `extensive`) map to `quick_enforce` and the `debug` mode maps to +// `enforce`. 
The `hardening-dependent` semantic cannot be selected explicitly, it is only used when no assertion +// semantic is provided by the user _and_ the library's default semantic is configured to be dependent on hardening. +// // Notes: // - Continuing execution after a hardening check fails results in undefined behavior; the `observe` semantic is meant // to make adopting hardening easier but should not be used outside of this scenario; @@ -150,32 +163,53 @@ _LIBCPP_HARDENING_MODE_DEBUG // hardened preconditions, however, be aware that using `ignore` does not produce a conforming "Hardened" // implementation, unlike the other semantics above. // clang-format off -# define _LIBCPP_ASSERTION_SEMANTIC_IGNORE (1 << 1) -# define _LIBCPP_ASSERTION_SEMANTIC_OBSERVE (1 << 2) -# define _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE (1 << 3) -# define _LIBCPP_ASSERTION_SEMANTIC_ENFORCE (1 << 4) +# define _LIBCPP_ASSERTION_SEMANTIC_HARDENING_DEPENDENT (1 << 1) +# define _LIBCPP_ASSERTION_SEMANTIC_IGNORE (1 << 2) +# define _LIBCPP_ASSERTION_SEMANTIC_OBSERVE (1 << 3) +# define _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE (1 << 4) +# define _LIBCPP_ASSERTION_SEMANTIC_ENFORCE (1 << 5) // clang-format on -// Allow users to define an arbitrary assertion semantic; otherwise, use the default mapping from modes to semantics. -// The default is for production-capable modes to use `quick-enforce` (i.e., trap) and for the `debug` mode to use -// `enforce` (i.e., log and abort). -#ifndef _LIBCPP_ASSERTION_SEMANTIC - -# if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG -# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_ENFORCE -# else -# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE -# endif - -#else - +// If the user attempts to configure the assertion semantic, check that it is allowed in the current environment. +#if defined(_LIBCPP_ASSERTION_SEMANTIC) # if !_LIBCPP_HAS_EXPERIMENTAL_LIBRARY # error "Assertion semantics are an experimental feature." 
# endif # if defined(_LIBCPP_CXX03_LANG) # error "Assertion semantics are not available in the C++03 mode." # endif +#endif // defined(_LIBCPP_ASSERTION_SEMANTIC) + +// User-provided semantic takes top priority -- don't override if set. +#ifndef _LIBCPP_ASSERTION_SEMANTIC -#endif // _LIBCPP_ASSERTION_SEMANTIC +# ifndef _LIBCPP_ASSERTION_SEMANTIC_DEFAULT +# error _LIBCPP_ASSERTION_SEMANTIC_DEFAULT is not defined. This definition should be set at configuration time in \ +the `__config_site` header, please make sure your installation of libc++ is not broken. +# endif + +# if _LIBCPP_ASSERTION_SEMANTIC_DEFAULT != _LIBCPP_ASSERTION_SEMANTIC_HARDENING_DEPENDENT +# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_DEFAULT +# else +# if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG +# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_ENFORCE +# else +# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE +# endif +# endif // _LIBCPP_ASSERTION_SEMANTIC_DEFAULT != _LIBCPP_ASSERTION_SEMANTIC_HARDENING_DEPENDENT + +#endif // #ifndef _LIBCPP_ASSERTION_SEMANTIC + +// Finally, validate the selected semantic (in case the user tries setting it to an incorrect value): +#if _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_IGNORE && \ + _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_OBSERVE && \ + _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE && \ + _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_ENFORCE +# error _LIBCPP_ASSERTION_SEMANTIC must be set to one of the following values: \ +_LIBCPP_ASSERTION_SEMANTIC_IGNORE, \ +_LIBCPP_ASSERTION_SEMANTIC_OBSERVE, \ +_LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE, \ +_LIBCPP_ASSERTION_SEMANTIC_ENFORCE +#endif #endif // _LIBCPP___CONFIGURATION_HARDENING_H diff --git a/libcxx/test/extensions/libcxx/odr_signature.assertion_semantics.sh.cpp b/libcxx/test/extensions/libcxx/odr_signature.assertion_semantics.sh.cpp new file mode 100644 index 
0000000000000..8ec1ec824c13c --- /dev/null +++ b/libcxx/test/extensions/libcxx/odr_signature.assertion_semantics.sh.cpp @@ -0,0 +1,71 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// ABI tags have no effect in MSVC mode. +// XFAIL: msvc + +// Assertion semantics are not supported in C++03 mode and currently are experimental. +// UNSUPPORTED: c++03, libcpp-has-no-experimental-hardening-observe-semantic + +// Test that we encode the assertion semantic in an ABI tag to avoid ODR violations when linking TUs that have different +// values for it. + +// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DTU1 -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=_LIBCPP_ASSERTION_SEMANTIC_IGNORE -o %t.tu1.o +// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DTU2 -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=_LIBCPP_ASSERTION_SEMANTIC_OBSERVE -o %t.tu2.o +// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DTU3 -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=_LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE -o %t.tu3.o +// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DTU4 -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=_LIBCPP_ASSERTION_SEMANTIC_ENFORCE -o %t.tu4.o +// RUN: %{cxx} %s %{flags} %{compile_flags} -c -DMAIN -o %t.main.o +// RUN: %{cxx} %t.tu1.o %t.tu2.o %t.tu3.o %t.tu4.o %t.main.o %{flags} %{link_flags} -o %t.exe +// RUN: %{exec} %t.exe + +#include "test_macros.h" + +// `ignore` assertion semantic. +#ifdef TU1 +# include <__config> +_LIBCPP_HIDE_FROM_ABI TEST_NOINLINE inline int f() { return 1; } +int tu1() { return f(); } +#endif // TU1 + +// `observe` assertion semantic. 
+#ifdef TU2 +# include <__config> +_LIBCPP_HIDE_FROM_ABI TEST_NOINLINE inline int f() { return 2; } +int tu2() { return f(); } +#endif // TU2 + +// `quick-enforce` assertion semantic. +#ifdef TU3 +# include <__config> +_LIBCPP_HIDE_FROM_ABI TEST_NOINLINE inline int f() { return 3; } +int tu3() { return f(); } +#endif // TU3 + +// `enforce` assertion semantic. +#ifdef TU4 +# include <__config> +_LIBCPP_HIDE_FROM_ABI TEST_NOINLINE inline int f() { return 4; } +int tu4() { return f(); } +#endif // TU4 + +#ifdef MAIN +# include + +int tu1(); +int tu2(); +int tu3(); +int tu4(); + +int main(int, char**) { + assert(tu1() == 1); + assert(tu2() == 2); + assert(tu3() == 3); + assert(tu4() == 4); + return 0; +} +#endif // MAIN diff --git a/libcxx/test/libcxx/assertions/semantics/assertion_semantic_incorrect_value.sh.cpp b/libcxx/test/libcxx/assertions/semantics/assertion_semantic_incorrect_value.sh.cpp new file mode 100644 index 0000000000000..d7c1e26491ba9 --- /dev/null +++ b/libcxx/test/libcxx/assertions/semantics/assertion_semantic_incorrect_value.sh.cpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This test verifies that setting the assertion semantic to a value that's not part of the predefined constants +// triggers a compile-time error. + +// Modules build produces a different error ("Could not build module 'std'"). 
+// UNSUPPORTED: clang-modules-build +// UNSUPPORTED: c++03, libcpp-has-no-experimental-hardening-observe-semantic +// REQUIRES: verify-support + +// RUN: %{verify} -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=42 +// `hardening-dependent` cannot be set as the semantic (it's only an indicator to use hardening-related logic to pick +// the final semantic). +// RUN: %{verify} -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=_LIBCPP_ASSERTION_SEMANTIC_HARDENING_DEPENDENT +// Make sure that common cases of misuse produce readable errors. We deliberately disallow setting the assertion +// semantic as if it were a boolean flag. +// RUN: %{verify} -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=0 +// RUN: %{verify} -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=1 +// RUN: %{verify} -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC + +#include + +// expected-error@*:* {{_LIBCPP_ASSERTION_SEMANTIC must be set to one of the following values: _LIBCPP_ASSERTION_SEMANTIC_IGNORE, _LIBCPP_ASSERTION_SEMANTIC_OBSERVE, _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE, _LIBCPP_ASSERTION_SEMANTIC_ENFORCE}} diff --git a/libcxx/test/libcxx/assertions/semantics/override_with_enforce_semantic.pass.cpp b/libcxx/test/libcxx/assertions/semantics/override_with_enforce_semantic.pass.cpp new file mode 100644 index 0000000000000..056864e1aea71 --- /dev/null +++ b/libcxx/test/libcxx/assertions/semantics/override_with_enforce_semantic.pass.cpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This test ensures that we can override the assertion semantic used by any checked hardening mode with `enforce` on +// a per-TU basis (this is valid for the `debug` mode as well, though a no-op). + +// `check_assertion.h` is only available starting from C++11 and requires Unix headers and regex support. +// REQUIRES: has-unix-headers +// UNSUPPORTED: c++03, no-localization +// UNSUPPORTED: libcpp-hardening-mode=none, libcpp-has-no-experimental-hardening-observe-semantic +// The ability to set a custom abort message is required to compare the assertion message. +// XFAIL: availability-verbose_abort-missing +// ADDITIONAL_COMPILE_FLAGS: -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=_LIBCPP_ASSERTION_SEMANTIC_ENFORCE + +#include +#include "check_assertion.h" + +int main(int, char**) { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(true, "Should not fire"); + TEST_LIBCPP_ASSERT_FAILURE([] { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "Should fire and log a message"); }(), + "Should fire and log a message"); + + return 0; +} diff --git a/libcxx/test/libcxx/assertions/semantics/override_with_ignore_semantic.pass.cpp b/libcxx/test/libcxx/assertions/semantics/override_with_ignore_semantic.pass.cpp new file mode 100644 index 0000000000000..b8c9028fe2e5c --- /dev/null +++ b/libcxx/test/libcxx/assertions/semantics/override_with_ignore_semantic.pass.cpp @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This test ensures that we can override the assertion semantic used by any hardening mode with `ignore` on a per-TU +// basis (this is valid for the `none` mode as well, though a no-op). + +// `check_assertion.h` is only available starting from C++11 and requires Unix headers and regex support. +// REQUIRES: has-unix-headers +// UNSUPPORTED: c++03, no-localization +// UNSUPPORTED: libcpp-has-no-experimental-hardening-observe-semantic +// ADDITIONAL_COMPILE_FLAGS: -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=_LIBCPP_ASSERTION_SEMANTIC_IGNORE + +#include +#include "check_assertion.h" + +int main(int, char**) { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(true, "Should not fire"); + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "Also should not fire"); + + return 0; +} diff --git a/libcxx/test/libcxx/assertions/semantics/override_with_observe_semantic.pass.cpp b/libcxx/test/libcxx/assertions/semantics/override_with_observe_semantic.pass.cpp new file mode 100644 index 0000000000000..a14c44f5a8e73 --- /dev/null +++ b/libcxx/test/libcxx/assertions/semantics/override_with_observe_semantic.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This test ensures that we can override the assertion semantic used by any checked hardening mode with `observe` on +// a per-TU basis. + +// `check_assertion.h` is only available starting from C++11 and requires Unix headers and regex support. 
+// REQUIRES: has-unix-headers +// UNSUPPORTED: c++03, no-localization +// UNSUPPORTED: libcpp-hardening-mode=none, libcpp-has-no-experimental-hardening-observe-semantic +// ADDITIONAL_COMPILE_FLAGS: -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=_LIBCPP_ASSERTION_SEMANTIC_OBSERVE + +#include +#include "check_assertion.h" + +int main(int, char**) { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(true, "Should not fire"); + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "Also should not fire"); + // TODO(hardening): check that a message is logged. + + return 0; +} diff --git a/libcxx/test/libcxx/assertions/semantics/override_with_quick_enforce_semantic.pass.cpp b/libcxx/test/libcxx/assertions/semantics/override_with_quick_enforce_semantic.pass.cpp new file mode 100644 index 0000000000000..be5038c4bb4ff --- /dev/null +++ b/libcxx/test/libcxx/assertions/semantics/override_with_quick_enforce_semantic.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This test ensures that we can override the assertion semantic used by any checked hardening mode with `quick-enforce` +// on a per-TU basis (this is valid for the `fast` and `extensive` modes as well, though a no-op). + +// `check_assertion.h` is only available starting from C++11 and requires Unix headers and regex support. 
+// REQUIRES: has-unix-headers +// UNSUPPORTED: c++03, no-localization +// UNSUPPORTED: libcpp-hardening-mode=none, libcpp-has-no-experimental-hardening-observe-semantic +// ADDITIONAL_COMPILE_FLAGS: -U_LIBCPP_ASSERTION_SEMANTIC -D_LIBCPP_ASSERTION_SEMANTIC=_LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE + +#include +#include "check_assertion.h" + +int main(int, char**) { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(true, "Should not fire"); + TEST_LIBCPP_ASSERT_FAILURE( + [] { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "Should fire without logging a message"); }(), + "The message should not matter"); + + return 0; +} diff --git a/libcxx/test/support/test.support/test_check_assertion.pass.cpp b/libcxx/test/support/test.support/test_check_assertion.pass.cpp index 78e47b32cdd2b..9d356ef30a501 100644 --- a/libcxx/test/support/test.support/test_check_assertion.pass.cpp +++ b/libcxx/test/support/test.support/test_check_assertion.pass.cpp @@ -21,11 +21,11 @@ template bool TestDeathTest( Outcome expected_outcome, DeathCause expected_cause, const char* stmt, Func&& func, const Matcher& matcher) { auto get_matcher = [&] { -#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG - return matcher; -#else +#if _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE (void)matcher; return MakeAnyMatcher(); +#else + return matcher; #endif }; @@ -69,7 +69,7 @@ bool TestDeathTest( // clang-format on -#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG +#if _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_ENFORCE DeathCause assertion_death_cause = DeathCause::VerboseAbort; #else DeathCause assertion_death_cause = DeathCause::Trap; @@ -99,7 +99,7 @@ int main(int, char**) { // Success -- assertion failure with a specific matcher. 
TEST_DEATH_TEST_MATCHES(Outcome::Success, assertion_death_cause, good_matcher, fail_assert()); -# if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG +# if _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_ENFORCE // Failure -- error message doesn't match. TEST_DEATH_TEST_MATCHES(Outcome::UnexpectedErrorMessage, assertion_death_cause, bad_matcher, fail_assert()); # endif From 9d7e341032134b8166def8c5996e491ff4d4071e Mon Sep 17 00:00:00 2001 From: Anton Shepelev <44649959+amemov@users.noreply.github.com> Date: Fri, 14 Nov 2025 10:39:20 -0800 Subject: [PATCH 21/56] [libc][POSIX][RISCV] Disabled clock_settime on RV32 (#168006) --- libc/config/linux/riscv/exclude.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libc/config/linux/riscv/exclude.txt b/libc/config/linux/riscv/exclude.txt index f2f553f78933c..1bffb268f8708 100644 --- a/libc/config/linux/riscv/exclude.txt +++ b/libc/config/linux/riscv/exclude.txt @@ -6,3 +6,8 @@ if(NOT HAVE_SYS_FACCESSAT2) libc.src.unistd.faccessat ) endif() +if(LIBC_TARGET_ARCHITECTURE_IS_RISCV32) + list(APPEND TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS + libc.src.time.clock_settime + ) +endif() \ No newline at end of file From 07740fb3b5c8140476355efdcea2abcd435f7d1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Fri, 14 Nov 2025 10:40:11 -0800 Subject: [PATCH 22/56] [mlir][NVVM][NFC] Remove useless options form run lines (#168098) Address post commit comments from #167958 --- mlir/test/Target/LLVMIR/nvvm/barrier.mlir | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/test/Target/LLVMIR/nvvm/barrier.mlir b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir index 1887f230bc952..a18633ef208c6 100644 --- a/mlir/test/Target/LLVMIR/nvvm/barrier.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/barrier.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-translate -mlir-to-llvmir %s 
-split-input-file --verify-diagnostics | FileCheck %s --check-prefix=LLVM -// RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --check-prefix=LLVM +// RUN: mlir-opt %s | mlir-opt | FileCheck %s // LLVM-LABEL: @llvm_nvvm_barrier( // LLVM-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]], i32 %[[redOperand:.*]]) From f7a8d201e33689c01c769a158b08b5a6e17a52b2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 14 Nov 2025 10:57:08 -0800 Subject: [PATCH 23/56] DeclareRuntimeLibcalls: Use RuntimeLibraryAnalysis (#167995) Also add boilerplate to have a live instance when running opt configured from CommandFlags / TargetOptions. --- llvm/include/llvm/Analysis/RuntimeLibcallInfo.h | 2 +- llvm/lib/Analysis/RuntimeLibcallInfo.cpp | 4 +++- .../Transforms/Utils/DeclareRuntimeLibcalls.cpp | 5 ++++- .../DeclareRuntimeLibcalls/codegen-opt-flags.ll | 17 +++++++++++++++++ llvm/tools/opt/NewPMDriver.cpp | 8 +++++--- llvm/tools/opt/NewPMDriver.h | 10 +++++++--- llvm/tools/opt/optdriver.cpp | 8 +++++++- 7 files changed, 44 insertions(+), 10 deletions(-) create mode 100644 llvm/test/Transforms/Util/DeclareRuntimeLibcalls/codegen-opt-flags.ll diff --git a/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h b/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h index a3e1014b417e5..28a2ec47f81ad 100644 --- a/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h +++ b/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h @@ -31,7 +31,7 @@ class LLVM_ABI RuntimeLibraryAnalysis friend AnalysisInfoMixin; LLVM_ABI static AnalysisKey Key; - RTLIB::RuntimeLibcallsInfo LibcallsInfo; + std::optional LibcallsInfo; }; class LLVM_ABI RuntimeLibraryInfoWrapper : public ImmutablePass { diff --git a/llvm/lib/Analysis/RuntimeLibcallInfo.cpp b/llvm/lib/Analysis/RuntimeLibcallInfo.cpp index 6fb4119aa73f2..9ea789a4ee45a 100644 --- a/llvm/lib/Analysis/RuntimeLibcallInfo.cpp +++ b/llvm/lib/Analysis/RuntimeLibcallInfo.cpp @@ -15,7 +15,9 @@ AnalysisKey 
RuntimeLibraryAnalysis::Key; RTLIB::RuntimeLibcallsInfo RuntimeLibraryAnalysis::run(const Module &M, ModuleAnalysisManager &) { - return RTLIB::RuntimeLibcallsInfo(M); + if (!LibcallsInfo) + LibcallsInfo = RTLIB::RuntimeLibcallsInfo(M); + return *LibcallsInfo; } INITIALIZE_PASS(RuntimeLibraryInfoWrapper, "runtime-library-info", diff --git a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp index dd8706cfb2855..94e8a33813b63 100644 --- a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp +++ b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/DeclareRuntimeLibcalls.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/IR/Module.h" #include "llvm/IR/RuntimeLibcalls.h" @@ -49,7 +50,9 @@ static void mergeAttributes(LLVMContext &Ctx, const Module &M, PreservedAnalyses DeclareRuntimeLibcallsPass::run(Module &M, ModuleAnalysisManager &MAM) { - RTLIB::RuntimeLibcallsInfo RTLCI(M.getTargetTriple()); + const RTLIB::RuntimeLibcallsInfo &RTLCI = + MAM.getResult(M); + LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); const Triple &TT = M.getTargetTriple(); diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/codegen-opt-flags.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/codegen-opt-flags.ll new file mode 100644 index 0000000000000..a5da90da6a74a --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/codegen-opt-flags.ll @@ -0,0 +1,17 @@ +; REQUIRES: arm-registered-target + +; Make sure that codegen flags work to change the set of libcalls +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm-none-linux-gnueabi -float-abi=hard -exception-model=sjlj -meabi=4 < %s | FileCheck %s + +; Depends on -exception-model +; CHECK: declare arm_aapcs_vfpcc void @_Unwind_SjLj_Register(...) 
+; CHECK: declare arm_aapcs_vfpcc void @_Unwind_SjLj_Resume(...) +; CHECK: declare arm_aapcs_vfpcc void @_Unwind_SjLj_Unregister(...) + +; Calling convention depends on -float-abi +; CHECK: declare arm_aapcs_vfpcc void @__addtf3(...) + +; memclr functions depend on -meabi +; CHECK: declare arm_aapcscc void @__aeabi_memclr(...) +; CHECK: declare arm_aapcscc void @__aeabi_memclr4(...) +; CHECK: declare arm_aapcscc void @__aeabi_memclr8(...) diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index a383415ff1cb2..01d7ac8e3f959 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/Config/llvm-config.h" @@ -351,9 +352,9 @@ static void registerEPCallbacks(PassBuilder &PB) { bool llvm::runPassPipeline( StringRef Arg0, Module &M, TargetMachine *TM, TargetLibraryInfoImpl *TLII, - ToolOutputFile *Out, ToolOutputFile *ThinLTOLinkOut, - ToolOutputFile *OptRemarkFile, StringRef PassPipeline, - ArrayRef PassPlugins, + RTLIB::RuntimeLibcallsInfo &RTLCI, ToolOutputFile *Out, + ToolOutputFile *ThinLTOLinkOut, ToolOutputFile *OptRemarkFile, + StringRef PassPipeline, ArrayRef PassPlugins, ArrayRef> PassBuilderCallbacks, OutputKind OK, VerifierKind VK, bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, @@ -416,6 +417,7 @@ bool llvm::runPassPipeline( FunctionAnalysisManager FAM; CGSCCAnalysisManager CGAM; ModuleAnalysisManager MAM; + MAM.registerPass([&] { return RuntimeLibraryAnalysis(std::move(RTLCI)); }); PassInstrumentationCallbacks PIC; PrintPassOptions PrintPassOpts; diff --git a/llvm/tools/opt/NewPMDriver.h b/llvm/tools/opt/NewPMDriver.h index 042d5d4bbfe47..31da61b9c0cae 100644 --- 
a/llvm/tools/opt/NewPMDriver.h +++ b/llvm/tools/opt/NewPMDriver.h @@ -31,6 +31,10 @@ class TargetMachine; class ToolOutputFile; class TargetLibraryInfoImpl; +namespace RTLIB { +struct RuntimeLibcallsInfo; +} + extern cl::opt DebugifyEach; extern cl::opt DebugifyExport; @@ -67,9 +71,9 @@ void printPasses(raw_ostream &OS); /// nullptr. bool runPassPipeline( StringRef Arg0, Module &M, TargetMachine *TM, TargetLibraryInfoImpl *TLII, - ToolOutputFile *Out, ToolOutputFile *ThinLinkOut, - ToolOutputFile *OptRemarkFile, StringRef PassPipeline, - ArrayRef PassPlugins, + RTLIB::RuntimeLibcallsInfo &RTLCI, ToolOutputFile *Out, + ToolOutputFile *ThinLinkOut, ToolOutputFile *OptRemarkFile, + StringRef PassPipeline, ArrayRef PassPlugins, ArrayRef> PassBuilderCallbacks, opt_tool::OutputKind OK, opt_tool::VerifierKind VK, bool ShouldPreserveAssemblyUseListOrder, diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index ef6e5412bda48..4cf117f227c00 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/RegionPass.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/AsmParser/Parser.h" @@ -672,6 +673,11 @@ optMain(int argc, char **argv, // Add an appropriate TargetLibraryInfo pass for the module's triple. TargetLibraryInfoImpl TLII(ModuleTriple); + // FIXME: Get ABI name from MCOptions + RTLIB::RuntimeLibcallsInfo RTLCI(ModuleTriple, codegen::getExceptionModel(), + codegen::getFloatABIForCalls(), + codegen::getEABIVersion()); + // The -disable-simplify-libcalls flag actually disables all builtin optzns. if (DisableSimplifyLibCalls) TLII.disableAllFunctions(); @@ -746,7 +752,7 @@ optMain(int argc, char **argv, // string. Hand off the rest of the functionality to the new code for that // layer. 
if (!runPassPipeline( - argv[0], *M, TM.get(), &TLII, Out.get(), ThinLinkOut.get(), + argv[0], *M, TM.get(), &TLII, RTLCI, Out.get(), ThinLinkOut.get(), RemarksFile.get(), Pipeline, PluginList, PassBuilderCallbacks, OK, VK, /* ShouldPreserveAssemblyUseListOrder */ false, /* ShouldPreserveBitcodeUseListOrder */ true, EmitSummaryIndex, From 70b7958ce063fe8f9234b25a1edc4f5c89758c21 Mon Sep 17 00:00:00 2001 From: Luke Hutton Date: Fri, 14 Nov 2025 18:58:27 +0000 Subject: [PATCH 24/56] [mlir][tosa] Fix scatter duplicate indices check for int64 (#168085) This commit fixes the validation check for duplicate indices in the TOSA scatter operation when using int64 index tensors. Previously, use of int64 index tensors would cause a crash. --- mlir/lib/Dialect/Tosa/Utils/ConversionUtils.cpp | 9 +++++---- mlir/test/Dialect/Tosa/invalid.mlir | 12 +++++++++++- .../Tosa/tosa-validation-version-1p1-valid.mlir | 10 ++++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Dialect/Tosa/Utils/ConversionUtils.cpp b/mlir/lib/Dialect/Tosa/Utils/ConversionUtils.cpp index ac5d6207259eb..62c015a85ee36 100644 --- a/mlir/lib/Dialect/Tosa/Utils/ConversionUtils.cpp +++ b/mlir/lib/Dialect/Tosa/Utils/ConversionUtils.cpp @@ -216,22 +216,23 @@ mlir::tosa::convertFromIntAttr(const DenseElementsAttr &attr, const int rank) { bool mlir::tosa::hasUniqueConstantScatterIndices( ShapedType indicesType, DenseIntElementsAttr indicesAttr) { - llvm::ArrayRef const indicesShape = indicesType.getShape(); + const llvm::ArrayRef indicesShape = indicesType.getShape(); const unsigned int indicesRank = indicesShape.size(); const unsigned int lastDimSize = indicesShape[indicesRank - 1]; // check each batch of indices from the flat indicesAttr values // for duplicates - auto const indicesValues = indicesAttr.getValues(); + auto const indicesValues = indicesAttr.getValues(); assert( (indicesValues.size() % lastDimSize == 0) && "Constant indices data length should be a multiple of 
indicesShape[-1]"); - std::vector indices(lastDimSize); + std::vector indices(lastDimSize); for (auto beg = indicesValues.begin(); beg < indicesValues.end(); beg += lastDimSize) { std::copy(beg, beg + lastDimSize, indices.begin()); - std::sort(indices.begin(), indices.end()); + std::sort(indices.begin(), indices.end(), + [](const APInt &a, const APInt &b) { return a.slt(b); }); if (std::adjacent_find(indices.begin(), indices.end()) != indices.end()) { // found duplicate values in indices in batch return false; diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index c9e03ca53a729..3d24928487ed2 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -4,7 +4,7 @@ // validation flow. //-------------------------------------------------------------------------------------------------- -// RUN: mlir-opt %s -split-input-file -verify-diagnostics -tosa-attach-target="profiles=pro_int,pro_fp extensions=int16,int4,bf16,fp8e4m3,fp8e5m2,fft,variable,controlflow,doubleround,inexactround" -tosa-validate="strict-op-spec-alignment" +// RUN: mlir-opt %s -split-input-file -verify-diagnostics -tosa-attach-target="specification_version=1.1.draft profiles=pro_int,pro_fp extensions=int16,int4,int64,bf16,fp8e4m3,fp8e5m2,fft,variable,controlflow,doubleround,inexactround" -tosa-validate="strict-op-spec-alignment" func.func @test_cast(%arg0: tensor) -> tensor<5xi32> { @@ -2044,6 +2044,16 @@ func.func @test_scatter_duplicate_indices(%arg0: tensor<2x52x3xf32>, %arg2: tens // ----- +// CHECK-LABEL: test_scatter_duplicate_indices_int64 +func.func @test_scatter_duplicate_indices_int64(%arg0: tensor<2x52x3xf32>, %arg2: tensor<2x12x3xf32>) -> tensor<2x52x3xf32> { + %indices = "tosa.const"() { values = dense<[[1, 2, 3, 4, 5, 6, 7, 8, 9, 3, 11, 12], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]> : tensor<2x12xi64> } : () -> tensor<2x12xi64> + // expected-error@+1 {{'tosa.scatter' op indices values contain duplicates}} + %0 = 
tosa.scatter %arg0, %indices, %arg2 : (tensor<2x52x3xf32>, tensor<2x12xi64>, tensor<2x12x3xf32>) -> tensor<2x52x3xf32> + return %0 : tensor<2x52x3xf32> +} + +// ----- + func.func @test_reduce_all_unsupported_data_types(%arg0: tensor<2x12x11xf32>) -> tensor<1x12x11xf32> { // expected-error@+1 {{'tosa.reduce_all' op illegal: operation operand/result data types did not align with any profile or extension, got (f32,f32), did you mean (i1,i1)?}} %0 = tosa.reduce_all %arg0 {axis = 0 : i32} : (tensor<2x12x11xf32>) -> tensor<1x12x11xf32> diff --git a/mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir b/mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir index acbff73b8b948..c285ae3cf44ee 100644 --- a/mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir +++ b/mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir @@ -2,6 +2,7 @@ // ----- +// CHECK-LABEL: test_matmul_fp8_mixed_precision_operands func.func @test_matmul_fp8_mixed_precision_operands(%arg0: tensor<1x14x19xf8E4M3FN>, %arg1: tensor<1x19x28xf8E5M2>) -> tensor<1x14x28xf16> { %azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E4M3FN>}> : () -> tensor<1xf8E4M3FN> %bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E5M2>}> : () -> tensor<1xf8E5M2> @@ -146,3 +147,12 @@ func.func @test_argmax_bf16_i64(%arg0: tensor<12x8x16xbf16>) -> tensor<12x16xi64 %0 = tosa.argmax %arg0 { axis = 1 : i32 } : (tensor<12x8x16xbf16>) -> tensor<12x16xi64> return %0 : tensor<12x16xi64> } + +// ----- + +// CHECK-LABEL: test_scatter_const_indices_int64 +func.func @test_scatter_const_indices_int64(%arg0: tensor<2x52x3xf32>, %arg2: tensor<2x12x3xf32>) -> tensor<2x52x3xf32> { + %indices = "tosa.const"() { values = dense<[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]> : tensor<2x12xi64> } : () -> tensor<2x12xi64> + %0 = tosa.scatter %arg0, %indices, %arg2 : (tensor<2x52x3xf32>, tensor<2x12xi64>, tensor<2x12x3xf32>) -> tensor<2x52x3xf32> + return %0 : 
tensor<2x52x3xf32> +} From dd7a000a3114543d58786bbfb84870446034e767 Mon Sep 17 00:00:00 2001 From: Igor Gorban Date: Fri, 14 Nov 2025 20:05:30 +0100 Subject: [PATCH 25/56] [InstSimplify] Fix crash when optimizing minmax with bitcast constant vectors (#168055) When simplifying min/max intrinsics with fixed-size vector constants, InstructionSimplify attempts to optimize element-wise. However, getAggregateElement() can return null for certain constant expressions like bitcasts, leading to a null pointer dereference. This patch adds a check to bail out of the optimization when getAggregateElement() returns null, preventing the crash while maintaining correct behavior for normal constant vectors. Fixes crash with patterns like: call <2 x half> @llvm.minnum.v2f16(<2 x half> %x, <2 x half> bitcast (<1 x i32> to <2 x half>)) --- llvm/lib/Analysis/InstructionSimplify.cpp | 8 ++- .../Transforms/InstSimplify/fminmax-folds.ll | 70 +++++++++++++++++++ 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 2a0a6a2d302b1..6f44713bd22cd 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6985,8 +6985,12 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, // VectorShuffle instruction, which is not allowed in simplifyBinOp. 
OptResult = MinMaxOptResult::UseEither; for (unsigned i = 0; i != ElemCount.getFixedValue(); ++i) { - auto ElemResult = OptimizeConstMinMax(C->getAggregateElement(i), - IID, Call, &NewConst); + auto *Elt = C->getAggregateElement(i); + if (!Elt) { + OptResult = MinMaxOptResult::CannotOptimize; + break; + } + auto ElemResult = OptimizeConstMinMax(Elt, IID, Call, &NewConst); if (ElemResult == MinMaxOptResult::CannotOptimize || (ElemResult != OptResult && OptResult != MinMaxOptResult::UseEither && diff --git a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll index 3a03f8627ab68..091e85920c0df 100644 --- a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll +++ b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll @@ -885,3 +885,73 @@ define void @minmax_minmax_xy_maxmin_yx(double %x, double %y, ptr %minnum_res, p store double %final_maximumnum, ptr %maximumnum_res ret void } + +;############################################################### +;# Constant Expression Vector Tests # +;############################################################### +; Test that minmax intrinsics with constant expression vectors don't crash +; when getAggregateElement returns null for certain constant expressions. 
+; These tests cover various scenarios where getAggregateElement() fails: +; - Bitcast from mismatched vector element counts +; - Bitcast from integer to float vectors +; - Bitcast from i64 with different element boundaries + +; Test with bitcast from <1 x i32> to <2 x half> (element count mismatch) +define <2 x half> @minmax_bitcast_v2f16_minnum(<2 x half> %x) { +; CHECK-LABEL: @minmax_bitcast_v2f16_minnum( +; CHECK-NEXT: [[RESULT:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[X:%.*]], <2 x half> bitcast (<1 x i32> splat (i32 1078530011) to <2 x half>)) +; CHECK-NEXT: ret <2 x half> [[RESULT]] +; + %result = call <2 x half> @llvm.minnum.v2f16(<2 x half> %x, <2 x half> bitcast (<1 x i32> to <2 x half>)) + ret <2 x half> %result +} + +; Test with bitcast from <2 x i32> to <4 x half> (different element boundaries) +define <4 x half> @minmax_bitcast_v4f16_maxnum(<4 x half> %x) { +; CHECK-LABEL: @minmax_bitcast_v4f16_maxnum( +; CHECK-NEXT: [[RESULT:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[X:%.*]], <4 x half> bitcast (<2 x i32> to <4 x half>)) +; CHECK-NEXT: ret <4 x half> [[RESULT]] +; + %result = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %x, <4 x half> bitcast (<2 x i32> to <4 x half>)) + ret <4 x half> %result +} + +; Test with bitcast from <1 x i64> to <2 x float> (scalar to vector bitcast) +define <2 x float> @minmax_bitcast_v2f32_minimum(<2 x float> %x) { +; CHECK-LABEL: @minmax_bitcast_v2f32_minimum( +; CHECK-NEXT: [[RESULT:%.*]] = call <2 x float> @llvm.minimum.v2f32(<2 x float> [[X:%.*]], <2 x float> bitcast (<1 x i64> splat (i64 4638564619268087808) to <2 x float>)) +; CHECK-NEXT: ret <2 x float> [[RESULT]] +; + %result = call <2 x float> @llvm.minimum.v2f32(<2 x float> %x, <2 x float> bitcast (<1 x i64> to <2 x float>)) + ret <2 x float> %result +} + +; Test with bitcast from <1 x double> to <4 x half> (type size mismatch) +define <4 x half> @minmax_bitcast_v4f16_maximum(<4 x half> %x) { +; CHECK-LABEL: 
@minmax_bitcast_v4f16_maximum( +; CHECK-NEXT: [[RESULT:%.*]] = call <4 x half> @llvm.maximum.v4f16(<4 x half> [[X:%.*]], <4 x half> bitcast (<1 x double> splat (double 0x400921FB54442D18) to <4 x half>)) +; CHECK-NEXT: ret <4 x half> [[RESULT]] +; + %result = call <4 x half> @llvm.maximum.v4f16(<4 x half> %x, <4 x half> bitcast (<1 x double> to <4 x half>)) + ret <4 x half> %result +} + +; Test with bitcast from <2 x i16> to <2 x half> (integer to float) +define <2 x half> @minmax_bitcast_v2f16_minimumnum(<2 x half> %x) { +; CHECK-LABEL: @minmax_bitcast_v2f16_minimumnum( +; CHECK-NEXT: [[RESULT:%.*]] = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> [[X:%.*]], <2 x half> ) +; CHECK-NEXT: ret <2 x half> [[RESULT]] +; + %result = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %x, <2 x half> bitcast (<2 x i16> to <2 x half>)) + ret <2 x half> %result +} + +; Test with bitcast from <4 x i16> to <4 x half> (matching element count but getAggregateElement may fail) +define <4 x half> @minmax_bitcast_v4f16_maximumnum(<4 x half> %x) { +; CHECK-LABEL: @minmax_bitcast_v4f16_maximumnum( +; CHECK-NEXT: [[RESULT:%.*]] = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> [[X:%.*]], <4 x half> ) +; CHECK-NEXT: ret <4 x half> [[RESULT]] +; + %result = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> %x, <4 x half> bitcast (<4 x i16> to <4 x half>)) + ret <4 x half> %result +} From 9fcb67542fcb380caeeea87fd60b8d533e4aa265 Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Fri, 14 Nov 2025 11:07:00 -0800 Subject: [PATCH 26/56] [docs] Fix llvm-strip -T flag section (#167987) This was previously under the ELF specific options section, but is actually only supported for Mach-O --- llvm/docs/CommandGuide/llvm-strip.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/docs/CommandGuide/llvm-strip.rst b/llvm/docs/CommandGuide/llvm-strip.rst index b4d6dc08dc1ac..9fd705237d2cc 100644 --- a/llvm/docs/CommandGuide/llvm-strip.rst +++ b/llvm/docs/CommandGuide/llvm-strip.rst @@ 
-179,6 +179,13 @@ them. segments. Note that many tools will not be able to use an object without section headers. +MACH-O-SPECIFIC OPTIONS +----------------------- + +The following options are implemented only for Mach-O objects. If used with other +objects, :program:`llvm-strip` will either emit an error or silently ignore +them. + .. option:: -T Remove Swift symbols. From 0bdbf2cdfdefbc150686bb379c84109c703bd56a Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Fri, 14 Nov 2025 11:07:13 -0800 Subject: [PATCH 27/56] [docs] Fix invalid header length in llvm-ir2vec.rst (#168104) This also improves the error message to be more clear for folks who haven't used a lot of rst. --- llvm/docs/CommandGuide/llvm-ir2vec.rst | 2 +- llvm/docs/conf.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-ir2vec.rst b/llvm/docs/CommandGuide/llvm-ir2vec.rst index f51da065b43d8..6014df941147e 100644 --- a/llvm/docs/CommandGuide/llvm-ir2vec.rst +++ b/llvm/docs/CommandGuide/llvm-ir2vec.rst @@ -1,5 +1,5 @@ llvm-ir2vec - IR2Vec and MIR2Vec Embedding Generation Tool -=========================================================== +========================================================== .. 
program:: llvm-ir2vec diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py index d9fa6961032b4..29b1024ff482e 100644 --- a/llvm/docs/conf.py +++ b/llvm/docs/conf.py @@ -273,7 +273,8 @@ def process_rst(name): if len(header) != len(title): print( - "error: invalid header in %r (does not match title)" % file_subpath, + "error: invalid header length in %r (does not match length of title)" + % file_subpath, file=sys.stderr, ) if " - " not in title: From 0190951a3e4f1962b4ee04fdb3cc88fedaeff27b Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Fri, 14 Nov 2025 11:13:57 -0800 Subject: [PATCH 28/56] [AMDGPU] Update buffer fat pointer docs for gfx1250, fix formatting (#167818) --- llvm/docs/AMDGPUUsage.rst | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index b8b372d4113c1..7267f6bb88a58 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1011,9 +1011,9 @@ supported for the ``amdgcn`` target. bounds checking may be disabled, buffer fat pointers may choose to enable it or not). The cache swizzle support introduced in gfx942 may be used. - These pointers can be created by `addrspacecast` from a buffer resource - (`ptr addrspace(8)`) or by using `llvm.amdgcn.make.buffer.rsrc` to produce a - `ptr addrspace(7)` directly, which produces a buffer fat pointer with an initial + These pointers can be created by ``addrspacecast`` from a buffer resource + (``ptr addrspace(8)```) or by using `llvm.amdgcn.make.buffer.rsrc` to produce a + ``ptr addrspace(7)`` directly, which produces a buffer fat pointer with an initial offset of 0 and prevents the address space cast from being rewritten away. The ``align`` attribute on operations from buffer fat pointers is deemed to apply @@ -1028,26 +1028,33 @@ supported for the ``amdgcn`` target. 
**Buffer Resource** The buffer resource pointer, in address space 8, is the newer form for representing buffer descriptors in AMDGPU IR, replacing their - previous representation as `<4 x i32>`. It is a non-integral pointer - that represents a 128-bit buffer descriptor resource (`V#`). + previous representation as ``<4 x i32>``. It is a non-integral pointer + that represents a 128-bit buffer descriptor resource (``V#``). Since, in general, a buffer resource supports complex addressing modes that cannot be easily represented in LLVM (such as implicit swizzled access to structured - buffers), it is **illegal** to perform non-trivial address computations, such as - ``getelementptr`` operations, on buffer resources. They may be passed to - AMDGPU buffer intrinsics, and they may be converted to and from ``i128``. + buffers), performing address computations such as ``getelementptr`` is not + recommended on ``ptr addrspace(8)``s (if such computations are performed, the + offset must be wavefront-uniform.) Note that such a usage of GEP is currently + **unimplemented** in the backend, as it would require a wrapping 48-bit + addition. Buffer resources may be passed to AMDGPU buffer intrinsics, and they + may be converted to and from ``i128``. Casting a buffer resource to a buffer fat pointer is permitted and adds an offset of 0. Buffer resources can be created from 64-bit pointers (which should be either - generic or global) using the `llvm.amdgcn.make.buffer.rsrc` intrinsic, which + generic or global) using the ``llvm.amdgcn.make.buffer.rsrc`` intrinsic, which takes the pointer, which becomes the base of the resource, the 16-bit stride (and swzizzle control) field stored in bits `63:48` of a `V#`, the 32-bit NumRecords/extent field (bits `95:64`), and the 32-bit flags field (bits `127:96`). The specific interpretation of these fields varies by the target architecture and is detailed in the ISA descriptions. 
+ On gfx1250, the base pointer is instead truncated to 57 bits and the NumRecords + field is 45 bits, which necessitated a change to ``make.buffer.rsrcs``'s arguments + in order to make that field an ``i64``. + When buffer resources are passed to buffer intrinsics such as ``llvm.amdgcn.raw.ptr.buffer.load`` or ``llvm.amdgcn.struct.ptr.buffer.store``, the ``align`` attribute on the @@ -1079,9 +1086,9 @@ supported for the ``amdgcn`` target. the stride is the size of a structured element, the "add tid" flag must be 0, and the swizzle enable bits must be off. - These pointers can be created by `addrspacecast` from a buffer resource - (`ptr addrspace(8)`) or by using `llvm.amdgcn.make.buffer.rsrc` to produce a - `ptr addrspace(9)` directly, which produces a buffer strided pointer whose initial + These pointers can be created by ``addrspacecast`` from a buffer resource + (``ptr addrspace(8)``) or by using ``llvm.amdgcn.make.buffer.rsrc`` to produce a + ``ptr addrspace(9)``` directly, which produces a buffer strided pointer whose initial index and offset values are both 0. This prevents the address space cast from being rewritten away. From 3a08e423f1e7024c849ddc9f97daedfd5a37eb78 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Fri, 14 Nov 2025 11:17:04 -0800 Subject: [PATCH 29/56] Re-land [lldb][NFC] Mark ValueObject library with NO_PLUGIN_DEPENDENCIES (#167933) This is a fixed version of #167886. The build previously failed with `BUILD_SHARED_LIBS=ON`. After trying that locally, I uncovered a few other instances of lldb non-plugin libraries depending on clang transitively through lldbValueObject, so I added the correct clang libraries to their dependencies. 
--- lldb/source/Commands/CMakeLists.txt | 3 +++ lldb/source/Expression/CMakeLists.txt | 1 + lldb/source/ValueObject/CMakeLists.txt | 4 +--- lldb/source/Version/CMakeLists.txt | 3 +++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lldb/source/Commands/CMakeLists.txt b/lldb/source/Commands/CMakeLists.txt index 69e4c45f0b8e5..f2e62243e999e 100644 --- a/lldb/source/Commands/CMakeLists.txt +++ b/lldb/source/Commands/CMakeLists.txt @@ -58,6 +58,9 @@ add_lldb_library(lldbCommands NO_PLUGIN_DEPENDENCIES lldbUtility lldbValueObject lldbVersion + CLANG_LIBS + clangFrontend + clangSerialization ) add_dependencies(lldbCommands LLDBOptionsGen) diff --git a/lldb/source/Expression/CMakeLists.txt b/lldb/source/Expression/CMakeLists.txt index 08dc536781f9c..515289cd0f091 100644 --- a/lldb/source/Expression/CMakeLists.txt +++ b/lldb/source/Expression/CMakeLists.txt @@ -24,6 +24,7 @@ add_lldb_library(lldbExpression NO_PLUGIN_DEPENDENCIES LINK_COMPONENTS Core + DebugInfoDWARF ExecutionEngine Support LINK_LIBS diff --git a/lldb/source/ValueObject/CMakeLists.txt b/lldb/source/ValueObject/CMakeLists.txt index 2a61407521bec..f0fe7f374a506 100644 --- a/lldb/source/ValueObject/CMakeLists.txt +++ b/lldb/source/ValueObject/CMakeLists.txt @@ -1,4 +1,4 @@ -add_lldb_library(lldbValueObject +add_lldb_library(lldbValueObject NO_PLUGIN_DEPENDENCIES DILAST.cpp DILEval.cpp DILLexer.cpp @@ -34,6 +34,4 @@ add_lldb_library(lldbValueObject lldbSymbol lldbTarget lldbUtility - lldbPluginCPlusPlusLanguage - lldbPluginObjCLanguage ) diff --git a/lldb/source/Version/CMakeLists.txt b/lldb/source/Version/CMakeLists.txt index 8b0acb9ef7550..d179805427007 100644 --- a/lldb/source/Version/CMakeLists.txt +++ b/lldb/source/Version/CMakeLists.txt @@ -40,4 +40,7 @@ add_lldb_library(lldbVersion NO_PLUGIN_DEPENDENCIES ADDITIONAL_HEADERS ${version_inc} ${vcs_version_inc} + + CLANG_LIBS + clangBasic ) From 590ab43e8aeec5762b0f4b722993ba0faf710c55 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 14 
Nov 2025 11:19:21 -0800 Subject: [PATCH 30/56] RuntimeLibcalls: Move VectorLibrary handling into TargetOptions (#167996) This fixes the -fveclib flag getting lost on its way to the backend. Previously this was its own cl::opt with a random boolean. Move the flag handling into CommandFlags with other backend ABI-ish options, and have clang directly set it, rather than forcing it to go through command line parsing. Prior to de68181d7f, codegen used TargetLibraryInfo to find the vector function. Clang has special handling for TargetLibraryInfo, where it would directly construct one with the vector library in the pass pipeline. RuntimeLibcallsInfo currently is not used as an analysis in codegen, and needs to know the vector library when constructed. RuntimeLibraryAnalysis could follow the same trick that TargetLibraryInfo is using in the future, but a lot more boilerplate changes are needed to thread that analysis through codegen. Ideally this would come from an IR module flag, and nothing would be in TargetOptions. For now, it's better for all of these sorts of controls to be consistent. 
--- clang/lib/CodeGen/BackendUtil.cpp | 31 +++++++++++++++++ cross-project-tests/CMakeLists.txt | 7 ++++ cross-project-tests/veclib/lit.local.cfg | 2 ++ cross-project-tests/veclib/veclib-sincos.c | 21 ++++++++++++ .../include/llvm/Analysis/TargetLibraryInfo.h | 5 +-- llvm/include/llvm/CodeGen/CommandFlags.h | 2 ++ llvm/include/llvm/IR/RuntimeLibcalls.h | 4 ++- llvm/include/llvm/IR/SystemLibraries.h | 5 --- llvm/include/llvm/Target/TargetOptions.h | 4 +++ llvm/lib/Analysis/TargetLibraryInfo.cpp | 15 ++++---- llvm/lib/CodeGen/CommandFlags.cpp | 24 +++++++++++++ llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +- llvm/lib/IR/CMakeLists.txt | 1 - llvm/lib/IR/RuntimeLibcalls.cpp | 5 +-- llvm/lib/IR/SystemLibraries.cpp | 34 ------------------- llvm/tools/llc/llc.cpp | 3 +- llvm/tools/opt/optdriver.cpp | 9 ++++- llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn | 1 - 18 files changed, 121 insertions(+), 55 deletions(-) create mode 100644 cross-project-tests/veclib/lit.local.cfg create mode 100644 cross-project-tests/veclib/veclib-sincos.c delete mode 100644 llvm/lib/IR/SystemLibraries.cpp diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index b967a26dd19d7..f1e20403ad668 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -481,6 +481,36 @@ static bool initTargetOptions(const CompilerInstance &CI, Options.JMCInstrument = CodeGenOpts.JMCInstrument; Options.XCOFFReadOnlyPointers = CodeGenOpts.XCOFFReadOnlyPointers; + switch (CodeGenOpts.getVecLib()) { + case llvm::driver::VectorLibrary::NoLibrary: + Options.VectorLibrary = llvm::VectorLibrary::NoLibrary; + break; + case llvm::driver::VectorLibrary::Accelerate: + Options.VectorLibrary = llvm::VectorLibrary::Accelerate; + break; + case llvm::driver::VectorLibrary::Darwin_libsystem_m: + Options.VectorLibrary = llvm::VectorLibrary::DarwinLibSystemM; + break; + case llvm::driver::VectorLibrary::LIBMVEC: + Options.VectorLibrary = llvm::VectorLibrary::LIBMVEC; + break; + 
case llvm::driver::VectorLibrary::MASSV: + Options.VectorLibrary = llvm::VectorLibrary::MASSV; + break; + case llvm::driver::VectorLibrary::SVML: + Options.VectorLibrary = llvm::VectorLibrary::SVML; + break; + case llvm::driver::VectorLibrary::SLEEF: + Options.VectorLibrary = llvm::VectorLibrary::SLEEFGNUABI; + break; + case llvm::driver::VectorLibrary::ArmPL: + Options.VectorLibrary = llvm::VectorLibrary::ArmPL; + break; + case llvm::driver::VectorLibrary::AMDLIBM: + Options.VectorLibrary = llvm::VectorLibrary::AMDLIBM; + break; + } + switch (CodeGenOpts.getSwiftAsyncFramePointer()) { case CodeGenOptions::SwiftAsyncFramePointerKind::Auto: Options.SwiftAsyncFramePointer = @@ -584,6 +614,7 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts, BackendArgs.push_back("-limit-float-precision"); BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str()); } + // Check for the default "clang" invocation that won't set any cl::opt values. // Skip trying to parse the command line invocation to avoid the issues // described below. diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt index 8e94579736537..3f932b8c7fd22 100644 --- a/cross-project-tests/CMakeLists.txt +++ b/cross-project-tests/CMakeLists.txt @@ -104,6 +104,13 @@ add_lit_testsuite(check-cross-dtlto "Running DTLTO cross-project tests" DEPENDS ${CROSS_PROJECT_TEST_DEPS} ) +# veclib tests. +add_lit_testsuite(check-cross-veclib "Running veclib cross-project tests" + ${CMAKE_CURRENT_BINARY_DIR}/veclib + EXCLUDE_FROM_CHECK_ALL + DEPENDS ${CROSS_PROJECT_TEST_DEPS} + ) + # Add check-cross-project-* targets. 
add_lit_testsuites(CROSS_PROJECT ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${CROSS_PROJECT_TEST_DEPS} diff --git a/cross-project-tests/veclib/lit.local.cfg b/cross-project-tests/veclib/lit.local.cfg new file mode 100644 index 0000000000000..530f4c01646ff --- /dev/null +++ b/cross-project-tests/veclib/lit.local.cfg @@ -0,0 +1,2 @@ +if "clang" not in config.available_features: + config.unsupported = True diff --git a/cross-project-tests/veclib/veclib-sincos.c b/cross-project-tests/veclib/veclib-sincos.c new file mode 100644 index 0000000000000..657d0df199522 --- /dev/null +++ b/cross-project-tests/veclib/veclib-sincos.c @@ -0,0 +1,21 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang -S -target aarch64-unknown-linux-gnu -O2 -fno-math-errno \ +// RUN: -fveclib=ArmPL -o - %s | FileCheck -check-prefix=ARMPL %s +// RUN: %clang -S -target aarch64-unknown-linux-gnu -O2 -fno-math-errno \ +// RUN: -fveclib=SLEEF -o - %s | FileCheck -check-prefix=SLEEF %s + +typedef __SIZE_TYPE__ size_t; + +void sincos(double, double *, double *); + +// ARMPL: armpl_vsincosq_f64 +// ARMPL: armpl_vsincosq_f64 + +// SLEEF: _ZGVnN2vl8l8_sincos +// SLEEF: _ZGVnN2vl8l8_sincos +void vectorize_sincos(double *restrict x, double *restrict s, + double *restrict c, size_t n) { + for (size_t i = 0; i < n; ++i) { + sincos(x[i], &s[i], &c[i]); + } +} diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 78954431e81c3..3b6cc0d1944fd 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -14,6 +14,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/SystemLibraries.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/TargetParser/Triple.h" @@ -23,7 +24,6 @@ namespace llvm { template class ArrayRef; -enum class VectorLibrary; /// Provides info so a possible vectorization of a function can be /// 
computed. Function 'VectorFnName' is equivalent to 'ScalarFnName' @@ -119,7 +119,8 @@ class TargetLibraryInfoImpl { public: TargetLibraryInfoImpl() = delete; - LLVM_ABI explicit TargetLibraryInfoImpl(const Triple &T); + LLVM_ABI explicit TargetLibraryInfoImpl( + const Triple &T, VectorLibrary VecLib = VectorLibrary::NoLibrary); // Provide value semantics. LLVM_ABI TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI); diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index 59aacc75e055d..6a907b64542ae 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -125,6 +125,8 @@ LLVM_ABI llvm::EABI getEABIVersion(); LLVM_ABI llvm::DebuggerKind getDebuggerTuningOpt(); +LLVM_ABI llvm::VectorLibrary getVectorLibrary(); + LLVM_ABI bool getEnableStackSizeSection(); LLVM_ABI bool getEnableAddrsig(); diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 0afe32a4ecc3c..cf96547063cd0 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -23,6 +23,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/SystemLibraries.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" @@ -83,7 +84,8 @@ struct RuntimeLibcallsInfo { const Triple &TT, ExceptionHandling ExceptionModel = ExceptionHandling::None, FloatABI::ABIType FloatABI = FloatABI::Default, - EABI EABIVersion = EABI::Default, StringRef ABIName = ""); + EABI EABIVersion = EABI::Default, StringRef ABIName = "", + VectorLibrary VecLib = VectorLibrary::NoLibrary); explicit RuntimeLibcallsInfo(const Module &M); diff --git a/llvm/include/llvm/IR/SystemLibraries.h b/llvm/include/llvm/IR/SystemLibraries.h index 1713b07c1c86f..5bdf67642e0e4 100644 --- a/llvm/include/llvm/IR/SystemLibraries.h +++ 
b/llvm/include/llvm/IR/SystemLibraries.h @@ -29,11 +29,6 @@ enum class VectorLibrary { AMDLIBM // AMD Math Vector library. }; -/// Command line flag value for the vector math library to use -/// -/// FIXME: This should come from a module flag, and not be mutually exclusive -extern VectorLibrary ClVectorLibrary; - } // namespace llvm #endif // LLVM_IR_SYSTEMLIBRARIES_H diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index bfd2817b8d1f5..b9258c0fee692 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -15,6 +15,7 @@ #define LLVM_TARGET_TARGETOPTIONS_H #include "llvm/ADT/FloatingPointMode.h" +#include "llvm/IR/SystemLibraries.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" @@ -409,6 +410,9 @@ class TargetOptions { /// Which debugger to tune for. DebuggerKind DebuggerTuning = DebuggerKind::Default; + /// Vector math library to use. + VectorLibrary VectorLibrary = VectorLibrary::NoLibrary; + private: /// Flushing mode to assume in default FP environment. DenormalMode FPDenormalMode; diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index f97abc9a32707..26d0c108fb03a 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -160,7 +160,8 @@ static void initializeBase(TargetLibraryInfoImpl &TLI, const Triple &T) { /// target triple. This should be carefully written so that a missing target /// triple gets a sane set of defaults. 
static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, - ArrayRef StandardNames) { + ArrayRef StandardNames, + VectorLibrary VecLib) { // Set IO unlocked variants as unavailable // Set them as available per system below TLI.setUnavailable(LibFunc_getc_unlocked); @@ -924,23 +925,25 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, if (T.isOSAIX()) TLI.setUnavailable(LibFunc_memrchr); - TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary, T); + TLI.addVectorizableFunctionsFromVecLib(VecLib, T); } /// Initialize the set of available library functions based on the specified /// target triple. This should be carefully written so that a missing target /// triple gets a sane set of defaults. static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, - ArrayRef StandardNames) { + ArrayRef StandardNames, + VectorLibrary VecLib) { initializeBase(TLI, T); - initializeLibCalls(TLI, T, StandardNames); + initializeLibCalls(TLI, T, StandardNames, VecLib); } -TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) { +TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T, + VectorLibrary VecLib) { // Default to everything being available. 
memset(AvailableArray, -1, sizeof(AvailableArray)); - initialize(*this, T, StandardNames); + initialize(*this, T, StandardNames, VecLib); } TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI) diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index cf225f1f03eac..02a6bb9357ad0 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -107,6 +107,7 @@ CGOPT(bool, UniqueBasicBlockSectionNames) CGOPT(bool, SeparateNamedSections) CGOPT(EABI, EABIVersion) CGOPT(DebuggerKind, DebuggerTuningOpt) +CGOPT(VectorLibrary, VectorLibrary) CGOPT(bool, EnableStackSizeSection) CGOPT(bool, EnableAddrsig) CGOPT(bool, EnableCallGraphSection) @@ -451,6 +452,28 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)"))); CGBINDOPT(DebuggerTuningOpt); + static cl::opt VectorLibrary( + "vector-library", cl::Hidden, cl::desc("Vector functions library"), + cl::init(VectorLibrary::NoLibrary), + cl::values( + clEnumValN(VectorLibrary::NoLibrary, "none", + "No vector functions library"), + clEnumValN(VectorLibrary::Accelerate, "Accelerate", + "Accelerate framework"), + clEnumValN(VectorLibrary::DarwinLibSystemM, "Darwin_libsystem_m", + "Darwin libsystem_m"), + clEnumValN(VectorLibrary::LIBMVEC, "LIBMVEC", + "GLIBC Vector Math library"), + clEnumValN(VectorLibrary::MASSV, "MASSV", "IBM MASS vector library"), + clEnumValN(VectorLibrary::SVML, "SVML", "Intel SVML library"), + clEnumValN(VectorLibrary::SLEEFGNUABI, "sleefgnuabi", + "SIMD Library for Evaluating Elementary Functions"), + clEnumValN(VectorLibrary::ArmPL, "ArmPL", + "Arm Performance Libraries"), + clEnumValN(VectorLibrary::AMDLIBM, "AMDLIBM", + "AMD vector math library"))); + CGBINDOPT(VectorLibrary); + static cl::opt EnableStackSizeSection( "stack-size-section", cl::desc("Emit a section containing stack size metadata"), @@ -609,6 +632,7 @@ 
codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.EnableTLSDESC = getExplicitEnableTLSDESC().value_or(TheTriple.hasDefaultTLSDESC()); Options.ExceptionModel = getExceptionModel(); + Options.VectorLibrary = getVectorLibrary(); Options.EmitStackSizeSection = getEnableStackSizeSection(); Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter(); Options.EnableStaticDataPartitioning = getEnableStaticDataPartitioning(); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 77d9b156e2672..0f1e37bbf1bfc 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -753,7 +753,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm), RuntimeLibcallInfo(TM.getTargetTriple(), TM.Options.ExceptionModel, TM.Options.FloatABIType, TM.Options.EABIVersion, - TM.Options.MCOptions.getABIName()), + TM.Options.MCOptions.getABIName(), + TM.Options.VectorLibrary), Libcalls(RuntimeLibcallInfo) { initActions(); diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt index ebdc2ca08d102..10572ff708bd3 100644 --- a/llvm/lib/IR/CMakeLists.txt +++ b/llvm/lib/IR/CMakeLists.txt @@ -67,7 +67,6 @@ add_llvm_component_library(LLVMCore ReplaceConstant.cpp Statepoint.cpp StructuralHash.cpp - SystemLibraries.cpp Type.cpp TypedPointerType.cpp TypeFinder.cpp diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index ee23b58742b64..12d050329a302 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -29,7 +29,8 @@ using namespace RTLIB; RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT, ExceptionHandling ExceptionModel, FloatABI::ABIType FloatABI, - EABI EABIVersion, StringRef ABIName) { + EABI EABIVersion, StringRef ABIName, + VectorLibrary VecLib) { // FIXME: The ExceptionModel parameter is to handle the field in // TargetOptions. 
This interface fails to distinguish the forced disable // case for targets which support exceptions by default. This should @@ -40,7 +41,7 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT, initLibcalls(TT, ExceptionModel, FloatABI, EABIVersion, ABIName); // TODO: Tablegen should generate these sets - switch (ClVectorLibrary) { + switch (VecLib) { case VectorLibrary::SLEEFGNUABI: for (RTLIB::LibcallImpl Impl : {RTLIB::impl__ZGVnN2vl8_modf, RTLIB::impl__ZGVnN4vl4_modff, diff --git a/llvm/lib/IR/SystemLibraries.cpp b/llvm/lib/IR/SystemLibraries.cpp deleted file mode 100644 index fa4ac2adb7296..0000000000000 --- a/llvm/lib/IR/SystemLibraries.cpp +++ /dev/null @@ -1,34 +0,0 @@ -//===-----------------------------------------------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/IR/SystemLibraries.h" -#include "llvm/Support/CommandLine.h" - -using namespace llvm; - -VectorLibrary llvm::ClVectorLibrary; - -static cl::opt ClVectorLibraryOpt( - "vector-library", cl::Hidden, cl::desc("Vector functions library"), - cl::location(llvm::ClVectorLibrary), cl::init(VectorLibrary::NoLibrary), - cl::values( - clEnumValN(VectorLibrary::NoLibrary, "none", - "No vector functions library"), - clEnumValN(VectorLibrary::Accelerate, "Accelerate", - "Accelerate framework"), - clEnumValN(VectorLibrary::DarwinLibSystemM, "Darwin_libsystem_m", - "Darwin libsystem_m"), - clEnumValN(VectorLibrary::LIBMVEC, "LIBMVEC", - "GLIBC Vector Math library"), - clEnumValN(VectorLibrary::MASSV, "MASSV", "IBM MASS vector library"), - clEnumValN(VectorLibrary::SVML, "SVML", "Intel SVML library"), - clEnumValN(VectorLibrary::SLEEFGNUABI, "sleefgnuabi", - "SIMD Library for Evaluating Elementary Functions"), - 
clEnumValN(VectorLibrary::ArmPL, "ArmPL", "Arm Performance Libraries"), - clEnumValN(VectorLibrary::AMDLIBM, "AMDLIBM", - "AMD vector math library"))); diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 2147945d8a416..1cfedad15ec35 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -696,7 +696,8 @@ static int compileModule(char **argv, LLVMContext &Context, } // Add an appropriate TargetLibraryInfo pass for the module's triple. - TargetLibraryInfoImpl TLII(M->getTargetTriple()); + TargetLibraryInfoImpl TLII(M->getTargetTriple(), + Target->Options.VectorLibrary); // The -disable-simplify-libcalls flag actually disables all builtin optzns. if (DisableSimplifyLibCalls) diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index 4cf117f227c00..c65cae54b9530 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -670,8 +670,15 @@ optMain(int argc, char **argv, M->addModuleFlag(Module::Error, "UnifiedLTO", 1); } + VectorLibrary VecLib = codegen::getVectorLibrary(); // Add an appropriate TargetLibraryInfo pass for the module's triple. 
- TargetLibraryInfoImpl TLII(ModuleTriple); + TargetLibraryInfoImpl TLII(ModuleTriple, VecLib); + + RTLIB::RuntimeLibcallsInfo RTLCI(ModuleTriple, codegen::getExceptionModel(), + codegen::getFloatABIForCalls(), + codegen::getEABIVersion(), + "", // FIXME: Get ABI name from MCOptions + VecLib); // FIXME: Get ABI name from MCOptions RTLIB::RuntimeLibcallsInfo RTLCI(ModuleTriple, codegen::getExceptionModel(), diff --git a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn index 8037c8d693cb8..22aa0b6418132 100644 --- a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn @@ -82,7 +82,6 @@ static_library("IR") { "SafepointIRVerifier.cpp", "Statepoint.cpp", "StructuralHash.cpp", - "SystemLibraries.cpp", "Type.cpp", "TypeFinder.cpp", "TypedPointerType.cpp", From 8b596224258dae1fc671d70e4f2072e162cd4f59 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Fri, 14 Nov 2025 11:22:05 -0800 Subject: [PATCH 31/56] Add the ability to load DWARF64 .debug_str_offsets tables for DWARF32 DWARF units in .dwp files. (#167986) This patch is updating the reading capabilities of the LLVM DWARF parser for an llvm-dwp patch https://github.com/llvm/llvm-project/pull/167457 that will emit .dwp files where the compile units are DWARF32 and the .debug_str_offsets tables will be emitted as DWARF64 to allow .debug_str sections that exceed 4GB in size.
--- llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 10 ++- .../dwarfdump-dwp-str-offsets-64.yaml | 88 +++++++++++++++++++ 2 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 llvm/test/DebugInfo/dwarfdump-dwp-str-offsets-64.yaml diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index da0bf03e1ac57..b8fbdfc8c1d70 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -1187,9 +1187,15 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA) { if (getVersion() >= 5) { if (DA.getData().data() == nullptr) return std::nullopt; - Offset += Header.getFormat() == dwarf::DwarfFormat::DWARF32 ? 8 : 16; + // FYI: The .debug_str_offsets.dwo section may use DWARF64 even when the + // rest of the file uses DWARF32, so respect whichever encoding the + // header/length uses. + uint64_t Length = 0; + DwarfFormat Format = dwarf::DwarfFormat::DWARF32; + std::tie(Length, Format) = DA.getInitialLength(&Offset); + Offset += 4; // Skip the DWARF version uint16_t and the uint16_t padding. // Look for a valid contribution at the given offset. - auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset); + auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Format, Offset); if (!DescOrError) return DescOrError.takeError(); return *DescOrError; diff --git a/llvm/test/DebugInfo/dwarfdump-dwp-str-offsets-64.yaml b/llvm/test/DebugInfo/dwarfdump-dwp-str-offsets-64.yaml new file mode 100644 index 0000000000000..3820ca7184d62 --- /dev/null +++ b/llvm/test/DebugInfo/dwarfdump-dwp-str-offsets-64.yaml @@ -0,0 +1,88 @@ +# This YAML file will create a .dwp file that has a DWARF32 compile unit whose +# .debug_str_offsets.dwo is in DWARF64 format. This test verifies that +# llvm-dwarfdump can read the strings correctly and dump the +# .debug_str_offsets.dwo info correctly. 
This paves the way for llvm-dwp to +# promote some .debug_str_offsets tables for .dwo files to be DWARF64 and will +# allow the .debug_str section to be larger than UINT32_MAX size in bytes +# without losing data. + +# RUN: yaml2obj %s -o %t.dwp +# RUN: llvm-dwarfdump --debug-str-offsets --debug-info %t.dwp | FileCheck %s + +# CHECK: 0x00000000: Compile Unit: length = 0x0000002a, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_compile, abbr_offset = 0x0000, addr_size = 0x08, DWO_id = 0x1158980a3c2f811b (next unit at 0x0000002e) + +# CHECK: 0x00000014: DW_TAG_compile_unit +# CHECK-NEXT: DW_AT_producer ("Apple clang version 17.0.0 (clang-1700.4.4.1)") +# CHECK-NEXT: DW_AT_language (DW_LANG_C_plus_plus_14) +# CHECK-NEXT: DW_AT_name ("main.minimal.cpp") +# CHECK-NEXT: DW_AT_dwo_name ("main.minimal.dwo") + +# CHECK: 0x0000001a: DW_TAG_subprogram +# CHECK-NEXT: DW_AT_low_pc (indexed (00000000) address = ) +# CHECK-NEXT: DW_AT_high_pc (0x0000000f) +# CHECK-NEXT: DW_AT_frame_base (DW_OP_reg6 RBP) +# CHECK-NEXT: DW_AT_name ("main") +# CHECK-NEXT: DW_AT_decl_file (0x00) +# CHECK-NEXT: DW_AT_decl_line (1) +# CHECK-NEXT: DW_AT_type (0x00000029 "int") +# CHECK-NEXT: DW_AT_external (true) + +# CHECK: 0x00000029: DW_TAG_base_type +# CHECK-NEXT: DW_AT_name ("int") +# CHECK-NEXT: DW_AT_encoding (DW_ATE_signed) +# CHECK-NEXT: DW_AT_byte_size (0x04) + +# CHECK: 0x0000002d: NULL + +# CHECK: .debug_str_offsets.dwo contents: +# CHECK-NEXT: 0x00000000: Contribution size = 44, Format = DWARF64, Version = 5 +# CHECK-NEXT: 0x00000010: 0000000000000000 "main" +# CHECK-NEXT: 0x00000018: 0000000000000005 "int" +# CHECK-NEXT: 0x00000020: 0000000000000009 "Apple clang version 17.0.0 (clang-1700.4.4.1)" +# CHECK-NEXT: 0x00000028: 0000000000000037 "main.minimal.cpp" +# CHECK-NEXT: 0x00000030: 0000000000000048 "main.minimal.dwo" + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + SectionHeaderStringTable: .strtab +Sections: + - Name: 
.debug_abbrev.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 01110125251305032576250000022E00111B1206401803253A0B3B0B49133F19000003240003253E0B0B0B000000 + - Name: .debug_str.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 6D61696E00696E74004170706C6520636C616E672076657273696F6E2031372E302E302028636C616E672D313730302E342E342E3129006D61696E2E6D696E696D616C2E637070006D61696E2E6D696E696D616C2E64776F00 + - Name: .debug_str_offsets.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 'FFFFFFFF2C000000000000000500000000000000000000000500000000000000090000000000000037000000000000004800000000000000' + - Name: .debug_info.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 2A00000005000508000000001B812F3C0A98581101022100030402000F0000000156000001290000000301050400 + - Name: .debug_cu_index + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 0500000003000000010000000200000000000000000000001B812F3C0A98581100000000010000000100000003000000060000000000000000000000000000002E0000002E0000001C000000 + - Type: SectionHeaderTable + Sections: + - Name: .strtab + - Name: .debug_abbrev.dwo + - Name: .debug_str.dwo + - Name: .debug_str_offsets.dwo + - Name: .debug_info.dwo + - Name: .debug_cu_index + - Name: .symtab +Symbols: [] +... From a407d02752f9d28fe01dd2fe5cdc12344ab38753 Mon Sep 17 00:00:00 2001 From: Gang Chen Date: Fri, 14 Nov 2025 11:49:09 -0800 Subject: [PATCH 32/56] =?UTF-8?q?Revert=20"[Transform][LoadStoreVectorizer?= =?UTF-8?q?]=20allow=20redundant=20in=20Chain=20(#1=E2=80=A6=20(#168105)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …63019)" This reverts commit 92e5608ffa6ff39ac3707f29418cc9482471f5d9. 
--- .../Vectorize/LoadStoreVectorizer.cpp | 76 +++----- .../AMDGPU/GlobalISel/irtranslator-call.ll | 9 +- .../branch-folding-implicit-def-subreg.ll | 51 +++--- llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll | 33 +++- .../AMDGPU/divergence-driven-trunc-to-i1.ll | 2 +- ...cannot-create-empty-or-backward-segment.ll | 18 +- .../AMDGPU/fmul-2-combine-multi-use.ll | 148 ++++++++------- llvm/test/CodeGen/AMDGPU/mad_uint24.ll | 8 +- llvm/test/CodeGen/AMDGPU/sad.ll | 18 +- .../AMDGPU/simplifydemandedbits-recursion.ll | 8 +- .../AMDGPU/splitkit-getsubrangeformask.ll | 171 +++++++++--------- .../AMDGPU/multiple_tails.ll | 97 ++++------ .../AMDGPU/vect-ptr-ptr-size-mismatch.ll | 5 +- .../X86/subchain-interleaved.ll | 4 +- 14 files changed, 328 insertions(+), 320 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 47bcfa467615b..7b5137b0185ab 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -626,35 +626,26 @@ std::vector Vectorizer::splitChainByContiguity(Chain &C) { std::vector Ret; Ret.push_back({C.front()}); - unsigned ElemBytes = DL.getTypeStoreSize(getChainElemTy(C)); - APInt PrevReadEnd = C[0].OffsetFromLeader + - DL.getTypeStoreSize(getLoadStoreType(&*C[0].Inst)); for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) { // `prev` accesses offsets [PrevDistFromBase, PrevReadEnd). auto &CurChain = Ret.back(); - unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(&*It->Inst)); + const ChainElem &Prev = CurChain.back(); + unsigned SzBits = DL.getTypeSizeInBits(getLoadStoreType(&*Prev.Inst)); + assert(SzBits % 8 == 0 && "Non-byte sizes should have been filtered out by " + "collectEquivalenceClass"); + APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8; // Add this instruction to the end of the current chain, or start a new one. 
- assert(SzBytes % ElemBytes == 0); - APInt ReadEnd = It->OffsetFromLeader + SzBytes; - // Allow redundancy: partial or full overlap counts as contiguous. - bool AreContiguous = false; - if (It->OffsetFromLeader.sle(PrevReadEnd)) { - uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader).getZExtValue(); - if (Overlap % ElemBytes == 0) - AreContiguous = true; - } - - LLVM_DEBUG(dbgs() << "LSV: Instruction is " - << (AreContiguous ? "contiguous" : "chain-breaker") - << *It->Inst << " (starts at offset " + bool AreContiguous = It->OffsetFromLeader == PrevReadEnd; + LLVM_DEBUG(dbgs() << "LSV: Instructions are " + << (AreContiguous ? "" : "not ") << "contiguous: " + << *Prev.Inst << " (ends at offset " << PrevReadEnd + << ") -> " << *It->Inst << " (starts at offset " << It->OffsetFromLeader << ")\n"); - if (AreContiguous) CurChain.push_back(*It); else Ret.push_back({*It}); - PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd); } // Filter out length-1 chains, these are uninteresting. @@ -883,24 +874,15 @@ bool Vectorizer::vectorizeChain(Chain &C) { Type *VecElemTy = getChainElemTy(C); bool IsLoadChain = isa(C[0].Inst); unsigned AS = getLoadStoreAddressSpace(C[0].Inst); - unsigned BytesAdded = DL.getTypeStoreSize(getLoadStoreType(&*C[0].Inst)); - APInt PrevReadEnd = C[0].OffsetFromLeader + BytesAdded; - unsigned ChainBytes = BytesAdded; - for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) { - unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(&*It->Inst)); - APInt ReadEnd = It->OffsetFromLeader + SzBytes; - // Update ChainBytes considering possible overlap. - BytesAdded = - PrevReadEnd.sle(ReadEnd) ? 
(ReadEnd - PrevReadEnd).getSExtValue() : 0; - ChainBytes += BytesAdded; - PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd); - } - + unsigned ChainBytes = std::accumulate( + C.begin(), C.end(), 0u, [&](unsigned Bytes, const ChainElem &E) { + return Bytes + DL.getTypeStoreSize(getLoadStoreType(E.Inst)); + }); assert(ChainBytes % DL.getTypeStoreSize(VecElemTy) == 0); // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller // than 1 byte (e.g. VecTy == <32 x i1>). - unsigned NumElem = 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy); - Type *VecTy = FixedVectorType::get(VecElemTy, NumElem); + Type *VecTy = FixedVectorType::get( + VecElemTy, 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy)); Align Alignment = getLoadStoreAlignment(C[0].Inst); // If this is a load/store of an alloca, we might have upgraded the alloca's @@ -927,31 +909,27 @@ bool Vectorizer::vectorizeChain(Chain &C) { llvm::min_element(C, [](const auto &A, const auto &B) { return A.Inst->comesBefore(B.Inst); })->Inst); - // This can happen due to a chain of redundant loads. - // In this case, just use the element-type, and avoid ExtractElement. - if (NumElem == 1) - VecTy = VecElemTy; + // Chain is in offset order, so C[0] is the instr with the lowest offset, // i.e. the root of the vector. 
VecInst = Builder.CreateAlignedLoad(VecTy, getLoadStorePointerOperand(C[0].Inst), Alignment); + unsigned VecIdx = 0; for (const ChainElem &E : C) { Instruction *I = E.Inst; Value *V; Type *T = getLoadStoreType(I); - int EOffset = (E.OffsetFromLeader - C[0].OffsetFromLeader).getSExtValue(); - int VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy); if (auto *VT = dyn_cast(T)) { auto Mask = llvm::to_vector<8>( llvm::seq(VecIdx, VecIdx + VT->getNumElements())); V = Builder.CreateShuffleVector(VecInst, Mask, I->getName()); - } else if (VecTy != VecElemTy) { + VecIdx += VT->getNumElements(); + } else { V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx), I->getName()); - } else { - V = VecInst; + ++VecIdx; } if (V->getType() != I->getType()) V = Builder.CreateBitOrPointerCast(V, I->getType()); @@ -986,24 +964,22 @@ bool Vectorizer::vectorizeChain(Chain &C) { // Build the vector to store. Value *Vec = PoisonValue::get(VecTy); - auto InsertElem = [&](Value *V, unsigned VecIdx) { + unsigned VecIdx = 0; + auto InsertElem = [&](Value *V) { if (V->getType() != VecElemTy) V = Builder.CreateBitOrPointerCast(V, VecElemTy); - Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx)); + Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx++)); }; for (const ChainElem &E : C) { auto *I = cast(E.Inst); - int EOffset = (E.OffsetFromLeader - C[0].OffsetFromLeader).getSExtValue(); - int VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy); if (FixedVectorType *VT = dyn_cast(getLoadStoreType(I))) { for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) { InsertElem(Builder.CreateExtractElement(I->getValueOperand(), - Builder.getInt32(J)), - VecIdx++); + Builder.getInt32(J))); } } else { - InsertElem(I->getValueOperand(), VecIdx); + InsertElem(I->getValueOperand()); } } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index c935310584949..4e70c15df5741 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -3850,9 +3850,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) poison`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `ptr addrspace(1) poison`, addrspace 1) - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[LOAD2]](s32) - ; CHECK-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p5) = G_INTTOPTR [[LOAD2]](s32) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p3) from `ptr addrspace(1) poison`, addrspace 1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p5) from `ptr addrspace(1) poison`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3881,10 +3880,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[INTTOPTR]](p3), [[PTR_ADD2]](p5) :: (store (p3) into stack + 4, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD2]](p3), [[PTR_ADD2]](p5) :: (store (p3) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) - ; 
CHECK-NEXT: G_STORE [[INTTOPTR1]](p5), [[PTR_ADD3]](p5) :: (store (p5) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store (p5) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index aaf7be9ffe112..5c526c78afcd7 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -13,15 +13,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr17, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: renamable $vgpr31 = COPY $vgpr0, implicit $exec - ; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg3.kernarg.offset.align.down, align 8, addrspace 4) + ; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4) + ; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg6.kernarg.offset.align.down, align 8, addrspace 4) + ; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4) ; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 
:: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4) - ; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg3.kernarg.offset.align.down + 16, align 8, addrspace 4) - ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr20, 0, implicit-def $scc + ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 0, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 -1 ; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_XOR_B64 renamable $sgpr12_sgpr13, -1, implicit-def dead $scc - ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr20, 8, implicit-def $scc + ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 8, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_CSELECT_B64 -1, 0, implicit killed $scc ; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_XOR_B64 killed renamable $sgpr30_sgpr31, -1, implicit-def dead $scc ; GFX90A-NEXT: renamable $vgpr5 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec @@ -32,7 +33,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1.bb103: ; GFX90A-NEXT: successors: %bb.58(0x40000000), %bb.2(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr30_sgpr31, implicit-def dead $scc @@ -40,7 +41,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr4, $vgpr5 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr4, $vgpr5 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF implicit-def $vgpr16 ; GFX90A-NEXT: renamable $vgpr3 = IMPLICIT_DEF implicit-def $vgpr2 @@ -51,7 +52,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3.Flow17: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.57(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, 
$sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr6 = V_AND_B32_e32 1023, $vgpr31, implicit $exec ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def dead $scc @@ -59,7 +60,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.4.bb15: ; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, 
$sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec @@ -358,7 +359,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.35.bb20: ; GFX90A-NEXT: successors: %bb.37(0x40000000), %bb.36(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i23) ; GFX90A-NEXT: renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec @@ -405,7 +406,7 @@ 
define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.37.bb27: ; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i30) ; GFX90A-NEXT: renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec @@ -458,7 +459,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, 
ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.39.bb34: ; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i37) ; GFX90A-NEXT: renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec @@ -509,7 +510,7 @@ 
define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.41.bb41: ; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, 
implicit $exec ; GFX90A-NEXT: renamable $vgpr1, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, $vcc, 0, implicit $exec @@ -558,9 +559,9 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.43.bb55: ; GFX90A-NEXT: successors: %bb.48(0x40000000), %bb.44(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, 
$vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr20, 16, implicit-def $scc + ; GFX90A-NEXT: S_BITCMP1_B32 killed renamable $sgpr17, 16, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_CSELECT_B64 -1, 0, implicit killed $scc ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_XOR_B64 renamable $sgpr64_sgpr65, -1, implicit-def dead $scc ; GFX90A-NEXT: renamable $vgpr62 = V_ADD_CO_U32_e32 6144, $vgpr40, implicit-def $vcc, implicit $exec @@ -606,7 +607,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.46.bb48: ; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, 
$sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc @@ -764,10 +765,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr30 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec ; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr7 = COPY renamable $sgpr21, implicit $exec - ; GFX90A-NEXT: renamable $vgpr24_vgpr25 = DS_READ_B64_gfx9 killed renamable $vgpr7, 0, 0, implicit $exec :: (load (s64) from %ir.4, addrspace 3) + ; GFX90A-NEXT: renamable $vgpr24_vgpr25 = DS_READ_B64_gfx9 killed renamable $vgpr7, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3) ; GFX90A-NEXT: renamable $vgpr22_vgpr23 = DS_READ_B64_gfx9 killed renamable $vgpr3, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3) ; GFX90A-NEXT: renamable $vgpr3 = COPY renamable $sgpr22, implicit $exec - ; GFX90A-NEXT: renamable $vgpr20_vgpr21 = DS_READ_B64_gfx9 killed renamable $vgpr3, 0, 0, implicit $exec :: (load (s64) from %ir.5, addrspace 3) + ; GFX90A-NEXT: renamable $vgpr20_vgpr21 = DS_READ_B64_gfx9 killed renamable $vgpr3, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3) ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_LSHR_B64 
killed renamable $sgpr56_sgpr57, 1, implicit-def dead $scc ; GFX90A-NEXT: renamable $vgpr18_vgpr19 = V_LSHRREV_B64_e64 1, $vgpr24_vgpr25, implicit $exec ; GFX90A-NEXT: renamable $vgpr7 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec @@ -816,18 +817,18 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.58.bb105: ; GFX90A-NEXT: successors: %bb.3(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr28_vgpr29 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3) ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr23, implicit $exec ; GFX90A-NEXT: renamable $vgpr26_vgpr27 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.419, addrspace 3) ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr21, implicit $exec - ; GFX90A-NEXT: renamable $vgpr2_vgpr3 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.4, addrspace 3) - ; 
GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr17, implicit $exec + ; GFX90A-NEXT: renamable $vgpr2_vgpr3 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3) + ; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr33, implicit $exec ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.420, addrspace 3) ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr22, implicit $exec - ; GFX90A-NEXT: renamable $vgpr32_vgpr33 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.5, addrspace 3) + ; GFX90A-NEXT: renamable $vgpr32_vgpr33 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3) ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 -1 ; GFX90A-NEXT: S_BRANCH %bb.3 ; GFX90A-NEXT: {{ $}} @@ -971,13 +972,13 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr39 = COPY renamable $vgpr35, implicit $exec ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr35, renamable $vgpr34_vgpr35, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3) ; GFX90A-NEXT: renamable $vgpr5 = COPY renamable $sgpr21, implicit $exec - ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr5, killed renamable $vgpr54_vgpr55, 0, 0, implicit $exec :: (store (s64) into %ir.4, addrspace 3) + ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr5, killed renamable $vgpr54_vgpr55, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3) ; GFX90A-NEXT: renamable $vgpr16 = COPY killed renamable $sgpr22, implicit $exec - ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr16, killed renamable $vgpr48_vgpr49, 0, 0, implicit $exec :: (store (s64) into %ir.5, addrspace 3) + ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr16, killed renamable $vgpr48_vgpr49, 0, 0, implicit $exec :: (store (s64) 
into %ir.8, addrspace 3) ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr35, killed renamable $vgpr52_vgpr53, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3) - ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr5, killed renamable $vgpr50_vgpr51, 0, 0, implicit $exec :: (store (s64) into %ir.4, addrspace 3) + ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr5, killed renamable $vgpr50_vgpr51, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3) ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr35, killed renamable $vgpr36_vgpr37, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3) - ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr5, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec :: (store (s64) into %ir.4, addrspace 3) + ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr5, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3) ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr35, killed renamable $vgpr38_vgpr39, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll index 687205a11f4bd..dae77d19c1235 100644 --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -593,10 +593,14 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_store_short off, v0, s0 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; 
FLATSCR-NEXT: scratch_load_dword v0, off, s0 -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; FLATSCR-NEXT: scratch_load_ushort v0, off, s0 offset:2 +; FLATSCR-NEXT: scratch_load_ushort v3, off, s0 +; FLATSCR-NEXT: s_waitcnt vmcnt(1) +; FLATSCR-NEXT: v_mov_b32_e32 v1, v0 ; FLATSCR-NEXT: scratch_load_short_d16_hi v1, off, s0 offset:4 +; FLATSCR-NEXT: s_mov_b32 s0, 0x5040100 +; FLATSCR-NEXT: s_waitcnt vmcnt(1) +; FLATSCR-NEXT: v_perm_b32 v0, v0, v3, s0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; FLATSCR-NEXT: s_endpgm @@ -656,9 +660,13 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) ; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s0 offset:4 ; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; FLATSCR_GFX10-NEXT: scratch_load_dword v0, off, s0 +; FLATSCR_GFX10-NEXT: s_clause 0x1 +; FLATSCR_GFX10-NEXT: scratch_load_ushort v0, off, s0 offset:2 +; FLATSCR_GFX10-NEXT: scratch_load_ushort v3, off, s0 +; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(1) +; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v1, v0 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) -; FLATSCR_GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; FLATSCR_GFX10-NEXT: v_perm_b32 v0, v0, v3, 0x5040100 ; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v1, off, s0 offset:4 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) ; FLATSCR_GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] @@ -681,9 +689,12 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, off offset:4 dlc ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, off +; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v3, off, off offset:2 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v3 +; GFX11-TRUE16-NEXT: 
v_mov_b16_e32 v0.h, v3.l +; GFX11-TRUE16-NEXT: s_clause 0x1 +; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, off ; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[2:3] @@ -706,9 +717,13 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: scratch_store_b16 off, v0, off offset:4 dlc ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, off +; GFX11-FAKE16-NEXT: s_clause 0x1 +; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, off offset:2 +; GFX11-FAKE16-NEXT: scratch_load_u16 v3, off, off +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v0 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v0, v3, 0x5040100 ; GFX11-FAKE16-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: global_store_b64 v2, v[0:1], s[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll index 58adb220d7b2f..3303cb86c874e 100644 --- a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll @@ -8,7 +8,7 @@ define amdgpu_kernel void @uniform_trunc_i16_to_i1(ptr addrspace(1) %out, i16 %x ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) - ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset.align.down, addrspace 4) + ; 
GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.z.kernarg.offset.align.down, addrspace 4) ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 diff --git a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll index 5c91ee3f7e748..72913d2596ebf 100644 --- a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll +++ b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll @@ -6,26 +6,28 @@ define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1 ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_mov_b64 s[26:27], s[2:3] ; CHECK-NEXT: s_mov_b64 s[24:25], s[0:1] +; CHECK-NEXT: s_load_dword s2, s[8:9], 0x0 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 +; CHECK-NEXT: s_load_dword s6, s[8:9], 0x4 ; CHECK-NEXT: s_add_u32 s24, s24, s17 ; CHECK-NEXT: s_addc_u32 s25, s25, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_bitcmp1_b32 s0, 0 -; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 -; CHECK-NEXT: s_bitcmp1_b32 s0, 8 +; CHECK-NEXT: s_bitcmp1_b32 s2, 0 +; CHECK-NEXT: s_cselect_b64 s[16:17], -1, 0 +; CHECK-NEXT: s_bitcmp1_b32 s2, 8 ; CHECK-NEXT: s_cselect_b64 s[10:11], -1, 0 -; CHECK-NEXT: s_bitcmp1_b32 s0, 16 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3] +; CHECK-NEXT: s_bitcmp1_b32 s2, 16 ; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 ; CHECK-NEXT: s_bitcmp1_b32 s0, 24 ; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0 ; CHECK-NEXT: s_xor_b64 s[4:5], s[8:9], -1 ; CHECK-NEXT: s_bitcmp1_b32 s1, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3] ; CHECK-NEXT: s_cselect_b64 s[12:13], -1, 0 -; CHECK-NEXT: 
s_bitcmp1_b32 s1, 8 +; CHECK-NEXT: s_bitcmp1_b32 s6, 8 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[16:17] ; CHECK-NEXT: s_cselect_b64 s[14:15], -1, 0 -; CHECK-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, v1 ; CHECK-NEXT: s_and_b64 s[4:5], exec, s[4:5] ; CHECK-NEXT: s_and_b64 s[6:7], exec, s[10:11] ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll index 02ce8be125afc..c0f3726a5c192 100644 --- a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll +++ b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll @@ -862,138 +862,160 @@ define amdgpu_kernel void @multiple_use_fadd_fmad_f16(ptr addrspace(1) %out, i16 define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 { ; VI-DENORM-LABEL: multiple_use_fadd_multi_fmad_f16: ; VI-DENORM: ; %bb.0: -; VI-DENORM-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 +; VI-DENORM-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8 +; VI-DENORM-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0 +; VI-DENORM-NEXT: s_load_dword s6, s[8:9], 0x8 ; VI-DENORM-NEXT: s_add_i32 s12, s12, s17 ; VI-DENORM-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; VI-DENORM-NEXT: s_mov_b32 flat_scratch_lo, s13 ; VI-DENORM-NEXT: s_waitcnt lgkmcnt(0) -; VI-DENORM-NEXT: s_lshr_b32 s5, s2, 16 -; VI-DENORM-NEXT: v_mov_b32_e32 v0, s3 -; VI-DENORM-NEXT: v_mov_b32_e32 v1, s5 -; VI-DENORM-NEXT: v_fma_f16 v2, |s2|, 2.0, v0 -; VI-DENORM-NEXT: v_fma_f16 v3, |s2|, 2.0, v1 +; VI-DENORM-NEXT: s_lshr_b32 s0, s0, 16 ; VI-DENORM-NEXT: v_mov_b32_e32 v0, s0 -; VI-DENORM-NEXT: s_add_u32 s4, s0, 2 -; VI-DENORM-NEXT: v_mov_b32_e32 v1, s1 -; VI-DENORM-NEXT: s_addc_u32 s5, s1, 0 -; VI-DENORM-NEXT: flat_store_short v[0:1], v3 +; VI-DENORM-NEXT: v_fma_f16 v2, |s6|, 2.0, v0 +; VI-DENORM-NEXT: v_mov_b32_e32 v0, s1 +; VI-DENORM-NEXT: v_fma_f16 v3, |s6|, 2.0, v0 +; VI-DENORM-NEXT: 
v_mov_b32_e32 v0, s2 +; VI-DENORM-NEXT: s_mov_b32 flat_scratch_lo, s13 +; VI-DENORM-NEXT: s_add_u32 s4, s2, 2 +; VI-DENORM-NEXT: v_mov_b32_e32 v1, s3 +; VI-DENORM-NEXT: s_addc_u32 s5, s3, 0 +; VI-DENORM-NEXT: flat_store_short v[0:1], v2 ; VI-DENORM-NEXT: s_waitcnt vmcnt(0) ; VI-DENORM-NEXT: v_mov_b32_e32 v0, s4 ; VI-DENORM-NEXT: v_mov_b32_e32 v1, s5 -; VI-DENORM-NEXT: flat_store_short v[0:1], v2 +; VI-DENORM-NEXT: flat_store_short v[0:1], v3 ; VI-DENORM-NEXT: s_waitcnt vmcnt(0) ; VI-DENORM-NEXT: s_endpgm ; ; VI-FLUSH-LABEL: multiple_use_fadd_multi_fmad_f16: ; VI-FLUSH: ; %bb.0: -; VI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 +; VI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8 +; VI-FLUSH-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0 +; VI-FLUSH-NEXT: s_load_dword s6, s[8:9], 0x8 ; VI-FLUSH-NEXT: s_add_i32 s12, s12, s17 ; VI-FLUSH-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; VI-FLUSH-NEXT: s_mov_b32 flat_scratch_lo, s13 ; VI-FLUSH-NEXT: s_waitcnt lgkmcnt(0) -; VI-FLUSH-NEXT: s_lshr_b32 s5, s2, 16 -; VI-FLUSH-NEXT: v_mov_b32_e32 v0, s3 -; VI-FLUSH-NEXT: v_mov_b32_e32 v1, s5 -; VI-FLUSH-NEXT: v_mad_f16 v2, |s2|, 2.0, v0 -; VI-FLUSH-NEXT: v_mad_f16 v3, |s2|, 2.0, v1 +; VI-FLUSH-NEXT: s_lshr_b32 s0, s0, 16 ; VI-FLUSH-NEXT: v_mov_b32_e32 v0, s0 -; VI-FLUSH-NEXT: s_add_u32 s4, s0, 2 -; VI-FLUSH-NEXT: v_mov_b32_e32 v1, s1 -; VI-FLUSH-NEXT: s_addc_u32 s5, s1, 0 -; VI-FLUSH-NEXT: flat_store_short v[0:1], v3 +; VI-FLUSH-NEXT: v_mad_f16 v2, |s6|, 2.0, v0 +; VI-FLUSH-NEXT: v_mov_b32_e32 v0, s1 +; VI-FLUSH-NEXT: v_mad_f16 v3, |s6|, 2.0, v0 +; VI-FLUSH-NEXT: v_mov_b32_e32 v0, s2 +; VI-FLUSH-NEXT: s_mov_b32 flat_scratch_lo, s13 +; VI-FLUSH-NEXT: s_add_u32 s4, s2, 2 +; VI-FLUSH-NEXT: v_mov_b32_e32 v1, s3 +; VI-FLUSH-NEXT: s_addc_u32 s5, s3, 0 +; VI-FLUSH-NEXT: flat_store_short v[0:1], v2 ; VI-FLUSH-NEXT: s_waitcnt vmcnt(0) ; VI-FLUSH-NEXT: v_mov_b32_e32 v0, s4 ; VI-FLUSH-NEXT: v_mov_b32_e32 v1, s5 -; VI-FLUSH-NEXT: flat_store_short v[0:1], v2 +; VI-FLUSH-NEXT: flat_store_short 
v[0:1], v3 ; VI-FLUSH-NEXT: s_waitcnt vmcnt(0) ; VI-FLUSH-NEXT: s_endpgm ; ; GFX10-DENORM-LABEL: multiple_use_fadd_multi_fmad_f16: ; GFX10-DENORM: ; %bb.0: -; GFX10-DENORM-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 +; GFX10-DENORM-NEXT: s_clause 0x2 +; GFX10-DENORM-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8 +; GFX10-DENORM-NEXT: s_load_dword s4, s[8:9], 0x8 +; GFX10-DENORM-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0 ; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-DENORM-NEXT: s_lshr_b32 s4, s2, 16 -; GFX10-DENORM-NEXT: v_fma_f16 v2, |s2|, 2.0, s3 -; GFX10-DENORM-NEXT: v_fma_f16 v1, |s2|, 2.0, s4 -; GFX10-DENORM-NEXT: global_store_short v0, v1, s[0:1] +; GFX10-DENORM-NEXT: s_lshr_b32 s0, s0, 16 +; GFX10-DENORM-NEXT: v_fma_f16 v2, |s4|, 2.0, s1 +; GFX10-DENORM-NEXT: v_fma_f16 v1, |s4|, 2.0, s0 +; GFX10-DENORM-NEXT: global_store_short v0, v1, s[2:3] ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-DENORM-NEXT: global_store_short v0, v2, s[0:1] offset:2 +; GFX10-DENORM-NEXT: global_store_short v0, v2, s[2:3] offset:2 ; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-DENORM-NEXT: s_endpgm ; ; GFX10-FLUSH-LABEL: multiple_use_fadd_multi_fmad_f16: ; GFX10-FLUSH: ; %bb.0: -; GFX10-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 +; GFX10-FLUSH-NEXT: s_clause 0x2 +; GFX10-FLUSH-NEXT: s_load_dword s4, s[8:9], 0x8 +; GFX10-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8 +; GFX10-FLUSH-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0 ; GFX10-FLUSH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-FLUSH-NEXT: v_add_f16_e64 v0, |s2|, |s2| -; GFX10-FLUSH-NEXT: s_lshr_b32 s2, s2, 16 -; GFX10-FLUSH-NEXT: v_add_f16_e32 v2, s2, v0 -; GFX10-FLUSH-NEXT: v_add_f16_e32 v0, s3, v0 -; GFX10-FLUSH-NEXT: global_store_short v1, v2, s[0:1] +; GFX10-FLUSH-NEXT: v_add_f16_e64 v0, |s4|, |s4| +; GFX10-FLUSH-NEXT: s_lshr_b32 s0, s0, 16 +; GFX10-FLUSH-NEXT: v_add_f16_e32 v2, s0, v0 +; GFX10-FLUSH-NEXT: v_add_f16_e32 v0, s1, v0 +; 
GFX10-FLUSH-NEXT: global_store_short v1, v2, s[2:3] ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: global_store_short v1, v0, s[0:1] offset:2 +; GFX10-FLUSH-NEXT: global_store_short v1, v0, s[2:3] offset:2 ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-FLUSH-NEXT: s_endpgm ; ; GFX11-DENORM-TRUE16-LABEL: multiple_use_fadd_multi_fmad_f16: ; GFX11-DENORM-TRUE16: ; %bb.0: -; GFX11-DENORM-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX11-DENORM-TRUE16-NEXT: s_clause 0x2 +; GFX11-DENORM-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX11-DENORM-TRUE16-NEXT: s_load_b32 s6, s[4:5], 0x8 +; GFX11-DENORM-TRUE16-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX11-DENORM-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-DENORM-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-TRUE16-NEXT: s_lshr_b32 s4, s2, 16 -; GFX11-DENORM-TRUE16-NEXT: v_fma_f16 v0.h, |s2|, 2.0, s3 -; GFX11-DENORM-TRUE16-NEXT: v_fma_f16 v0.l, |s2|, 2.0, s4 -; GFX11-DENORM-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc +; GFX11-DENORM-TRUE16-NEXT: s_lshr_b32 s0, s0, 16 +; GFX11-DENORM-TRUE16-NEXT: v_fma_f16 v0.h, |s6|, 2.0, s1 +; GFX11-DENORM-TRUE16-NEXT: v_fma_f16 v0.l, |s6|, 2.0, s0 +; GFX11-DENORM-TRUE16-NEXT: global_store_b16 v1, v0, s[2:3] dlc ; GFX11-DENORM-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-DENORM-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] offset:2 dlc +; GFX11-DENORM-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[2:3] offset:2 dlc ; GFX11-DENORM-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-TRUE16-NEXT: s_endpgm ; ; GFX11-DENORM-FAKE16-LABEL: multiple_use_fadd_multi_fmad_f16: ; GFX11-DENORM-FAKE16: ; %bb.0: -; GFX11-DENORM-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX11-DENORM-FAKE16-NEXT: s_clause 0x2 +; GFX11-DENORM-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX11-DENORM-FAKE16-NEXT: s_load_b32 s6, s[4:5], 0x8 +; GFX11-DENORM-FAKE16-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX11-DENORM-FAKE16-NEXT: v_mov_b32_e32 v0, 0 ; 
GFX11-DENORM-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-DENORM-FAKE16-NEXT: s_lshr_b32 s4, s2, 16 -; GFX11-DENORM-FAKE16-NEXT: v_fma_f16 v2, |s2|, 2.0, s3 -; GFX11-DENORM-FAKE16-NEXT: v_fma_f16 v1, |s2|, 2.0, s4 -; GFX11-DENORM-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc +; GFX11-DENORM-FAKE16-NEXT: s_lshr_b32 s0, s0, 16 +; GFX11-DENORM-FAKE16-NEXT: v_fma_f16 v2, |s6|, 2.0, s1 +; GFX11-DENORM-FAKE16-NEXT: v_fma_f16 v1, |s6|, 2.0, s0 +; GFX11-DENORM-FAKE16-NEXT: global_store_b16 v0, v1, s[2:3] dlc ; GFX11-DENORM-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-DENORM-FAKE16-NEXT: global_store_b16 v0, v2, s[0:1] offset:2 dlc +; GFX11-DENORM-FAKE16-NEXT: global_store_b16 v0, v2, s[2:3] offset:2 dlc ; GFX11-DENORM-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-DENORM-FAKE16-NEXT: s_endpgm ; ; GFX11-FLUSH-TRUE16-LABEL: multiple_use_fadd_multi_fmad_f16: ; GFX11-FLUSH-TRUE16: ; %bb.0: -; GFX11-FLUSH-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX11-FLUSH-TRUE16-NEXT: s_clause 0x2 +; GFX11-FLUSH-TRUE16-NEXT: s_load_b32 s6, s[4:5], 0x8 +; GFX11-FLUSH-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX11-FLUSH-TRUE16-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX11-FLUSH-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e64 v0.l, |s2|, |s2| -; GFX11-FLUSH-TRUE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e64 v0.l, |s6|, |s6| +; GFX11-FLUSH-TRUE16-NEXT: s_lshr_b32 s0, s0, 16 ; GFX11-FLUSH-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.h, s2, v0.l -; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, s3, v0.l -; GFX11-FLUSH-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[0:1] dlc +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.h, s0, v0.l +; GFX11-FLUSH-TRUE16-NEXT: v_add_f16_e32 v0.l, s1, v0.l +; GFX11-FLUSH-TRUE16-NEXT: global_store_d16_hi_b16 v1, v0, s[2:3] dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; 
GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] offset:2 dlc +; GFX11-FLUSH-TRUE16-NEXT: global_store_b16 v1, v0, s[2:3] offset:2 dlc ; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-TRUE16-NEXT: s_endpgm ; ; GFX11-FLUSH-FAKE16-LABEL: multiple_use_fadd_multi_fmad_f16: ; GFX11-FLUSH-FAKE16: ; %bb.0: -; GFX11-FLUSH-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX11-FLUSH-FAKE16-NEXT: s_clause 0x2 +; GFX11-FLUSH-FAKE16-NEXT: s_load_b32 s6, s[4:5], 0x8 +; GFX11-FLUSH-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 +; GFX11-FLUSH-FAKE16-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX11-FLUSH-FAKE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-FLUSH-FAKE16-NEXT: v_add_f16_e64 v0, |s2|, |s2| -; GFX11-FLUSH-FAKE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX11-FLUSH-FAKE16-NEXT: v_add_f16_e64 v0, |s6|, |s6| +; GFX11-FLUSH-FAKE16-NEXT: s_lshr_b32 s0, s0, 16 ; GFX11-FLUSH-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX11-FLUSH-FAKE16-NEXT: v_add_f16_e32 v2, s2, v0 -; GFX11-FLUSH-FAKE16-NEXT: v_add_f16_e32 v0, s3, v0 -; GFX11-FLUSH-FAKE16-NEXT: global_store_b16 v1, v2, s[0:1] dlc +; GFX11-FLUSH-FAKE16-NEXT: v_add_f16_e32 v2, s0, v0 +; GFX11-FLUSH-FAKE16-NEXT: v_add_f16_e32 v0, s1, v0 +; GFX11-FLUSH-FAKE16-NEXT: global_store_b16 v1, v2, s[2:3] dlc ; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-FLUSH-FAKE16-NEXT: global_store_b16 v1, v0, s[0:1] offset:2 dlc +; GFX11-FLUSH-FAKE16-NEXT: global_store_b16 v1, v0, s[2:3] offset:2 dlc ; GFX11-FLUSH-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-FLUSH-FAKE16-NEXT: s_endpgm %x = bitcast i16 %x.arg to half diff --git a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll index 05bd3ac93d608..9cc0e6228a913 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll @@ -133,11 +133,12 @@ define amdgpu_kernel void @i16_mad24(ptr addrspace(1) %out, i16 %a, i16 %b, i16 ; GCN-LABEL: 
i16_mad24: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_load_dword s6, s[4:5], 0xb ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_lshr_b32 s2, s4, 16 -; GCN-NEXT: s_mul_i32 s2, s4, s2 +; GCN-NEXT: s_mul_i32 s2, s6, s2 ; GCN-NEXT: s_add_i32 s2, s2, s5 ; GCN-NEXT: s_sext_i32_i16 s4, s2 ; GCN-NEXT: s_mov_b32 s2, -1 @@ -148,10 +149,11 @@ define amdgpu_kernel void @i16_mad24(ptr addrspace(1) %out, i16 %a, i16 %b, i16 ; GFX8-LABEL: i16_mad24: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_mov_b64 s[4:5], s[2:3] -; GFX8-NEXT: s_lshr_b32 s6, s4, 16 -; GFX8-NEXT: s_mul_i32 s4, s4, s6 +; GFX8-NEXT: s_lshr_b32 s4, s4, 16 +; GFX8-NEXT: s_mul_i32 s4, s6, s4 ; GFX8-NEXT: s_add_i32 s4, s4, s5 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 diff --git a/llvm/test/CodeGen/AMDGPU/sad.ll b/llvm/test/CodeGen/AMDGPU/sad.ll index 15fc987d1e7c6..68c33487b0596 100644 --- a/llvm/test/CodeGen/AMDGPU/sad.ll +++ b/llvm/test/CodeGen/AMDGPU/sad.ll @@ -388,18 +388,20 @@ define amdgpu_kernel void @v_sad_u32_vector_pat2(ptr addrspace(1) %out, <4 x i32 define amdgpu_kernel void @v_sad_u32_i16_pat1(ptr addrspace(1) %out, i16 %a, i16 %b, i16 %c) { ; GCN-LABEL: v_sad_u32_i16_pat1: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 +; GCN-NEXT: s_load_dword s4, s[8:9], 0x2 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x2 +; GCN-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x0 ; GCN-NEXT: s_add_i32 s12, s12, s17 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_and_b32 s4, s2, 0xffff -; GCN-NEXT: s_lshr_b32 s2, s2, 16 -; GCN-NEXT: v_mov_b32_e32 v0, s3 -; GCN-NEXT: v_mov_b32_e32 v1, s2 +; GCN-NEXT: s_and_b32 s4, s4, 0xffff +; GCN-NEXT: s_lshr_b32 s0, s0, 16 +; 
GCN-NEXT: v_mov_b32_e32 v0, s1 +; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_sad_u32 v2, s4, v1, v0 -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: flat_store_short v[0:1], v2 ; GCN-NEXT: s_endpgm %icmp0 = icmp ugt i16 %a, %b diff --git a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll index d041699bcc9e6..a5299ea36958d 100644 --- a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll +++ b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll @@ -17,15 +17,17 @@ declare float @llvm.fmuladd.f32(float, float, float) #0 define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 { ; CHECK-LABEL: foo: ; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_load_dword s6, s[4:5], 0x10 ; CHECK-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 +; CHECK-NEXT: s_load_dword s10, s[4:5], 0x11 ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; CHECK-NEXT: s_movk_i32 s0, 0x54 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: v_mad_u32_u24 v1, v1, s0, v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_bitcmp1_b32 s2, 8 +; CHECK-NEXT: s_bitcmp1_b32 s6, 8 ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 -; CHECK-NEXT: s_bitcmp1_b32 s2, 16 +; CHECK-NEXT: s_bitcmp1_b32 s6, 16 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] ; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0 ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v2 @@ -35,7 +37,7 @@ define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ; CHECK-NEXT: s_xor_b64 s[6:7], s[6:7], -1 ; CHECK-NEXT: s_bitcmp1_b32 s3, 0 ; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0 -; CHECK-NEXT: s_bitcmp1_b32 s3, 8 +; CHECK-NEXT: 
s_bitcmp1_b32 s10, 8 ; CHECK-NEXT: s_cselect_b64 s[10:11], -1, 0 ; CHECK-NEXT: s_and_b64 s[2:3], exec, s[6:7] ; CHECK-NEXT: s_and_b64 s[4:5], exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll index c0c1763d54cc0..b3e0d142d928b 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll +++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll @@ -31,8 +31,8 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr10 ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr8 ; CHECK-NEXT: undef [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %130:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: KILL undef %130:sgpr_128 + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %125:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: KILL undef %125:sgpr_128 ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc @@ -44,38 +44,38 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: 
[[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.83, addrspace 4) - ; CHECK-NEXT: early-clobber %73:sgpr_256 = S_LOAD_DWORDX8_IMM_ec undef %74:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, align 16, addrspace 4) - ; CHECK-NEXT: KILL [[S_ADD_U32_]].sub0, [[S_ADD_U32_]].sub1 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) poison`, addrspace 4) ; CHECK-NEXT: KILL undef %74:sreg_64 + ; CHECK-NEXT: KILL [[S_ADD_U32_]].sub0, [[S_ADD_U32_]].sub1 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %123:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %94:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], %73.sub0_sub1_sub2_sub3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL undef %94:sgpr_128 - ; CHECK-NEXT: KILL undef %123:sgpr_128 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, 
addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: KILL undef %89:sgpr_128 + ; CHECK-NEXT: KILL undef %118:sgpr_128 ; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_1:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_1]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.89, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.95, addrspace 4) - ; CHECK-NEXT: KILL [[S_ADD_U32_2]].sub0, [[S_ADD_U32_2]].sub1 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.87, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.93, addrspace 4) ; CHECK-NEXT: KILL [[S_ADD_U32_1]].sub0, 
[[S_ADD_U32_1]].sub1 + ; CHECK-NEXT: KILL [[S_ADD_U32_2]].sub0, [[S_ADD_U32_2]].sub1 ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %174:sreg_32, 31, implicit-def dead $scc - ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %174:sreg_32, implicit-def $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.101, addrspace 4) + ; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %169:sreg_32, 31, implicit-def dead $scc + ; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %169:sreg_32, implicit-def $scc + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4) ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %174:sreg_32, implicit-def $scc + ; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %169:sreg_32, implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, 
[[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc @@ -88,20 +88,20 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %307:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %302:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %362:sgpr_128, undef %363:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %373:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: 
(invariant load (s128) from %ir.109, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.114, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.119, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.126, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %357:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %368:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %357:sgpr_128, undef %358:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %368:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM 
[[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.124, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %352:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %363:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc @@ -115,17 +115,17 @@ define amdgpu_gs void 
@_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL4_ADD_U32_:%[0-9]+]]:sreg_32 = S_LSHL4_ADD_U32 [[COPY12]], 16, implicit-def dead $scc - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %388:sgpr_128, [[S_LSHL4_ADD_U32_]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.131, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.147, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 576, 0 :: (invariant load (s128) from %ir.152, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 224, 0 :: (invariant load (s128) from %ir.136, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.164, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 224, 0 :: (invariant load (s128) from %ir.142, addrspace 4) - ; 
CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %383:sgpr_128, [[S_LSHL4_ADD_U32_]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 224, 0 :: (invariant load (s128) from %ir.134, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.162, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 224, 0 :: (invariant load (s128) from %ir.140, addrspace 4) + 
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -233, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM5]], -249, implicit-def dead $scc @@ -137,81 +137,85 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_3]], 31, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_3]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef 
%36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_4]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s32) from %ir.276, align 8, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 576, 0 :: (invariant load (s128) from %ir.159, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: 
[[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s32) from %ir.274, align 8, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 576, 0 :: (invariant load (s128) from %ir.157, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub3:sgpr_128 = S_MOV_B32 553734060 ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 -1 ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.172, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.170, addrspace 4) ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub1 ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable 
load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.180, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.185, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.178, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.183, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %73.sub0_sub1_sub2_sub3, 0, 0, 0, 0, implicit $exec :: 
(dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.285, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.207, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.213, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.283, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: 
[[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.205, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.211, addrspace 4) ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.218, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.223, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.216, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.221, addrspace 4) ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, implicit $exec :: 
(dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: 
[[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s64) from %ir.296, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s64) from %ir.294, addrspace 4) ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]] ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM2]].sub1, 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM2]].sub0 ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.259, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.257, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %469:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4) + ; CHECK-NEXT: KILL [[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1 + ; CHECK-NEXT: KILL undef %469:sreg_64 + ; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3 ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.268, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.266, addrspace 4) ; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc ; CHECK-NEXT: 
[[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.308, align 8, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.306, align 8, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]] - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM22]] - ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 %73.sub0, 65535, implicit-def dead $scc + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] + ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY 
[[S_LOAD_DWORDX2_IMM]] ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM1]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM2]] ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY18]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -475, implicit-def dead $scc @@ -221,20 +225,20 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc ; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.326, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.324, addrspace 4) ; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.332, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.330, addrspace 4) 
; CHECK-NEXT: undef [[S_ADD_U32_24:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_24]], 96, 0 :: (invariant load (s128) from %ir.338, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_24]], 96, 0 :: (invariant load (s128) from %ir.336, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]] ; 
CHECK-NEXT: KILL [[V_MOV_B32_e32_]] + ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]] - ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]] ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec @@ -346,12 +350,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec ; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec + ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %542:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4) ; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec ; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc ; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_23]], [[V_OR_B32_e64_66]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec ; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec - ; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %557:vgpr_32, undef %559:vgpr_32, %73, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) + ; 
CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %556:vgpr_32, undef %558:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) ; CHECK-NEXT: S_ENDPGM 0 .expVert: %0 = extractelement <31 x i32> %userData, i64 2 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll index 57da5976b3cfa..935f311575250 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll @@ -1,79 +1,62 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 -; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s ; Checks that there is no crash when there are multiple tails ; for a the same head starting a chain. 
@0 = internal addrspace(3) global [16384 x i32] undef +; GCN-LABEL: @no_crash( +; GCN: store <2 x i32> zeroinitializer +; GCN: store i32 0 +; GCN: store i32 0 + define amdgpu_kernel void @no_crash(i32 %arg) { -; GCN-LABEL: define amdgpu_kernel void @no_crash( -; GCN-SAME: i32 [[ARG:%.*]]) { -; GCN-NEXT: [[TEMP2:%.*]] = add i32 [[ARG]], 14 -; GCN-NEXT: [[TEMP3:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0:[0-9]+]], i32 0, i32 [[TEMP2]] -; GCN-NEXT: [[TEMP4:%.*]] = add i32 [[ARG]], 15 -; GCN-NEXT: [[TEMP5:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[TEMP4]] -; GCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(3) [[TEMP3]], align 4 -; GCN-NEXT: store i32 0, ptr addrspace(3) [[TEMP5]], align 4 -; GCN-NEXT: store i32 0, ptr addrspace(3) [[TEMP5]], align 4 -; GCN-NEXT: ret void -; - %temp2 = add i32 %arg, 14 - %temp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %temp2 - %temp4 = add i32 %arg, 15 - %temp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %temp4 + %tmp2 = add i32 %arg, 14 + %tmp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %tmp2 + %tmp4 = add i32 %arg, 15 + %tmp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %tmp4 - store i32 0, ptr addrspace(3) %temp3, align 4 - store i32 0, ptr addrspace(3) %temp5, align 4 - store i32 0, ptr addrspace(3) %temp5, align 4 - store i32 0, ptr addrspace(3) %temp5, align 4 + store i32 0, ptr addrspace(3) %tmp3, align 4 + store i32 0, ptr addrspace(3) %tmp5, align 4 + store i32 0, ptr addrspace(3) %tmp5, align 4 + store i32 0, ptr addrspace(3) %tmp5, align 4 ret void } ; Check adjacent memory locations are properly matched and the ; longest chain vectorized + +; GCN-LABEL: @interleave_get_longest + +; GCN: load <2 x i32>{{.*}} %tmp1 +; GCN: store <2 x i32> zeroinitializer{{.*}} %tmp1 +; GCN: load <2 x i32>{{.*}} %tmp2 +; GCN: load <2 x i32>{{.*}} %tmp4 +; GCN: load i32{{.*}} %tmp5 +; GCN: load i32{{.*}} 
%tmp5 + define amdgpu_kernel void @interleave_get_longest(i32 %arg) { -; GCN-LABEL: define amdgpu_kernel void @interleave_get_longest( -; GCN-SAME: i32 [[ARG:%.*]]) { -; GCN-NEXT: [[A1:%.*]] = add i32 [[ARG]], 1 -; GCN-NEXT: [[A3:%.*]] = add i32 [[ARG]], 3 -; GCN-NEXT: [[TEMP1:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[ARG]] -; GCN-NEXT: [[TEMP2:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[A1]] -; GCN-NEXT: [[TEMP4:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[A3]] -; GCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(3) [[TEMP1]], align 4 -; GCN-NEXT: [[L21:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 -; GCN-NEXT: [[L12:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -; GCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(3) [[TEMP1]], align 4 -; GCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(3) [[TEMP2]], align 4 -; GCN-NEXT: [[L33:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 -; GCN-NEXT: [[L44:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 -; GCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(3) [[TEMP4]], align 4 -; GCN-NEXT: [[L55:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 -; GCN-NEXT: [[L66:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 -; GCN-NEXT: [[L77:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 -; GCN-NEXT: [[L88:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 -; GCN-NEXT: ret void -; %a1 = add i32 %arg, 1 %a2 = add i32 %arg, 2 %a3 = add i32 %arg, 3 %a4 = add i32 %arg, 4 - %temp1 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %arg - %temp2 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a1 - %temp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a2 - %temp4 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a3 - %temp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a4 + %tmp1 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, 
i32 %arg + %tmp2 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a1 + %tmp3 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a2 + %tmp4 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a3 + %tmp5 = getelementptr [16384 x i32], ptr addrspace(3) @0, i32 0, i32 %a4 - %l1 = load i32, ptr addrspace(3) %temp2, align 4 - %l2 = load i32, ptr addrspace(3) %temp1, align 4 - store i32 0, ptr addrspace(3) %temp2, align 4 - store i32 0, ptr addrspace(3) %temp1, align 4 - %l3 = load i32, ptr addrspace(3) %temp2, align 4 - %l4 = load i32, ptr addrspace(3) %temp3, align 4 - %l5 = load i32, ptr addrspace(3) %temp4, align 4 - %l6 = load i32, ptr addrspace(3) %temp5, align 4 - %l7 = load i32, ptr addrspace(3) %temp5, align 4 - %l8 = load i32, ptr addrspace(3) %temp5, align 4 + %l1 = load i32, ptr addrspace(3) %tmp2, align 4 + %l2 = load i32, ptr addrspace(3) %tmp1, align 4 + store i32 0, ptr addrspace(3) %tmp2, align 4 + store i32 0, ptr addrspace(3) %tmp1, align 4 + %l3 = load i32, ptr addrspace(3) %tmp2, align 4 + %l4 = load i32, ptr addrspace(3) %tmp3, align 4 + %l5 = load i32, ptr addrspace(3) %tmp4, align 4 + %l6 = load i32, ptr addrspace(3) %tmp5, align 4 + %l7 = load i32, ptr addrspace(3) %tmp5, align 4 + %l8 = load i32, ptr addrspace(3) %tmp5, align 4 ret void } diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll index 3a23f448fbeab..aec5bca3b6fd2 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll @@ -22,9 +22,8 @@ entry: } ; CHECK-LABEL: @cast_to_cast -; CHECK: load i64 -; CHECK-NEXT: inttoptr i64 -; CHECK-NEXT: inttoptr i64 +; CHECK: %tmp4 = load ptr, ptr %tmp1, align 8 +; CHECK: %tmp5 = load ptr, ptr %tmp3, align 8 define void @cast_to_cast() { entry: %a.ascast = addrspacecast ptr 
addrspace(5) undef to ptr diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll index 8f9a86e016702..bc1f8d3880fdb 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll @@ -86,6 +86,7 @@ define void @chain_prefix_suffix(ptr noalias %ptr) { ; CHECK: store <2 x i32> zeroinitializer ; CHECK: load <3 x i32> ; CHECK: load i32 +; CHECK: load i32 define void @interleave_get_longest(ptr noalias %ptr) { %tmp2 = getelementptr i32, ptr %ptr, i64 1 @@ -128,5 +129,4 @@ define void @interleave_get_longest_aligned(ptr noalias %ptr) { %l7 = load i32, ptr %tmp5, align 4 ret void -} - +} \ No newline at end of file From 7e7ea9c5357efcdf9ba6bd7ea3669e607a9af400 Mon Sep 17 00:00:00 2001 From: Ryutaro Okada <140468571+sakupan102@users.noreply.github.com> Date: Sat, 15 Nov 2025 04:56:24 +0900 Subject: [PATCH 33/56] [MLIR] Extend vector.scatter to accept tensor as base (#165548) This PR makes the following improvements to `vector.scatter` and its lowering pipeline: - In addition to `memref`, accept a ranked `tensor` as the base operand of `vector.scatter`, similar to `vector.transfer_write`. - Implement bufferization support for `vector.scatter`, so that tensor-based scatter ops can be fully lowered to memref-based forms. It's worth to complete the functionality of map_scatter decomposition. 
Full discussion can be found here: https://github.com/iree-org/iree/issues/21135 --------- Signed-off-by: Ryutaro Okada <1015ryu88@gmail.com> --- .../mlir/Dialect/Vector/IR/VectorOps.td | 53 +++++++++---------- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 3 +- .../Transforms/SparseVectorization.cpp | 4 +- mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 12 +++-- .../BufferizableOpInterfaceImpl.cpp | 50 +++++++++++++++++ mlir/test/Dialect/Vector/bufferize.mlir | 20 +++++++ mlir/test/Dialect/Vector/invalid.mlir | 4 +- mlir/test/Dialect/Vector/ops.mlir | 14 +++++ 8 files changed, 122 insertions(+), 38 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index 43172ff2082df..f91d2b6404c9b 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -2160,25 +2160,25 @@ def Vector_GatherOp : ]; } -def Vector_ScatterOp : - Vector_Op<"scatter", [ - DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods - ]>, - Arguments<(ins Arg:$base, - Variadic:$offsets, - VectorOfNonZeroRankOf<[AnyInteger, Index]>:$indices, - VectorOfNonZeroRankOf<[I1]>:$mask, - AnyVectorOfNonZeroRank:$valueToStore, - OptionalAttr>: $alignment)> { +def Vector_ScatterOp + : Vector_Op<"scatter", + [DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods]>, + Arguments<(ins Arg, "", [MemWrite]>:$base, + Variadic:$offsets, + VectorOfNonZeroRankOf<[AnyInteger, Index]>:$indices, + VectorOfNonZeroRankOf<[I1]>:$mask, + AnyVectorOfNonZeroRank:$valueToStore, + OptionalAttr>:$alignment)>, + Results<(outs Optional:$result)> { let summary = [{ - scatters elements from a vector into memory as defined by an index vector + scatters elements from a vector into memory or ranked tensor as defined by an index vector and a mask vector }]; let description = [{ - The scatter operation stores elements from a n-D vector into memory as + The scatter operation stores elements from a n-D vector into memory or 
ranked tensor as defined by a base with indices and an additional n-D index vector, but only if the corresponding bit in a n-D mask vector is set. Otherwise, no action is taken for that element. Informally the semantics are: @@ -2221,31 +2221,28 @@ def Vector_ScatterOp : }]; let extraClassDeclaration = [{ - MemRefType getMemRefType() { return getBase().getType(); } + ShapedType getBaseType() { return getBase().getType(); } VectorType getIndexVectorType() { return getIndices().getType(); } VectorType getMaskVectorType() { return getMask().getType(); } VectorType getVectorType() { return getValueToStore().getType(); } }]; - let assemblyFormat = - "$base `[` $offsets `]` `[` $indices `]` `,` " - "$mask `,` $valueToStore attr-dict `:` type($base) `,` " - "type($indices) `,` type($mask) `,` type($valueToStore)"; + let assemblyFormat = "$base `[` $offsets `]` `[` $indices `]` `,` " + "$mask `,` $valueToStore attr-dict `:` type($base) `,` " + "type($indices) `,` type($mask) `,` " + "type($valueToStore) (`->` type($result)^)?"; let hasCanonicalizer = 1; let hasVerifier = 1; - let builders = [ - OpBuilder<(ins "Value":$base, - "ValueRange":$indices, - "Value":$index_vec, - "Value":$mask, - "Value":$valueToStore, - CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">: $alignment), [{ - return build($_builder, $_state, base, indices, index_vec, mask, valueToStore, + let builders = [OpBuilder< + (ins "Type":$resultType, "Value":$base, "ValueRange":$indices, + "Value":$index_vec, "Value":$mask, "Value":$valueToStore, + CArg<"llvm::MaybeAlign", "llvm::MaybeAlign()">:$alignment), + [{ + return build($_builder, $_state, resultType, base, indices, index_vec, mask, valueToStore, alignment.has_value() ? 
$_builder.getI64IntegerAttr(alignment->value()) : nullptr); - }]> - ]; + }]>]; } def Vector_ExpandLoadOp : diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index c747e1b59558a..05d541fe80356 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -345,7 +345,8 @@ class VectorScatterOpConversion matchAndRewrite(vector::ScatterOp scatter, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { auto loc = scatter->getLoc(); - MemRefType memRefType = scatter.getMemRefType(); + auto memRefType = dyn_cast(scatter.getBaseType()); + assert(memRefType && "The base should be bufferized"); if (failed(isMemRefTypeSupported(memRefType, *this->getTypeConverter()))) return rewriter.notifyMatchFailure(scatter, "memref type not supported"); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp index febec6d2d2ce4..23436a68535fc 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp @@ -132,8 +132,8 @@ static void genVectorStore(PatternRewriter &rewriter, Location loc, Value mem, SmallVector scalarArgs(idxs); Value indexVec = idxs.back(); scalarArgs.back() = constantIndex(rewriter, loc, 0); - vector::ScatterOp::create(rewriter, loc, mem, scalarArgs, indexVec, vmask, - rhs); + vector::ScatterOp::create(rewriter, loc, /*resultType=*/nullptr, mem, + scalarArgs, indexVec, vmask, rhs); return; } vector::MaskedStoreOp::create(rewriter, loc, mem, idxs, vmask, rhs); diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index daef0ba02100a..a97d0cd7f755b 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -6066,19 +6066,21 @@ LogicalResult 
ScatterOp::verify() { VectorType indVType = getIndexVectorType(); VectorType maskVType = getMaskVectorType(); VectorType valueVType = getVectorType(); - MemRefType memType = getMemRefType(); + ShapedType baseType = getBaseType(); - if (valueVType.getElementType() != memType.getElementType()) + if (!llvm::isa(baseType)) + return emitOpError("requires base to be a memref or ranked tensor type"); + + if (valueVType.getElementType() != baseType.getElementType()) return emitOpError("base and valueToStore element type should match"); - if (llvm::size(getOffsets()) != memType.getRank()) - return emitOpError("requires ") << memType.getRank() << " indices"; + if (llvm::size(getOffsets()) != baseType.getRank()) + return emitOpError("requires ") << baseType.getRank() << " indices"; if (valueVType.getShape() != indVType.getShape()) return emitOpError("expected valueToStore dim to match indices dim"); if (valueVType.getShape() != maskVType.getShape()) return emitOpError("expected valueToStore dim to match mask dim"); return success(); } - namespace { class ScatterFolder final : public OpRewritePattern { public: diff --git a/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp index 546099ca975b7..352f477a8746e 100644 --- a/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/Operation.h" +#include "mlir/IR/Value.h" using namespace mlir; using namespace mlir::bufferization; @@ -126,6 +127,54 @@ struct TransferWriteOpInterface } }; +/// Bufferization of vector.scatter. Replaced with a new vector.scatter that +/// operates on a memref. 
+struct ScatterOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const AnalysisState &state) const { + assert(isa(opOperand.get().getType()) && + "only tensor types expected"); + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const AnalysisState &state) const { + assert(isa(opOperand.get().getType()) && + "only tensor types expected"); + return true; + } + + AliasingValueList getAliasingValues(Operation *op, OpOperand &opOperand, + const AnalysisState &state) const { + assert(isa(opOperand.get().getType()) && + "only tensor types expected"); + auto scatterOp = cast(op); + if (&opOperand != &scatterOp.getBaseMutable()) + return {}; + return {{scatterOp.getResult(), BufferRelation::Equivalent}}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const BufferizationOptions &options, + BufferizationState &state) const { + auto scatterOp = cast(op); + assert(isa(scatterOp.getBaseType()) && + "only tensor types expected"); + FailureOr buffer = + getBuffer(rewriter, scatterOp.getBase(), options, state); + if (failed(buffer)) + return failure(); + vector::ScatterOp::create(rewriter, scatterOp.getLoc(), + /*resultType=*/nullptr, *buffer, + scatterOp.getOffsets(), scatterOp.getIndices(), + scatterOp.getMask(), scatterOp.getValueToStore()); + replaceOpWithBufferizedValues(rewriter, op, *buffer); + return success(); + } +}; + /// Bufferization of vector.gather. Replaced with a new vector.gather that /// operates on a memref. 
struct GatherOpInterface @@ -335,5 +384,6 @@ void mlir::vector::registerBufferizableOpInterfaceExternalModels( GatherOp::attachInterface(*ctx); MaskOp::attachInterface(*ctx); YieldOp::attachInterface(*ctx); + ScatterOp::attachInterface(*ctx); }); } diff --git a/mlir/test/Dialect/Vector/bufferize.mlir b/mlir/test/Dialect/Vector/bufferize.mlir index 887fb941cc651..70adefd0dc4ec 100644 --- a/mlir/test/Dialect/Vector/bufferize.mlir +++ b/mlir/test/Dialect/Vector/bufferize.mlir @@ -32,6 +32,26 @@ func.func @transfer_write(%t: tensor, %o1: index, // ----- +// CHECK-LABEL: func @scatter( +// CHECK-SAME: %[[base:.*]]: tensor<16x16xf32>, %[[v:.*]]: vector<16xi32>, +// CHECK-SAME: %[[mask:.*]]: vector<16xi1>, %[[value:.*]]: vector<16xf32>) -> tensor<16x16xf32> +// CHECK: %[[buf:.*]] = bufferization.to_buffer %[[base]] : tensor<16x16xf32> to memref<16x16xf32> +// CHECK: %[[c0:.*]] = arith.constant 0 : index +// CHECK: %[[alloc:.*]] = memref.alloc() {alignment = 64 : i64} : memref<16x16xf32> +// CHECK: memref.copy %[[buf]], %[[alloc]] : memref<16x16xf32> to memref<16x16xf32> +// CHECK: vector.scatter %[[alloc]][%[[c0]], %[[c0]]] [%[[v]]], %[[mask]], %[[value]] : memref<16x16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> +// CHECK: %[[tensor:.*]] = bufferization.to_tensor %[[alloc]] : memref<16x16xf32> to tensor<16x16xf32> +// CHECK: return %[[tensor]] : tensor<16x16xf32> +func.func @scatter(%base: tensor<16x16xf32>, %v: vector<16xi32>, + %mask: vector<16xi1>, %value: vector<16xf32>) -> tensor<16x16xf32> { + %c0 = arith.constant 0 : index + %0 = vector.scatter %base[%c0, %c0][%v], %mask, %value + : tensor<16x16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> -> tensor<16x16xf32> + return %0 : tensor<16x16xf32> +} + +// ----- + // CHECK-LABEL: func @gather( // CHECK-SAME: %[[base:.*]]: tensor, %[[v:.*]]: vector<16xi32>, // CHECK-SAME: %[[mask:.*]]: vector<16xi1>, %[[pass_thru:.*]]: vector<16xf32>) diff --git a/mlir/test/Dialect/Vector/invalid.mlir 
b/mlir/test/Dialect/Vector/invalid.mlir index 5f035e35a1b86..79b09e172145b 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -1491,9 +1491,9 @@ func.func @gather_non_power_of_two_alignment(%base: memref<16xf32>, %indices: ve func.func @scatter_to_vector(%base: vector<16xf32>, %indices: vector<16xi32>, %mask: vector<16xi1>, %pass_thru: vector<16xf32>) { %c0 = arith.constant 0 : index - // expected-error@+2 {{custom op 'vector.scatter' invalid kind of type specified}} + // expected-error@+1 {{'vector.scatter' op operand #0 must be Tensor or MemRef of any type values, but got 'vector<16xf32>'}} vector.scatter %base[%c0][%indices], %mask, %pass_thru - : vector<16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32> + : vector<16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> } // ----- diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index da9a1a8180a05..de620221944de 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -1160,3 +1160,17 @@ func.func @step() { %1 = vector.step : vector<[4]xindex> return } + +// CHECK-LABEL: func @scatter_tensor( +// CHECK-SAME: %[[BASE:.*]]: tensor<16x16xf32>, %[[V:.*]]: vector<16xi32>, +// CHECK-SAME: %[[MASK:.*]]: vector<16xi1>, %[[VALUE:.*]]: vector<16xf32>) -> tensor<16x16xf32> +func.func @scatter_tensor(%base: tensor<16x16xf32>, %v: vector<16xi32>, + %mask: vector<16xi1>, %value: vector<16xf32>) -> tensor<16x16xf32> { + // CHECK: %[[C0:.*]] = arith.constant 0 : index + %c0 = arith.constant 0 : index + // CHECK: %[[RESULT:.*]] = vector.scatter %[[BASE]][%[[C0]], %[[C0]]] [%[[V]]], %[[MASK]], %[[VALUE]] + %0 = vector.scatter %base[%c0, %c0] [%v], %mask, %value + : tensor<16x16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> -> tensor<16x16xf32> + // CHECK: return %[[RESULT]] : tensor<16x16xf32> + return %0 : tensor<16x16xf32> +} From 862d34666f3c0514e0b8df9f2f6172333290f3e3 Mon Sep 17 
00:00:00 2001 From: Matt Arsenault Date: Fri, 14 Nov 2025 12:03:26 -0800 Subject: [PATCH 34/56] opt: Fix bad merge of #167996 (#168110) After the base branch was moved to main, this somehow ended up adding a second definition of RTLCI, instead of modifying the existing one. Also fix other build error with gcc bots. --- clang/lib/CodeGen/BackendUtil.cpp | 18 +++++++++--------- llvm/include/llvm/Target/TargetOptions.h | 2 +- llvm/lib/CodeGen/CommandFlags.cpp | 2 +- llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +-- llvm/tools/llc/llc.cpp | 3 +-- llvm/tools/opt/optdriver.cpp | 5 ----- 6 files changed, 13 insertions(+), 20 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index f1e20403ad668..6f63e6470270e 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -483,31 +483,31 @@ static bool initTargetOptions(const CompilerInstance &CI, switch (CodeGenOpts.getVecLib()) { case llvm::driver::VectorLibrary::NoLibrary: - Options.VectorLibrary = llvm::VectorLibrary::NoLibrary; + Options.VecLib = llvm::VectorLibrary::NoLibrary; break; case llvm::driver::VectorLibrary::Accelerate: - Options.VectorLibrary = llvm::VectorLibrary::Accelerate; + Options.VecLib = llvm::VectorLibrary::Accelerate; break; case llvm::driver::VectorLibrary::Darwin_libsystem_m: - Options.VectorLibrary = llvm::VectorLibrary::DarwinLibSystemM; + Options.VecLib = llvm::VectorLibrary::DarwinLibSystemM; break; case llvm::driver::VectorLibrary::LIBMVEC: - Options.VectorLibrary = llvm::VectorLibrary::LIBMVEC; + Options.VecLib = llvm::VectorLibrary::LIBMVEC; break; case llvm::driver::VectorLibrary::MASSV: - Options.VectorLibrary = llvm::VectorLibrary::MASSV; + Options.VecLib = llvm::VectorLibrary::MASSV; break; case llvm::driver::VectorLibrary::SVML: - Options.VectorLibrary = llvm::VectorLibrary::SVML; + Options.VecLib = llvm::VectorLibrary::SVML; break; case llvm::driver::VectorLibrary::SLEEF: - Options.VectorLibrary = 
llvm::VectorLibrary::SLEEFGNUABI; + Options.VecLib = llvm::VectorLibrary::SLEEFGNUABI; break; case llvm::driver::VectorLibrary::ArmPL: - Options.VectorLibrary = llvm::VectorLibrary::ArmPL; + Options.VecLib = llvm::VectorLibrary::ArmPL; break; case llvm::driver::VectorLibrary::AMDLIBM: - Options.VectorLibrary = llvm::VectorLibrary::AMDLIBM; + Options.VecLib = llvm::VectorLibrary::AMDLIBM; break; } diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index b9258c0fee692..0312515d11798 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -411,7 +411,7 @@ class TargetOptions { DebuggerKind DebuggerTuning = DebuggerKind::Default; /// Vector math library to use. - VectorLibrary VectorLibrary = VectorLibrary::NoLibrary; + VectorLibrary VecLib = VectorLibrary::NoLibrary; private: /// Flushing mode to assume in default FP environment. diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 02a6bb9357ad0..8f92ee30e221a 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -632,7 +632,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.EnableTLSDESC = getExplicitEnableTLSDESC().value_or(TheTriple.hasDefaultTLSDESC()); Options.ExceptionModel = getExceptionModel(); - Options.VectorLibrary = getVectorLibrary(); + Options.VecLib = getVectorLibrary(); Options.EmitStackSizeSection = getEnableStackSizeSection(); Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter(); Options.EnableStaticDataPartitioning = getEnableStaticDataPartitioning(); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 0f1e37bbf1bfc..0562fd8c08ba8 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -753,8 +753,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm), 
RuntimeLibcallInfo(TM.getTargetTriple(), TM.Options.ExceptionModel, TM.Options.FloatABIType, TM.Options.EABIVersion, - TM.Options.MCOptions.getABIName(), - TM.Options.VectorLibrary), + TM.Options.MCOptions.getABIName(), TM.Options.VecLib), Libcalls(RuntimeLibcallInfo) { initActions(); diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 1cfedad15ec35..e883142caf948 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -696,8 +696,7 @@ static int compileModule(char **argv, LLVMContext &Context, } // Add an appropriate TargetLibraryInfo pass for the module's triple. - TargetLibraryInfoImpl TLII(M->getTargetTriple(), - Target->Options.VectorLibrary); + TargetLibraryInfoImpl TLII(M->getTargetTriple(), Target->Options.VecLib); // The -disable-simplify-libcalls flag actually disables all builtin optzns. if (DisableSimplifyLibCalls) diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index c65cae54b9530..d24c8abef31d0 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -680,11 +680,6 @@ optMain(int argc, char **argv, "", // FIXME: Get ABI name from MCOptions VecLib); - // FIXME: Get ABI name from MCOptions - RTLIB::RuntimeLibcallsInfo RTLCI(ModuleTriple, codegen::getExceptionModel(), - codegen::getFloatABIForCalls(), - codegen::getEABIVersion()); - // The -disable-simplify-libcalls flag actually disables all builtin optzns. 
if (DisableSimplifyLibCalls) TLII.disableAllFunctions(); From dbd97c8d5521cfe80a6655b78e938cdb83742011 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 14 Nov 2025 12:38:02 -0800 Subject: [PATCH 35/56] DebugInfo: Relax codeview-empty-dbg-cu-crash test's version check (#168111) --- llvm/test/DebugInfo/X86/codeview-empty-dbg-cu-crash.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/DebugInfo/X86/codeview-empty-dbg-cu-crash.ll b/llvm/test/DebugInfo/X86/codeview-empty-dbg-cu-crash.ll index 51435b10fdc2a..9117f128ebe44 100644 --- a/llvm/test/DebugInfo/X86/codeview-empty-dbg-cu-crash.ll +++ b/llvm/test/DebugInfo/X86/codeview-empty-dbg-cu-crash.ll @@ -23,7 +23,7 @@ ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 22000 # Backend version +; CHECK-NEXT: .short {{[0-9]+}} # Backend version ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 From afd1ffb4d3bc3d4add96407d58b52372d4950d46 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Fri, 14 Nov 2025 12:52:30 -0800 Subject: [PATCH 36/56] [mlir][acc] Check legality of symbols in acc regions (#167957) This PR adds a new utility function to check whether symbols used in OpenACC regions are legal for offloading. Functions must be marked with `acc routine` or be built-in intrinsics. Global symbols must be marked with `acc declare`. The utility is designed to be extensible, and the OpenACCSupport analysis has been updated to allow handling of additional symbols that do not necessarily use OpenACC attributes but are marked in a way that still guarantees the symbol will be available when offloading. For example, in the Flang implementation, CUF attributes can be validated as legal symbols. 
--- .../Dialect/OpenACC/Analysis/OpenACCSupport.h | 37 ++- .../mlir/Dialect/OpenACC/OpenACCUtils.h | 10 + .../OpenACC/Analysis/OpenACCSupport.cpp | 7 + .../Dialect/OpenACC/Utils/OpenACCUtils.cpp | 50 ++++ .../Dialect/OpenACC/OpenACCUtilsTest.cpp | 239 ++++++++++++++++++ 5 files changed, 342 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h b/mlir/include/mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h index d9b2646b753f3..7be525e87a695 100644 --- a/mlir/include/mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h +++ b/mlir/include/mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h @@ -58,8 +58,10 @@ namespace mlir { namespace acc { -// Forward declaration for RecipeKind enum +// Forward declarations enum class RecipeKind : uint32_t; +bool isValidSymbolUse(Operation *user, SymbolRefAttr symbol, + Operation **definingOpPtr); namespace detail { /// This class contains internal trait classes used by OpenACCSupport. @@ -79,11 +81,27 @@ struct OpenACCSupportTraits { // Used to report a case that is not supported by the implementation. virtual InFlightDiagnostic emitNYI(Location loc, const Twine &message) = 0; + + /// Check if a symbol use is valid for use in an OpenACC region. + virtual bool isValidSymbolUse(Operation *user, SymbolRefAttr symbol, + Operation **definingOpPtr) = 0; }; + /// SFINAE helpers to detect if implementation has optional methods + template + using isValidSymbolUse_t = + decltype(std::declval().isValidSymbolUse(std::declval()...)); + + template + using has_isValidSymbolUse = + llvm::is_detected; + /// This class wraps a concrete OpenACCSupport implementation and forwards /// interface calls to it. This provides type erasure, allowing different /// implementation types to be used interchangeably without inheritance. + /// Methods can be optionally implemented; if not present, default behavior + /// is used. 
template class Model final : public Concept { public: @@ -102,6 +120,14 @@ struct OpenACCSupportTraits { return impl.emitNYI(loc, message); } + bool isValidSymbolUse(Operation *user, SymbolRefAttr symbol, + Operation **definingOpPtr) final { + if constexpr (has_isValidSymbolUse::value) + return impl.isValidSymbolUse(user, symbol, definingOpPtr); + else + return acc::isValidSymbolUse(user, symbol, definingOpPtr); + } + private: ImplT impl; }; @@ -154,6 +180,15 @@ class OpenACCSupport { /// unsupported case. InFlightDiagnostic emitNYI(Location loc, const Twine &message); + /// Check if a symbol use is valid for use in an OpenACC region. + /// + /// \param user The operation using the symbol. + /// \param symbol The symbol reference being used. + /// \param definingOpPtr Optional output parameter to receive the defining op. + /// \return true if the symbol use is valid, false otherwise. + bool isValidSymbolUse(Operation *user, SymbolRefAttr symbol, + Operation **definingOpPtr = nullptr); + /// Signal that this analysis should always be preserved so that /// underlying implementation registration is not lost. bool isInvalidated(const AnalysisManager::PreservedAnalyses &pa) { diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtils.h b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtils.h index 964735755c4a3..2852e0917c3fb 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtils.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtils.h @@ -52,6 +52,16 @@ std::string getRecipeName(mlir::acc::RecipeKind kind, mlir::Type type); // base `array` from an operation that only accesses a subarray. mlir::Value getBaseEntity(mlir::Value val); +/// Check if a symbol use is valid for use in an OpenACC region. +/// This includes looking for various attributes such as `acc.routine_info` +/// and `acc.declare` attributes. 
+/// \param user The operation using the symbol +/// \param symbol The symbol reference being used +/// \param definingOpPtr Optional output parameter to receive the defining op +/// \return true if the symbol use is valid, false otherwise +bool isValidSymbolUse(mlir::Operation *user, mlir::SymbolRefAttr symbol, + mlir::Operation **definingOpPtr = nullptr); + } // namespace acc } // namespace mlir diff --git a/mlir/lib/Dialect/OpenACC/Analysis/OpenACCSupport.cpp b/mlir/lib/Dialect/OpenACC/Analysis/OpenACCSupport.cpp index 40e769e7068cf..1d775fb975738 100644 --- a/mlir/lib/Dialect/OpenACC/Analysis/OpenACCSupport.cpp +++ b/mlir/lib/Dialect/OpenACC/Analysis/OpenACCSupport.cpp @@ -41,5 +41,12 @@ InFlightDiagnostic OpenACCSupport::emitNYI(Location loc, const Twine &message) { return mlir::emitError(loc, "not yet implemented: " + message); } +bool OpenACCSupport::isValidSymbolUse(Operation *user, SymbolRefAttr symbol, + Operation **definingOpPtr) { + if (impl) + return impl->isValidSymbolUse(user, symbol, definingOpPtr); + return acc::isValidSymbolUse(user, symbol, definingOpPtr); +} + } // namespace acc } // namespace mlir diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtils.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtils.cpp index fbac28e740750..aebc248e02ea0 100644 --- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtils.cpp +++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtils.cpp @@ -9,8 +9,11 @@ #include "mlir/Dialect/OpenACC/OpenACCUtils.h" #include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Interfaces/ViewLikeInterface.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Casting.h" mlir::Operation *mlir::acc::getEnclosingComputeOp(mlir::Region ®ion) { @@ -155,3 +158,50 @@ mlir::Value mlir::acc::getBaseEntity(mlir::Value val) { return val; } + +bool mlir::acc::isValidSymbolUse(mlir::Operation *user, + mlir::SymbolRefAttr symbol, + 
mlir::Operation **definingOpPtr) { + mlir::Operation *definingOp = + mlir::SymbolTable::lookupNearestSymbolFrom(user, symbol); + + // If there are no defining ops, we have no way to ensure validity because + // we cannot check for any attributes. + if (!definingOp) + return false; + + if (definingOpPtr) + *definingOpPtr = definingOp; + + // Check if the defining op is a recipe (private, reduction, firstprivate). + // Recipes are valid as they get materialized before being offloaded to + // device. They are only instructions for how to materialize. + if (mlir::isa(definingOp)) + return true; + + // Check if the defining op is a function + if (auto func = + mlir::dyn_cast_if_present(definingOp)) { + // If this symbol is actually an acc routine - then it is expected for it + // to be offloaded - therefore it is valid. + if (func->hasAttr(mlir::acc::getRoutineInfoAttrName())) + return true; + + // If this symbol is a call to an LLVM intrinsic, then it is likely valid. + // Check the following: + // 1. The function is private + // 2. The function has no body + // 3. Name starts with "llvm." + // 4. The function's name is a valid LLVM intrinsic name + if (func.getVisibility() == mlir::SymbolTable::Visibility::Private && + func.getFunctionBody().empty() && func.getName().starts_with("llvm.") && + llvm::Intrinsic::lookupIntrinsicID(func.getName()) != + llvm::Intrinsic::not_intrinsic) + return true; + } + + // A declare attribute is needed for symbol references. 
+ bool hasDeclare = definingOp->hasAttr(mlir::acc::getDeclareAttrName()); + return hasDeclare; +} diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTest.cpp index 6f4e30585b2c9..8b1f532bbe5c0 100644 --- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTest.cpp +++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTest.cpp @@ -674,3 +674,242 @@ TEST_F(OpenACCUtilsTest, getBaseEntityChainedSubviews) { Value ultimateBase = getBaseEntity(baseEntity); EXPECT_EQ(ultimateBase, baseMemref); } + +//===----------------------------------------------------------------------===// +// isValidSymbolUse Tests +//===----------------------------------------------------------------------===// + +TEST_F(OpenACCUtilsTest, isValidSymbolUseNoDefiningOp) { + // Create a memref.get_global that references a non-existent global + auto memrefType = MemRefType::get({10}, b.getI32Type()); + llvm::StringRef globalName = "nonexistent_global"; + SymbolRefAttr nonExistentSymbol = SymbolRefAttr::get(&context, globalName); + + OwningOpRef getGlobalOp = + memref::GetGlobalOp::create(b, loc, memrefType, globalName); + + Operation *definingOp = nullptr; + bool result = + isValidSymbolUse(getGlobalOp.get(), nonExistentSymbol, &definingOp); + + EXPECT_FALSE(result); + EXPECT_EQ(definingOp, nullptr); +} + +TEST_F(OpenACCUtilsTest, isValidSymbolUseRecipe) { + // Create a module to hold the recipe + OwningOpRef module = ModuleOp::create(loc); + Block *moduleBlock = module->getBody(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(moduleBlock); + + // Create a private recipe (any recipe type would work) + auto i32Type = b.getI32Type(); + llvm::StringRef recipeName = "test_recipe"; + OwningOpRef recipeOp = + PrivateRecipeOp::create(b, loc, recipeName, i32Type); + + // Create a value to privatize + auto memrefTy = MemRefType::get({10}, b.getI32Type()); + OwningOpRef allocOp = + memref::AllocaOp::create(b, loc, memrefTy); + TypedValue 
varPtr = + cast>(allocOp->getResult()); + + // Create a private op as the user operation + OwningOpRef privateOp = PrivateOp::create( + b, loc, varPtr, /*structured=*/true, /*implicit=*/false); + + // Create a symbol reference to the recipe + SymbolRefAttr recipeSymbol = SymbolRefAttr::get(&context, recipeName); + + Operation *definingOp = nullptr; + bool result = isValidSymbolUse(privateOp.get(), recipeSymbol, &definingOp); + + EXPECT_TRUE(result); + EXPECT_EQ(definingOp, recipeOp.get()); +} + +TEST_F(OpenACCUtilsTest, isValidSymbolUseFunctionWithRoutineInfo) { + // Create a module to hold the function + OwningOpRef module = ModuleOp::create(loc); + Block *moduleBlock = module->getBody(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(moduleBlock); + + // Create a function with routine_info attribute + auto funcType = b.getFunctionType({}, {}); + llvm::StringRef funcName = "routine_func"; + OwningOpRef funcOp = + func::FuncOp::create(b, loc, funcName, funcType); + + // Add routine_info attribute with a reference to a routine + SmallVector routineRefs = { + SymbolRefAttr::get(&context, "acc_routine")}; + funcOp.get()->setAttr(getRoutineInfoAttrName(), + RoutineInfoAttr::get(&context, routineRefs)); + + // Create a call operation that uses the function symbol + SymbolRefAttr funcSymbol = SymbolRefAttr::get(&context, funcName); + OwningOpRef callOp = func::CallOp::create( + b, loc, funcSymbol, funcType.getResults(), ValueRange{}); + + Operation *definingOp = nullptr; + bool result = isValidSymbolUse(callOp.get(), funcSymbol, &definingOp); + + EXPECT_TRUE(result); + EXPECT_NE(definingOp, nullptr); +} + +TEST_F(OpenACCUtilsTest, isValidSymbolUseLLVMIntrinsic) { + // Create a module to hold the function + OwningOpRef module = ModuleOp::create(loc); + Block *moduleBlock = module->getBody(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(moduleBlock); + + // Create a private function with LLVM intrinsic name + auto funcType = 
b.getFunctionType({b.getF32Type()}, {b.getF32Type()}); + llvm::StringRef intrinsicName = "llvm.sqrt.f32"; + OwningOpRef funcOp = + func::FuncOp::create(b, loc, intrinsicName, funcType); + + // Set visibility to private (required for intrinsics) + funcOp->setPrivate(); + + // Create a call operation that uses the intrinsic + SymbolRefAttr funcSymbol = SymbolRefAttr::get(&context, intrinsicName); + OwningOpRef callOp = func::CallOp::create( + b, loc, funcSymbol, funcType.getResults(), ValueRange{}); + + Operation *definingOp = nullptr; + bool result = isValidSymbolUse(callOp.get(), funcSymbol, &definingOp); + + EXPECT_TRUE(result); + EXPECT_NE(definingOp, nullptr); +} + +TEST_F(OpenACCUtilsTest, isValidSymbolUseFunctionNotIntrinsic) { + // Create a module to hold the function + OwningOpRef module = ModuleOp::create(loc); + Block *moduleBlock = module->getBody(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(moduleBlock); + + // Create a private function that looks like intrinsic but isn't + auto funcType = b.getFunctionType({}, {}); + llvm::StringRef funcName = "llvm.not_a_real_intrinsic"; + OwningOpRef funcOp = + func::FuncOp::create(b, loc, funcName, funcType); + funcOp->setPrivate(); + + // Create a call operation that uses the function + SymbolRefAttr funcSymbol = SymbolRefAttr::get(&context, funcName); + OwningOpRef callOp = func::CallOp::create( + b, loc, funcSymbol, funcType.getResults(), ValueRange{}); + + Operation *definingOp = nullptr; + bool result = isValidSymbolUse(callOp.get(), funcSymbol, &definingOp); + + // Should be false because it's not a valid intrinsic and has no + // acc.routine_info attr + EXPECT_FALSE(result); + EXPECT_NE(definingOp, nullptr); +} + +TEST_F(OpenACCUtilsTest, isValidSymbolUseWithDeclareAttr) { + // Create a module to hold a function + OwningOpRef module = ModuleOp::create(loc); + Block *moduleBlock = module->getBody(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(moduleBlock); + + 
// Create a function with declare attribute + auto funcType = b.getFunctionType({}, {}); + llvm::StringRef funcName = "declared_func"; + OwningOpRef funcOp = + func::FuncOp::create(b, loc, funcName, funcType); + + // Add declare attribute + funcOp.get()->setAttr( + getDeclareAttrName(), + DeclareAttr::get(&context, + DataClauseAttr::get(&context, DataClause::acc_copy))); + + // Create a call operation that uses the function + SymbolRefAttr funcSymbol = SymbolRefAttr::get(&context, funcName); + OwningOpRef callOp = func::CallOp::create( + b, loc, funcSymbol, funcType.getResults(), ValueRange{}); + + Operation *definingOp = nullptr; + bool result = isValidSymbolUse(callOp.get(), funcSymbol, &definingOp); + + EXPECT_TRUE(result); + EXPECT_NE(definingOp, nullptr); +} + +TEST_F(OpenACCUtilsTest, isValidSymbolUseWithoutValidAttributes) { + // Create a module to hold a function + OwningOpRef module = ModuleOp::create(loc); + Block *moduleBlock = module->getBody(); + + OpBuilder::InsertionGuard guard(b); + b.setInsertionPointToStart(moduleBlock); + + // Create a function without any special attributes + auto funcType = b.getFunctionType({}, {}); + llvm::StringRef funcName = "regular_func"; + OwningOpRef funcOp = + func::FuncOp::create(b, loc, funcName, funcType); + + // Create a call operation that uses the function + SymbolRefAttr funcSymbol = SymbolRefAttr::get(&context, funcName); + OwningOpRef callOp = func::CallOp::create( + b, loc, funcSymbol, funcType.getResults(), ValueRange{}); + + Operation *definingOp = nullptr; + bool result = isValidSymbolUse(callOp.get(), funcSymbol, &definingOp); + + // Should be false - no routine_info, not an intrinsic, no declare attribute + EXPECT_FALSE(result); + EXPECT_NE(definingOp, nullptr); +} + +TEST_F(OpenACCUtilsTest, isValidSymbolUseNullDefiningOpPtr) { + // Create a module to hold a recipe + OwningOpRef module = ModuleOp::create(loc); + Block *moduleBlock = module->getBody(); + + OpBuilder::InsertionGuard guard(b); + 
b.setInsertionPointToStart(moduleBlock); + + // Create a private recipe + auto i32Type = b.getI32Type(); + llvm::StringRef recipeName = "test_recipe"; + OwningOpRef recipeOp = + PrivateRecipeOp::create(b, loc, recipeName, i32Type); + + // Create a value to privatize + auto memrefTy = MemRefType::get({10}, b.getI32Type()); + OwningOpRef allocOp = + memref::AllocaOp::create(b, loc, memrefTy); + TypedValue varPtr = + cast>(allocOp->getResult()); + + // Create a private op as the user operation + OwningOpRef privateOp = PrivateOp::create( + b, loc, varPtr, /*structured=*/true, /*implicit=*/false); + + // Create a symbol reference to the recipe + SymbolRefAttr recipeSymbol = SymbolRefAttr::get(&context, recipeName); + + // Call without definingOpPtr (nullptr) + bool result = isValidSymbolUse(privateOp.get(), recipeSymbol, nullptr); + + EXPECT_TRUE(result); +} From 77fd6bef3868eaef8388540e4a900939a2795837 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 14 Nov 2025 21:15:12 +0000 Subject: [PATCH 37/56] [LV] Also cover -force-target-instruction-cost=1 in tests. Extend test to cover different -force-target-instruction-cost settings. 
--- .../AArch64/force-target-instruction-cost.ll | 584 ++++++++---------- 1 file changed, 266 insertions(+), 318 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 068f82c7db670..29bbd015eed1f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -1,26 +1,27 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -p loop-vectorize -force-target-instruction-cost=10 -S %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6 +; RUN: opt -p loop-vectorize -force-target-instruction-cost=1 -S %s | FileCheck --check-prefixes=COMMON,COST1 %s +; RUN: opt -p loop-vectorize -force-target-instruction-cost=10 -S %s | FileCheck --check-prefixes=COMMON,COST10 %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" target triple = "arm64-apple-macosx14.0.0" define double @test_reduction_costs() { -; CHECK-LABEL: define double @test_reduction_costs() { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> splat (double 3.000000e+00)) -; CHECK-NEXT: [[TMP1]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI1]], <2 x double> splat (double 9.000000e+00)) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label 
%[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[DIV:%.*]] = fmul double [[TMP0]], [[TMP1]] -; CHECK-NEXT: ret double [[DIV]] +; COMMON-LABEL: define double @test_reduction_costs() { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: br label %[[VECTOR_PH:.*]] +; COMMON: [[VECTOR_PH]]: +; COMMON-NEXT: br label %[[VECTOR_BODY:.*]] +; COMMON: [[VECTOR_BODY]]: +; COMMON-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] +; COMMON-NEXT: [[VEC_PHI1:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ] +; COMMON-NEXT: [[TMP0]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> splat (double 3.000000e+00)) +; COMMON-NEXT: [[TMP1]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI1]], <2 x double> splat (double 9.000000e+00)) +; COMMON-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; COMMON: [[MIDDLE_BLOCK]]: +; COMMON-NEXT: br label %[[EXIT:.*]] +; COMMON: [[EXIT]]: +; COMMON-NEXT: [[DIV:%.*]] = fmul double [[TMP0]], [[TMP1]] +; COMMON-NEXT: ret double [[DIV]] ; entry: br label %loop.1 @@ -41,71 +42,109 @@ exit: } define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) { -; CHECK-LABEL: define void @test_iv_cost( -; CHECK-SAME: ptr [[PTR_START:%.*]], i8 [[A:%.*]], i64 [[B:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[A_EXT:%.*]] = zext i8 [[A]] to i64 -; CHECK-NEXT: [[START:%.*]] = call i64 @llvm.umin.i64(i64 [[B]], i64 [[A_EXT]]) -; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[START]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[EXIT:.*]], label %[[ITER_CHECK:.*]] -; CHECK: [[ITER_CHECK]]: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[START]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] -; CHECK: 
[[VECTOR_MAIN_LOOP_ITER_CHECK]]: -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[START]], 16 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[START]], 16 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[START]], [[N_MOD_VF]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[NEXT_GEP1]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[START]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] -; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[START]], [[N_VEC]] -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]] -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]] -; CHECK: [[VEC_EPILOG_PH]]: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[START]], 4 -; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[START]], [[N_MOD_VF2]] -; CHECK-NEXT: [[IND_END1:%.*]] = sub i64 [[START]], [[N_VEC3]] -; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC3]] -; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] -; 
CHECK: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX4]] -; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[NEXT_GEP5]], align 1 -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[START]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N7]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] -; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END1]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END2]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ] -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL8]], %[[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1 -; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1 -; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: [[EXIT_LOOPEXIT]]: -; CHECK-NEXT: br label %[[EXIT]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; COST1-LABEL: define void @test_iv_cost( +; COST1-SAME: ptr [[PTR_START:%.*]], 
i8 [[A:%.*]], i64 [[B:%.*]]) { +; COST1-NEXT: [[ENTRY:.*:]] +; COST1-NEXT: [[A_EXT:%.*]] = zext i8 [[A]] to i64 +; COST1-NEXT: [[START:%.*]] = call i64 @llvm.umin.i64(i64 [[B]], i64 [[A_EXT]]) +; COST1-NEXT: [[C:%.*]] = icmp eq i64 [[START]], 0 +; COST1-NEXT: br i1 [[C]], [[EXIT:label %.*]], label %[[ITER_CHECK:.*]] +; COST1: [[ITER_CHECK]]: +; COST1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[START]], 4 +; COST1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; COST1: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; COST1-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[START]], 32 +; COST1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; COST1: [[VECTOR_PH]]: +; COST1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[START]], 32 +; COST1-NEXT: [[N_VEC:%.*]] = sub i64 [[START]], [[N_MOD_VF]] +; COST1-NEXT: br label %[[VECTOR_BODY:.*]] +; COST1: [[VECTOR_BODY]]: +; COST1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; COST1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX]] +; COST1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 +; COST1-NEXT: store <16 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1 +; COST1-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP0]], align 1 +; COST1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; COST1-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; COST1-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; COST1: [[MIDDLE_BLOCK]]: +; COST1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[START]], [[N_VEC]] +; COST1-NEXT: br i1 [[CMP_N]], [[EXIT_LOOPEXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; COST1: [[VEC_EPILOG_ITER_CHECK]]: +; COST1-NEXT: [[IND_END:%.*]] = sub i64 [[START]], [[N_VEC]] +; COST1-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]] +; COST1-NEXT: 
[[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 +; COST1-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]] +; COST1: [[VEC_EPILOG_PH]]: +; COST1-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; COST1-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[START]], 4 +; COST1-NEXT: [[N_VEC3:%.*]] = sub i64 [[START]], [[N_MOD_VF2]] +; COST1-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC3]] +; COST1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC3]] +; COST1-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; COST1: [[VEC_EPILOG_VECTOR_BODY]]: +; COST1-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; COST1-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX4]] +; COST1-NEXT: store <4 x i8> zeroinitializer, ptr [[NEXT_GEP5]], align 1 +; COST1-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4 +; COST1-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] +; COST1-NEXT: br i1 [[TMP4]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; COST1: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; COST1-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[START]], [[N_VEC3]] +; COST1-NEXT: br i1 [[CMP_N7]], [[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; COST1: [[VEC_EPILOG_SCALAR_PH]]: +; +; COST10-LABEL: define void @test_iv_cost( +; COST10-SAME: ptr [[PTR_START:%.*]], i8 [[A:%.*]], i64 [[B:%.*]]) { +; COST10-NEXT: [[ENTRY:.*:]] +; COST10-NEXT: [[A_EXT:%.*]] = zext i8 [[A]] to i64 +; COST10-NEXT: [[START:%.*]] = call i64 @llvm.umin.i64(i64 [[B]], i64 [[A_EXT]]) +; COST10-NEXT: [[C:%.*]] = icmp eq i64 [[START]], 0 +; COST10-NEXT: br i1 [[C]], [[EXIT:label %.*]], label %[[ITER_CHECK:.*]] +; COST10: [[ITER_CHECK]]: +; COST10-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult 
i64 [[START]], 4 +; COST10-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; COST10: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; COST10-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[START]], 16 +; COST10-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; COST10: [[VECTOR_PH]]: +; COST10-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[START]], 16 +; COST10-NEXT: [[N_VEC:%.*]] = sub i64 [[START]], [[N_MOD_VF]] +; COST10-NEXT: br label %[[VECTOR_BODY:.*]] +; COST10: [[VECTOR_BODY]]: +; COST10-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; COST10-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX]] +; COST10-NEXT: store <16 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1 +; COST10-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; COST10-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; COST10-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; COST10: [[MIDDLE_BLOCK]]: +; COST10-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[START]], [[N_VEC]] +; COST10-NEXT: br i1 [[CMP_N]], [[EXIT_LOOPEXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; COST10: [[VEC_EPILOG_ITER_CHECK]]: +; COST10-NEXT: [[IND_END:%.*]] = sub i64 [[START]], [[N_VEC]] +; COST10-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]] +; COST10-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 +; COST10-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]] +; COST10: [[VEC_EPILOG_PH]]: +; COST10-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; COST10-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[START]], 4 +; COST10-NEXT: [[N_VEC3:%.*]] = sub i64 [[START]], [[N_MOD_VF2]] +; COST10-NEXT: [[TMP1:%.*]] = sub i64 
[[START]], [[N_VEC3]] +; COST10-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC3]] +; COST10-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; COST10: [[VEC_EPILOG_VECTOR_BODY]]: +; COST10-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; COST10-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX4]] +; COST10-NEXT: store <4 x i8> zeroinitializer, ptr [[NEXT_GEP5]], align 1 +; COST10-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4 +; COST10-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] +; COST10-NEXT: br i1 [[TMP3]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; COST10: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; COST10-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[START]], [[N_VEC3]] +; COST10-NEXT: br i1 [[CMP_N7]], [[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; COST10: [[VEC_EPILOG_SCALAR_PH]]: ; entry: %a.ext = zext i8 %a to i64 @@ -126,178 +165,99 @@ exit: ret void } -define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y.ptr, ptr %dst.1, i1 %c.4, ptr %src, ptr %dst.3, i1 %c.3, ptr %dst.2) { -; CHECK-LABEL: define void @test_exit_branch_cost( -; CHECK-SAME: ptr [[DST:%.*]], ptr noalias [[X_PTR:%.*]], ptr noalias [[Y_PTR:%.*]], ptr [[DST_1:%.*]], i1 [[C_4:%.*]], ptr [[SRC:%.*]], ptr [[DST_3:%.*]], i1 [[C_3:%.*]], ptr [[DST_2:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] -; CHECK: [[VECTOR_MEMCHECK]]: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST_1]], i64 8 -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST_3]], i64 8 -; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[DST_2]], i64 8 -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 8 -; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr 
[[DST_1]], [[SCEVGEP1]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: [[BOUND05:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP2]] -; CHECK-NEXT: [[BOUND16:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP]] -; CHECK-NEXT: [[FOUND_CONFLICT7:%.*]] = and i1 [[BOUND05]], [[BOUND16]] -; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT7]] -; CHECK-NEXT: [[BOUND08:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP3]] -; CHECK-NEXT: [[BOUND19:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]] -; CHECK-NEXT: [[FOUND_CONFLICT10:%.*]] = and i1 [[BOUND08]], [[BOUND19]] -; CHECK-NEXT: [[CONFLICT_RDX21:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT10]] -; CHECK-NEXT: [[BOUND012:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP4]] -; CHECK-NEXT: [[BOUND113:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] -; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]] -; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX21]], [[FOUND_CONFLICT14]] -; CHECK-NEXT: [[BOUND016:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP2]] -; CHECK-NEXT: [[BOUND117:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP1]] -; CHECK-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; CHECK-NEXT: [[CONFLICT_RDX19:%.*]] = or i1 [[CONFLICT_RDX15]], [[FOUND_CONFLICT18]] -; CHECK-NEXT: [[BOUND020:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP3]] -; CHECK-NEXT: [[BOUND121:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] -; CHECK-NEXT: [[FOUND_CONFLICT22:%.*]] = and i1 [[BOUND020]], [[BOUND121]] -; CHECK-NEXT: [[CONFLICT_RDX41:%.*]] = or i1 [[CONFLICT_RDX19]], [[FOUND_CONFLICT22]] -; CHECK-NEXT: [[BOUND024:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP4]] -; CHECK-NEXT: [[BOUND125:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]] -; CHECK-NEXT: [[FOUND_CONFLICT26:%.*]] = and i1 [[BOUND024]], [[BOUND125]] -; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX41]], [[FOUND_CONFLICT26]] -; CHECK-NEXT: [[BOUND028:%.*]] 
= icmp ult ptr [[DST_2]], [[SCEVGEP3]] -; CHECK-NEXT: [[BOUND129:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]] -; CHECK-NEXT: [[FOUND_CONFLICT30:%.*]] = and i1 [[BOUND028]], [[BOUND129]] -; CHECK-NEXT: [[CONFLICT_RDX65:%.*]] = or i1 [[CONFLICT_RDX27]], [[FOUND_CONFLICT30]] -; CHECK-NEXT: [[BOUND032:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP4]] -; CHECK-NEXT: [[BOUND133:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP2]] -; CHECK-NEXT: [[FOUND_CONFLICT68:%.*]] = and i1 [[BOUND032]], [[BOUND133]] -; CHECK-NEXT: [[CONFLICT_RDX35:%.*]] = or i1 [[CONFLICT_RDX65]], [[FOUND_CONFLICT68]] -; CHECK-NEXT: [[BOUND036:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP4]] -; CHECK-NEXT: [[BOUND137:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP3]] -; CHECK-NEXT: [[FOUND_CONFLICT38:%.*]] = and i1 [[BOUND036]], [[BOUND137]] -; CHECK-NEXT: [[CONFLICT_RDX39:%.*]] = or i1 [[CONFLICT_RDX35]], [[FOUND_CONFLICT38]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX39]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT40:%.*]] = insertelement <2 x i1> poison, i1 [[C_3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT41:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT40]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT56:%.*]] = insertelement <2 x i1> poison, i1 [[C_4]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT57:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT56]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C_4]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true) -; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> [[TMP33]], <2 x i1> zeroinitializer -; CHECK-NEXT: br label 
%[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE55:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[X_PTR]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i64> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP47]], splat (i1 true) -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7:![0-9]+]], !noalias [[META10:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]] -; CHECK: [[PRED_STORE_IF42]]: -; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE43]] -; CHECK: [[PRED_STORE_CONTINUE43]]: -; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 -; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]] -; CHECK: [[PRED_STORE_IF44]]: -; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE45]] -; CHECK: [[PRED_STORE_CONTINUE45]]: -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 -; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]] -; CHECK: [[PRED_STORE_IF46]]: -; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, 
!alias.scope [[META15]], !noalias [[META16]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]] -; CHECK: [[PRED_STORE_CONTINUE47]]: -; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP23:%.*]] = or <2 x i1> [[TMP47]], [[TMP21]] -; CHECK-NEXT: [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1) -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP23]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]] -; CHECK: [[PRED_STORE_IF48]]: -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 0 -; CHECK-NEXT: store i64 [[TMP29]], ptr [[DST_2]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META18:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]] -; CHECK: [[PRED_STORE_CONTINUE49]]: -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP23]], i32 1 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]] -; CHECK: [[PRED_STORE_IF50]]: -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 1 -; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]] -; CHECK: [[PRED_STORE_CONTINUE51]]: -; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP37:%.*]] = or <2 x i1> [[TMP23]], [[TMP19]] -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0 -; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]] -; CHECK: [[PRED_STORE_IF52]]: -; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]] -; CHECK-NEXT: store i64 [[TMP22]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]] -; 
CHECK: [[PRED_STORE_CONTINUE53]]: -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1 -; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF54:.*]], label %[[PRED_STORE_CONTINUE55]] -; CHECK: [[PRED_STORE_IF54]]: -; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] -; CHECK-NEXT: store i64 [[TMP24]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE55]] -; CHECK: [[PRED_STORE_CONTINUE55]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[X_GEP:%.*]] = getelementptr i64, ptr [[X_PTR]], i64 [[IV]] -; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[X_GEP]], align 8 -; CHECK-NEXT: [[Y_GEP:%.*]] = getelementptr i32, ptr [[Y_PTR]], i64 [[IV]] -; CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[Y_GEP]], align 4 -; CHECK-NEXT: [[C1:%.*]] = icmp eq i64 [[X]], 0 -; CHECK-NEXT: br i1 [[C1]], label %[[THEN_4:.*]], label %[[THEN_1:.*]] -; CHECK: [[THEN_1]]: -; CHECK-NEXT: [[AND32831:%.*]] = and i32 [[Y]], 1 -; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8 -; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[Y]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C_4]], i1 [[C_3]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label %[[THEN_2:.*]], label %[[ELSE_1:.*]] -; CHECK: [[ELSE_1]]: -; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8 -; CHECK-NEXT: br label %[[THEN_2]] -; CHECK: [[THEN_2]]: -; CHECK-NEXT: br i1 
[[C_3]], label %[[THEN_3:.*]], label %[[LOOP_LATCH]] -; CHECK: [[THEN_3]]: -; CHECK-NEXT: br i1 [[C_4]], label %[[THEN_5:.*]], label %[[ELSE_2:.*]] -; CHECK: [[THEN_4]]: -; CHECK-NEXT: call void @llvm.assume(i1 [[C_4]]) -; CHECK-NEXT: br label %[[THEN_5]] -; CHECK: [[THEN_5]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 1, %[[THEN_4]] ], [ 0, %[[THEN_3]] ] -; CHECK-NEXT: store i64 [[TMP0]], ptr [[DST_2]], align 8 -; CHECK-NEXT: br label %[[ELSE_2]] -; CHECK: [[ELSE_2]]: -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[SRC]], align 8 -; CHECK-NEXT: store i64 [[L]], ptr [[DST]], align 8 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 64 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP22:![0-9]+]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y.ptr, ptr noalias %dst.1, i1 %c.4, ptr %src, ptr noalias %dst.3, i1 %c.3, ptr noalias %dst.2) { +; COMMON-LABEL: define void @test_exit_branch_cost( +; COMMON-SAME: ptr [[DST:%.*]], ptr noalias [[X_PTR:%.*]], ptr noalias [[Y_PTR:%.*]], ptr noalias [[DST_1:%.*]], i1 [[C_4:%.*]], ptr [[SRC:%.*]], ptr noalias [[DST_3:%.*]], i1 [[C_3:%.*]], ptr noalias [[DST_2:%.*]]) { +; COMMON-NEXT: [[ENTRY:.*:]] +; COMMON-NEXT: br label %[[VECTOR_MEMCHECK:.*]] +; COMMON: [[VECTOR_MEMCHECK]]: +; COMMON-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 8 +; COMMON-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 +; COMMON-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; COMMON-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] +; COMMON-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; COMMON-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; COMMON: [[VECTOR_PH]]: +; COMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 
[[C_3]], i64 0 +; COMMON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; COMMON-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i1> poison, i1 [[C_4]], i64 0 +; COMMON-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT2]], <2 x i1> poison, <2 x i32> zeroinitializer +; COMMON-NEXT: [[TMP0:%.*]] = select i1 [[C_4]], <2 x i1> [[BROADCAST_SPLAT]], <2 x i1> zeroinitializer +; COMMON-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[TMP0]], splat (i1 true) +; COMMON-NEXT: [[TMP2:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i1> [[BROADCAST_SPLAT3]], <2 x i1> zeroinitializer +; COMMON-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT3]], splat (i1 true) +; COMMON-NEXT: [[TMP4:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i1> [[TMP3]], <2 x i1> zeroinitializer +; COMMON-NEXT: br label %[[VECTOR_BODY:.*]] +; COMMON: [[VECTOR_BODY]]: +; COMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE17:.*]] ] +; COMMON-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[X_PTR]], i64 [[INDEX]] +; COMMON-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 +; COMMON-NEXT: [[TMP6:%.*]] = icmp eq <2 x i64> [[WIDE_LOAD]], zeroinitializer +; COMMON-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true) +; COMMON-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 +; COMMON-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; COMMON: [[PRED_STORE_IF]]: +; COMMON-NEXT: store i64 0, ptr [[DST_1]], align 8 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE]] +; COMMON: [[PRED_STORE_CONTINUE]]: +; COMMON-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 +; COMMON-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] +; COMMON: [[PRED_STORE_IF4]]: +; COMMON-NEXT: store i64 0, ptr [[DST_1]], align 8 +; COMMON-NEXT: br label 
%[[PRED_STORE_CONTINUE5]] +; COMMON: [[PRED_STORE_CONTINUE5]]: +; COMMON-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP1]], <2 x i1> zeroinitializer +; COMMON-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 +; COMMON-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] +; COMMON: [[PRED_STORE_IF6]]: +; COMMON-NEXT: store i64 0, ptr [[DST_3]], align 8 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE7]] +; COMMON: [[PRED_STORE_CONTINUE7]]: +; COMMON-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 +; COMMON-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] +; COMMON: [[PRED_STORE_IF8]]: +; COMMON-NEXT: store i64 0, ptr [[DST_3]], align 8 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE9]] +; COMMON: [[PRED_STORE_CONTINUE9]]: +; COMMON-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer +; COMMON-NEXT: [[TMP14:%.*]] = or <2 x i1> [[TMP6]], [[TMP13]] +; COMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP13]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1) +; COMMON-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP14]], i32 0 +; COMMON-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]] +; COMMON: [[PRED_STORE_IF10]]: +; COMMON-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[PREDPHI]], i32 0 +; COMMON-NEXT: store i64 [[TMP16]], ptr [[DST_2]], align 8 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE11]] +; COMMON: [[PRED_STORE_CONTINUE11]]: +; COMMON-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP14]], i32 1 +; COMMON-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] +; COMMON: [[PRED_STORE_IF12]]: +; COMMON-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[PREDPHI]], i32 1 +; COMMON-NEXT: store i64 [[TMP18]], ptr [[DST_2]], align 8 +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE13]] +; COMMON: [[PRED_STORE_CONTINUE13]]: +; 
COMMON-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP4]], <2 x i1> zeroinitializer +; COMMON-NEXT: [[TMP20:%.*]] = or <2 x i1> [[TMP14]], [[TMP19]] +; COMMON-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP20]], i32 0 +; COMMON-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] +; COMMON: [[PRED_STORE_IF14]]: +; COMMON-NEXT: [[TMP22:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META7:![0-9]+]] +; COMMON-NEXT: store i64 [[TMP22]], ptr [[DST]], align 8, !alias.scope [[META10:![0-9]+]], !noalias [[META7]] +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE15]] +; COMMON: [[PRED_STORE_CONTINUE15]]: +; COMMON-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP20]], i32 1 +; COMMON-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17]] +; COMMON: [[PRED_STORE_IF16]]: +; COMMON-NEXT: [[TMP24:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META7]] +; COMMON-NEXT: store i64 [[TMP24]], ptr [[DST]], align 8, !alias.scope [[META10]], !noalias [[META7]] +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE17]] +; COMMON: [[PRED_STORE_CONTINUE17]]: +; COMMON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; COMMON-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 +; COMMON-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; COMMON: [[MIDDLE_BLOCK]]: +; COMMON-NEXT: br label %[[SCALAR_PH]] +; COMMON: [[SCALAR_PH]]: ; entry: br label %loop.header @@ -352,42 +312,57 @@ exit: } define void @invalid_legacy_cost(i64 %N, ptr %x) #0 { -; CHECK-LABEL: define void @invalid_legacy_cost( -; CHECK-SAME: i64 [[N:%.*]], ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = 
urem i64 [[TMP0]], 2 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = alloca i8, i64 0, align 16 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]] -; CHECK-NEXT: store <2 x ptr> [[TMP8]], ptr [[TMP9]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = alloca i8, i64 0, align 16 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr ptr, ptr [[X]], i64 [[IV]] -; CHECK-NEXT: store ptr [[TMP12]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] -; CHECK: [[FOR_END]]: -; CHECK-NEXT: ret void +; COST1-LABEL: define void @invalid_legacy_cost( +; COST1-SAME: i64 [[N:%.*]], ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; COST1-NEXT: [[ENTRY:.*:]] +; 
COST1-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; COST1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; COST1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; COST1: [[VECTOR_PH]]: +; COST1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; COST1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; COST1-NEXT: br label %[[VECTOR_BODY:.*]] +; COST1: [[VECTOR_BODY]]: +; COST1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; COST1-NEXT: [[TMP1:%.*]] = alloca i8, i64 0, align 16 +; COST1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP1]], i64 0 +; COST1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer +; COST1-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]] +; COST1-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[TMP2]], i32 2 +; COST1-NEXT: store <2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 +; COST1-NEXT: store <2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 +; COST1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; COST1-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; COST1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; COST1: [[MIDDLE_BLOCK]]: +; COST1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; COST1-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]] +; COST1: [[SCALAR_PH]]: +; +; COST10-LABEL: define void @invalid_legacy_cost( +; COST10-SAME: i64 [[N:%.*]], ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; COST10-NEXT: [[ENTRY:.*:]] +; COST10-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; COST10-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 +; COST10-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; COST10: [[VECTOR_PH]]: +; COST10-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 +; COST10-NEXT: [[N_VEC:%.*]] 
= sub i64 [[TMP0]], [[N_MOD_VF]] +; COST10-NEXT: br label %[[VECTOR_BODY:.*]] +; COST10: [[VECTOR_BODY]]: +; COST10-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; COST10-NEXT: [[TMP1:%.*]] = alloca i8, i64 0, align 16 +; COST10-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP1]], i64 0 +; COST10-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer +; COST10-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]] +; COST10-NEXT: store <2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 +; COST10-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; COST10-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; COST10-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; COST10: [[MIDDLE_BLOCK]]: +; COST10-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; COST10-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]] +; COST10: [[SCALAR_PH]]: ; entry: br label %for.body @@ -409,30 +384,3 @@ attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) } declare void @llvm.assume(i1 noundef) declare i64 @llvm.umin.i64(i64, i64) -;. 
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} -; CHECK: [[PROF4]] = !{!"branch_weights", i32 4, i32 12} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} -; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]} -; CHECK: [[META7]] = !{[[META8:![0-9]+]]} -; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]]} -; CHECK: [[META9]] = distinct !{[[META9]], !"LVerDomain"} -; CHECK: [[META10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], [[META13:![0-9]+]], [[META14:![0-9]+]]} -; CHECK: [[META11]] = distinct !{[[META11]], [[META9]]} -; CHECK: [[META12]] = distinct !{[[META12]], [[META9]]} -; CHECK: [[META13]] = distinct !{[[META13]], [[META9]]} -; CHECK: [[META14]] = distinct !{[[META14]], [[META9]]} -; CHECK: [[META15]] = !{[[META11]]} -; CHECK: [[META16]] = !{[[META12]], [[META13]], [[META14]]} -; CHECK: [[META17]] = !{[[META12]]} -; CHECK: [[META18]] = !{[[META13]], [[META14]]} -; CHECK: [[META19]] = !{[[META14]]} -; CHECK: [[META20]] = !{[[META13]]} -; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]} -; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]]} -; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]} -; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]} -;. 
From dc491d9760d1f6391407f830f77765917ee5b54b Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Fri, 14 Nov 2025 13:19:30 -0800 Subject: [PATCH 38/56] [flang][acc] Add missing dependency on MLIROpenACCUtils (#168117) FIROpenACCTransforms needs to link against MLIROpenACCUtils; otherwise, linking will fail: `undefined reference to 'mlir::acc::isValidSymbolUse'` --- flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt index 35aa87d6f1c80..d41e99a6c0679 100644 --- a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt @@ -15,4 +15,5 @@ add_flang_library(FIROpenACCTransforms MLIRIR MLIRPass MLIROpenACCDialect + MLIROpenACCUtils ) From 8c74cc5a76f2bab5ca679a4d6353b96e510048af Mon Sep 17 00:00:00 2001 From: alessandra simmons Date: Fri, 14 Nov 2025 16:19:49 -0500 Subject: [PATCH 39/56] [mlir] Remove filtering of deprecated rocm-agent-enumerator value gfx000 (#166634) Getting a gfx000 result from the `rocm-agent-enumerator` command was deprecated beginning with the release of ROCm 7, but the MLIR build system still filters it from results when looking for ROCm agents. This PR removes that filtering. There are a few other uses of "gfx000" in MLIR source, but those are used as default options for running some passes, and, to my understanding, have a semantically different meaning to the dummy result returned from `rocm-agent-enumerator` and don't need to be changed.
--- mlir/lib/ExecutionEngine/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt index 90024b1c8206e..a6153523a5e97 100644 --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -354,7 +354,6 @@ if(LLVM_ENABLE_PIC) endif() string(STRIP AGENTS_STRING ${AGENTS_STRING}) string(REPLACE "\n" ";" AGENTS_LIST ${AGENTS_STRING}) - list(FILTER AGENTS_LIST EXCLUDE REGEX "gfx000") if (AGENTS_LIST STREQUAL "") message(SEND_ERROR "No non-CPU ROCm agents found on the system, and ROCM_TEST_CHIPSET is not defined") else() From 88e9a78ad6b46f187d1a305a13bdd83ea36db8c5 Mon Sep 17 00:00:00 2001 From: SunilKuravinakop <98882378+SunilKuravinakop@users.noreply.github.com> Date: Sat, 15 Nov 2025 02:50:23 +0530 Subject: [PATCH 40/56] [Clang][OpenMP] Bug fix Default clause variable category (#168112) Same changes as in the fix for [165276](https://github.com/llvm/llvm-project/pull/165276), except that an unnecessary include is removed from the test to restore the Ubuntu build. That include is not needed, as the allocatable modifier is not applicable to the default clause in C/C++.
Co-authored-by: Sunil Kuravinakop --- clang/lib/Sema/SemaOpenMP.cpp | 6 +- ...allel_default_variableCategory_codegen.cpp | 91 +++++++++++++++++++ 2 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 81c591a00cfc6..31c8f0cd30c56 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1364,15 +1364,15 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(const_iterator &Iter, DefaultDataSharingAttributes IterDA = Iter->DefaultAttr; switch (Iter->DefaultVCAttr) { case DSA_VC_aggregate: - if (!VD->getType()->isAggregateType()) + if (!D->getType()->isAggregateType()) IterDA = DSA_none; break; case DSA_VC_pointer: - if (!VD->getType()->isPointerType()) + if (!D->getType()->isPointerType()) IterDA = DSA_none; break; case DSA_VC_scalar: - if (!VD->getType()->isScalarType()) + if (!D->getType()->isScalarType()) IterDA = DSA_none; break; case DSA_VC_all: diff --git a/clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp b/clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp new file mode 100644 index 0000000000000..ffafc9a9410b7 --- /dev/null +++ b/clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp @@ -0,0 +1,91 @@ +// RUN: %clangxx -Xclang -verify -Wno-vla -fopenmp -fopenmp-version=60 -x c++ -S -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +int global; +#define VECTOR_SIZE 4 + +int main (int argc, char **argv) { + int i,n; + int x; + + n = VECTOR_SIZE; + + #pragma omp parallel masked firstprivate(x) num_threads(2) + { + int *xPtr = nullptr; + // scalar + #pragma omp task default(shared:scalar) + { + xPtr = &x; + } + #pragma omp taskwait + + // pointer + #pragma omp task default(shared:pointer) shared(x) + { + xPtr = &x; + } + #pragma omp taskwait + } + + int *aggregate[VECTOR_SIZE] = {0,0,0,0}; + + #pragma omp 
parallel masked num_threads(2) + { + // aggregate + #pragma omp task default(shared:aggregate) + for(i=0;i Date: Fri, 14 Nov 2025 13:46:59 -0800 Subject: [PATCH 41/56] [AArch64][FEAT_CMPBR] Codegen for Armv9.6-a CBB and CBH (#164899) This patch adds codegen for CBB and CBH, CB variants operating on bytes and half-words, allowing to fold sign- and zero-extensions. Since if-conversion needs to be able to undo conditional branches, we remember possibly folded zero- and sign-extensions, as well as potentially folded assertzext and assertsext as additional arguments of the CBBAssertExt and CBHAssertExt pseudos during codegen. --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 61 +- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 31 + .../lib/Target/AArch64/AArch64InstrFormats.td | 24 +- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 129 ++- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 2 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 48 +- .../CodeGen/AArch64/cmpbr-early-ifcvt.mir | 435 +++++++++- llvm/test/CodeGen/AArch64/cmpbr-reg-reg.ll | 761 ++++++++++++++++-- llvm/test/CodeGen/AArch64/cmpbr-zext-sext.ll | 230 ++++++ 9 files changed, 1582 insertions(+), 139 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/cmpbr-zext-sext.ll diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index e8766bc1b8c62..96176b79e98a2 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -2677,23 +2677,32 @@ AArch64AsmPrinter::lowerBlockAddressConstant(const BlockAddress &BA) { void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) { bool IsImm = false; - bool Is32Bit = false; + unsigned Width = 0; switch (MI->getOpcode()) { default: llvm_unreachable("This is not a CB pseudo instruction"); + case AArch64::CBBAssertExt: + IsImm = false; + Width = 8; + break; + case AArch64::CBHAssertExt: + IsImm = false; + Width = 16; + break; case AArch64::CBWPrr: - Is32Bit 
= true; + Width = 32; break; case AArch64::CBXPrr: - Is32Bit = false; + Width = 64; break; case AArch64::CBWPri: IsImm = true; - Is32Bit = true; + Width = 32; break; case AArch64::CBXPri: IsImm = true; + Width = 64; break; } @@ -2703,61 +2712,61 @@ void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) { bool NeedsImmDec = false; bool NeedsImmInc = false; +#define GET_CB_OPC(IsImm, Width, ImmCond, RegCond) \ + (IsImm \ + ? (Width == 32 ? AArch64::CB##ImmCond##Wri : AArch64::CB##ImmCond##Xri) \ + : (Width == 8 \ + ? AArch64::CBB##RegCond##Wrr \ + : (Width == 16 ? AArch64::CBH##RegCond##Wrr \ + : (Width == 32 ? AArch64::CB##RegCond##Wrr \ + : AArch64::CB##RegCond##Xrr)))) + unsigned MCOpC; + // Decide if we need to either swap register operands or increment/decrement // immediate operands - unsigned MCOpC; switch (CC) { default: llvm_unreachable("Invalid CB condition code"); case AArch64CC::EQ: - MCOpC = IsImm ? (Is32Bit ? AArch64::CBEQWri : AArch64::CBEQXri) - : (Is32Bit ? AArch64::CBEQWrr : AArch64::CBEQXrr); + MCOpC = GET_CB_OPC(IsImm, Width, /* Reg-Imm */ EQ, /* Reg-Reg */ EQ); break; case AArch64CC::NE: - MCOpC = IsImm ? (Is32Bit ? AArch64::CBNEWri : AArch64::CBNEXri) - : (Is32Bit ? AArch64::CBNEWrr : AArch64::CBNEXrr); + MCOpC = GET_CB_OPC(IsImm, Width, /* Reg-Imm */ NE, /* Reg-Reg */ NE); break; case AArch64CC::HS: - MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri) - : (Is32Bit ? AArch64::CBHSWrr : AArch64::CBHSXrr); + MCOpC = GET_CB_OPC(IsImm, Width, /* Reg-Imm */ HI, /* Reg-Reg */ HS); NeedsImmDec = IsImm; break; case AArch64CC::LO: - MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri) - : (Is32Bit ? AArch64::CBHIWrr : AArch64::CBHIXrr); + MCOpC = GET_CB_OPC(IsImm, Width, /* Reg-Imm */ LO, /* Reg-Reg */ HI); NeedsRegSwap = !IsImm; break; case AArch64CC::HI: - MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri) - : (Is32Bit ? 
AArch64::CBHIWrr : AArch64::CBHIXrr); + MCOpC = GET_CB_OPC(IsImm, Width, /* Reg-Imm */ HI, /* Reg-Reg */ HI); break; case AArch64CC::LS: - MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri) - : (Is32Bit ? AArch64::CBHSWrr : AArch64::CBHSXrr); + MCOpC = GET_CB_OPC(IsImm, Width, /* Reg-Imm */ LO, /* Reg-Reg */ HS); NeedsRegSwap = !IsImm; NeedsImmInc = IsImm; break; case AArch64CC::GE: - MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri) - : (Is32Bit ? AArch64::CBGEWrr : AArch64::CBGEXrr); + MCOpC = GET_CB_OPC(IsImm, Width, /* Reg-Imm */ GT, /* Reg-Reg */ GE); NeedsImmDec = IsImm; break; case AArch64CC::LT: - MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri) - : (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr); + MCOpC = GET_CB_OPC(IsImm, Width, /* Reg-Imm */ LT, /* Reg-Reg */ GT); NeedsRegSwap = !IsImm; break; case AArch64CC::GT: - MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri) - : (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr); + MCOpC = GET_CB_OPC(IsImm, Width, /* Reg-Imm */ GT, /* Reg-Reg */ GT); break; case AArch64CC::LE: - MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri) - : (Is32Bit ? 
AArch64::CBGEWrr : AArch64::CBGEXrr); + MCOpC = GET_CB_OPC(IsImm, Width, /* Reg-Imm */ LT, /* Reg-Reg */ GE); NeedsRegSwap = !IsImm; NeedsImmInc = IsImm; break; } +#undef GET_CB_OPC MCInst Inst; Inst.setOpcode(MCOpC); @@ -3438,6 +3447,8 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { } case AArch64::CBWPri: case AArch64::CBXPri: + case AArch64::CBBAssertExt: + case AArch64::CBHAssertExt: case AArch64::CBWPrr: case AArch64::CBXPrr: emitCBPseudoExpansion(MI); diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index e7b2d20e2a6cb..f1db05dda4e40 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -513,6 +513,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { bool SelectAnyPredicate(SDValue N); bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm); + + template + bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType); }; class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy { @@ -7697,3 +7700,31 @@ bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, return false; } + +template +bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, + SDValue &ExtType) { + + // Use an invalid shift-extend value to indicate we don't need to extend later + if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) { + EVT Ty = cast(N.getOperand(1))->getVT(); + if (Ty != (MatchCBB ? 
MVT::i8 : MVT::i16)) + return false; + Reg = N.getOperand(0); + ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend, + SDLoc(N), MVT::i32); + return true; + } + + AArch64_AM::ShiftExtendType ET = getExtendTypeForNode(N); + + if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) || + (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) { + Reg = N.getOperand(0); + ExtType = + CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32); + return true; + } + + return false; +} diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index a7874dfeab6ce..6871c2d504cf6 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -415,6 +415,12 @@ def CmpBranchUImm6Operand_64b let WantsParent = true; } +def CmpBranchBExtOperand + : ComplexPattern", []> {} + +def CmpBranchHExtOperand + : ComplexPattern", []> {} + def UImm6Plus1Operand : AsmOperandClass { let Name = "UImm6P1"; let DiagnosticType = "InvalidImm1_64"; @@ -13196,8 +13202,22 @@ multiclass CmpBranchRegisterAlias { } class CmpBranchRegisterPseudo - : Pseudo<(outs), (ins ccode:$Cond, regtype:$Rt, regtype:$Rm, am_brcmpcond:$Target), []>, - Sched<[WriteBr]> { + : Pseudo<(outs), + (ins ccode:$Cond, regtype:$Rt, regtype:$Rm, am_brcmpcond:$Target), + []>, + Sched<[WriteBr]> { + let isBranch = 1; + let isTerminator = 1; +} + +// Cmpbr pseudo instruction, encoding potentially folded zero-, sign-extension, +// assertzext and/or assertsext.
+class CmpBranchExtRegisterPseudo + : Pseudo<(outs), + (ins ccode:$Cond, GPR32:$Rt, GPR32:$Rm, am_brcmpcond:$Target, + simm8_32b:$ExtRt, simm8_32b:$ExtRm), + []>, + Sched<[WriteBr]> { let isBranch = 1; let isTerminator = 1; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 2c7aca8fc91ce..221812f1ebc7b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -241,6 +241,17 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, Cond.push_back(LastInst->getOperand(1)); Cond.push_back(LastInst->getOperand(2)); break; + case AArch64::CBBAssertExt: + case AArch64::CBHAssertExt: + Target = LastInst->getOperand(3).getMBB(); + Cond.push_back(MachineOperand::CreateImm(-1)); // -1 + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); // Opc + Cond.push_back(LastInst->getOperand(0)); // Cond + Cond.push_back(LastInst->getOperand(1)); // Op0 + Cond.push_back(LastInst->getOperand(2)); // Op1 + Cond.push_back(LastInst->getOperand(4)); // Ext0 + Cond.push_back(LastInst->getOperand(5)); // Ext1 + break; } } @@ -264,6 +275,8 @@ static unsigned getBranchDisplacementBits(unsigned Opc) { return BCCDisplacementBits; case AArch64::CBWPri: case AArch64::CBXPri: + case AArch64::CBBAssertExt: + case AArch64::CBHAssertExt: case AArch64::CBWPrr: case AArch64::CBXPrr: return CBDisplacementBits; @@ -298,6 +311,8 @@ AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const { return MI.getOperand(1).getMBB(); case AArch64::CBWPri: case AArch64::CBXPri: + case AArch64::CBBAssertExt: + case AArch64::CBHAssertExt: case AArch64::CBWPrr: case AArch64::CBXPrr: return MI.getOperand(3).getMBB(); @@ -580,9 +595,11 @@ bool AArch64InstrInfo::reverseBranchCondition( Cond[1].setImm(AArch64::TBZX); break; - // Cond is { -1, Opcode, CC, Op0, Op1 } + // Cond is { -1, Opcode, CC, Op0, Op1, ... 
} case AArch64::CBWPri: case AArch64::CBXPri: + case AArch64::CBBAssertExt: + case AArch64::CBHAssertExt: case AArch64::CBWPrr: case AArch64::CBXPrr: { // Pseudos using standard 4bit Arm condition codes @@ -654,6 +671,12 @@ void AArch64InstrInfo::instantiateCondBranch( MIB.add(Cond[4]); MIB.addMBB(TBB); + + // cb[b,h] + if (Cond.size() > 5) { + MIB.addImm(Cond[5].getImm()); + MIB.addImm(Cond[6].getImm()); + } } } @@ -931,44 +954,122 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, // We must insert a cmp, that is a subs // 0 1 2 3 4 // Cond is { -1, Opcode, CC, Op0, Op1 } - unsigned SUBSOpC, SUBSDestReg; + + unsigned SubsOpc, SubsDestReg; bool IsImm = false; CC = static_cast(Cond[2].getImm()); switch (Cond[1].getImm()) { default: llvm_unreachable("Unknown branch opcode in Cond"); case AArch64::CBWPri: - SUBSOpC = AArch64::SUBSWri; - SUBSDestReg = AArch64::WZR; + SubsOpc = AArch64::SUBSWri; + SubsDestReg = AArch64::WZR; IsImm = true; break; case AArch64::CBXPri: - SUBSOpC = AArch64::SUBSXri; - SUBSDestReg = AArch64::XZR; + SubsOpc = AArch64::SUBSXri; + SubsDestReg = AArch64::XZR; IsImm = true; break; case AArch64::CBWPrr: - SUBSOpC = AArch64::SUBSWrr; - SUBSDestReg = AArch64::WZR; + SubsOpc = AArch64::SUBSWrr; + SubsDestReg = AArch64::WZR; IsImm = false; break; case AArch64::CBXPrr: - SUBSOpC = AArch64::SUBSXrr; - SUBSDestReg = AArch64::XZR; + SubsOpc = AArch64::SUBSXrr; + SubsDestReg = AArch64::XZR; IsImm = false; break; } if (IsImm) - BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg) + BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg) .addReg(Cond[3].getReg()) .addImm(Cond[4].getImm()) .addImm(0); else - BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg) + BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg) .addReg(Cond[3].getReg()) .addReg(Cond[4].getReg()); - } + } break; + case 7: { // cb[b,h] + // We must insert a cmp, that is a subs, but also zero- or sign-extensions + // that have been folded. 
For the first operand we codegen an explicit + // extension, for the second operand we fold the extension into cmp. + // 0 1 2 3 4 5 6 + // Cond is { -1, Opcode, CC, Op0, Op1, Ext0, Ext1 } + + // We need a new register for the now explicitly extended register + Register Reg = Cond[4].getReg(); + if (Cond[5].getImm() != AArch64_AM::InvalidShiftExtend) { + unsigned ExtOpc; + unsigned ExtBits; + AArch64_AM::ShiftExtendType ExtendType = + AArch64_AM::getExtendType(Cond[5].getImm()); + switch (ExtendType) { + default: + llvm_unreachable("Unknown shift-extend for CB instruction"); + case AArch64_AM::SXTB: + assert( + Cond[1].getImm() == AArch64::CBBAssertExt && + "Unexpected compare-and-branch instruction for SXTB shift-extend"); + ExtOpc = AArch64::SBFMWri; + ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32); + break; + case AArch64_AM::SXTH: + assert( + Cond[1].getImm() == AArch64::CBHAssertExt && + "Unexpected compare-and-branch instruction for SXTH shift-extend"); + ExtOpc = AArch64::SBFMWri; + ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32); + break; + case AArch64_AM::UXTB: + assert( + Cond[1].getImm() == AArch64::CBBAssertExt && + "Unexpected compare-and-branch instruction for UXTB shift-extend"); + ExtOpc = AArch64::ANDWri; + ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32); + break; + case AArch64_AM::UXTH: + assert( + Cond[1].getImm() == AArch64::CBHAssertExt && + "Unexpected compare-and-branch instruction for UXTH shift-extend"); + ExtOpc = AArch64::ANDWri; + ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32); + break; + } + + // Build the explicit extension of the first operand + Reg = MRI.createVirtualRegister(&AArch64::GPR32spRegClass); + MachineInstrBuilder MBBI = + BuildMI(MBB, I, DL, get(ExtOpc), Reg).addReg(Cond[4].getReg()); + if (ExtOpc != AArch64::ANDWri) + MBBI.addImm(0); + MBBI.addImm(ExtBits); + } + + // Now, subs with an extended second operand + if (Cond[6].getImm() != AArch64_AM::InvalidShiftExtend) { + 
AArch64_AM::ShiftExtendType ExtendType = + AArch64_AM::getExtendType(Cond[6].getImm()); + MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg())); + MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass); + BuildMI(MBB, I, DL, get(AArch64::SUBSWrx), AArch64::WZR) + .addReg(Cond[3].getReg()) + .addReg(Reg) + .addImm(AArch64_AM::getArithExtendImm(ExtendType, 0)); + } // If no extension is needed, just a regular subs + else { + MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg())); + MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass); + BuildMI(MBB, I, DL, get(AArch64::SUBSWrr), AArch64::WZR) + .addReg(Cond[3].getReg()) + .addReg(Reg); + } + + CC = static_cast(Cond[2].getImm()); + } break; } unsigned Opc = 0; @@ -9372,6 +9473,8 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { case AArch64::Bcc: case AArch64::CBWPri: case AArch64::CBXPri: + case AArch64::CBBAssertExt: + case AArch64::CBHAssertExt: case AArch64::CBWPrr: case AArch64::CBXPrr: return false; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 59b9c0da008f4..2de2e0d73901f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -720,6 +720,8 @@ static inline bool isCondBranchOpcode(int Opc) { case AArch64::TBNZX: case AArch64::CBWPri: case AArch64::CBXPri: + case AArch64::CBBAssertExt: + case AArch64::CBHAssertExt: case AArch64::CBWPrr: case AArch64::CBXPrr: return true; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 84f0d47c02bad..094917b33bc17 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -11318,23 +11318,37 @@ let Predicates = [HasCMPBR] in { defm : CmpBranchWRegisterAlias<"cbhlt", "CBHGT">; // Pseudos for codegen - def CBWPrr : CmpBranchRegisterPseudo; - def CBXPrr : CmpBranchRegisterPseudo; - def CBWPri : 
CmpBranchImmediatePseudo; - def CBXPri : CmpBranchImmediatePseudo; - - def : Pat<(AArch64CB i32:$Cond, GPR32:$Rn, CmpBranchUImm6Operand_32b:$Imm, - bb:$Target), - (CBWPri i32:$Cond, GPR32:$Rn, uimm6_32b:$Imm, - am_brcmpcond:$Target)>; - def : Pat<(AArch64CB i32:$Cond, GPR64:$Rn, CmpBranchUImm6Operand_64b:$Imm, - bb:$Target), - (CBXPri i32:$Cond, GPR64:$Rn, uimm6_64b:$Imm, - am_brcmpcond:$Target)>; - def : Pat<(AArch64CB i32:$Cond, GPR32:$Rn, GPR32:$Rt, bb:$Target), - (CBWPrr ccode:$Cond, GPR32:$Rn, GPR32:$Rt, am_brcmpcond:$Target)>; - def : Pat<(AArch64CB i32:$Cond, GPR64:$Rn, GPR64:$Rt, bb:$Target), - (CBXPrr ccode:$Cond, GPR64:$Rn, GPR64:$Rt, am_brcmpcond:$Target)>; + def CBBAssertExt : CmpBranchExtRegisterPseudo; + def CBHAssertExt : CmpBranchExtRegisterPseudo; + def CBWPrr : CmpBranchRegisterPseudo; + def CBXPrr : CmpBranchRegisterPseudo; + def CBWPri : CmpBranchImmediatePseudo; + def CBXPri : CmpBranchImmediatePseudo; + + def : Pat<(AArch64CB i32:$Cond, GPR32:$Rn, CmpBranchUImm6Operand_32b:$Imm, + bb:$Target), + (CBWPri i32:$Cond, GPR32:$Rn, uimm6_32b:$Imm, am_brcmpcond:$Target)>; + def : Pat<(AArch64CB i32:$Cond, GPR64:$Rn, CmpBranchUImm6Operand_64b:$Imm, + bb:$Target), + (CBXPri i32:$Cond, GPR64:$Rn, uimm6_64b:$Imm, am_brcmpcond:$Target)>; + def : Pat<(AArch64CB i32:$Cond, GPR32:$Rn, GPR32:$Rt, bb:$Target), + (CBWPrr ccode:$Cond, GPR32:$Rn, GPR32:$Rt, am_brcmpcond:$Target)>; + def : Pat<(AArch64CB i32:$Cond, GPR64:$Rn, GPR64:$Rt, bb:$Target), + (CBXPrr ccode:$Cond, GPR64:$Rn, GPR64:$Rt, am_brcmpcond:$Target)>; + + def : Pat<(AArch64CB i32:$Cond, + (CmpBranchBExtOperand GPR32:$Rn, simm8_32b:$ExtTypeRn), + (CmpBranchBExtOperand GPR32:$Rt, simm8_32b:$ExtTypeRt), + bb:$Target), + (CBBAssertExt ccode:$Cond, GPR32:$Rn, GPR32:$Rt, bb:$Target, + simm8_32b:$ExtTypeRn, simm8_32b:$ExtTypeRt)>; + + def : Pat<(AArch64CB i32:$Cond, + (CmpBranchHExtOperand GPR32:$Rn, simm8_32b:$ExtTypeRn), + (CmpBranchHExtOperand GPR32:$Rt, simm8_32b:$ExtTypeRt), + bb:$Target), + 
(CBHAssertExt ccode:$Cond, GPR32:$Rn, GPR32:$Rt, bb:$Target, + simm8_32b:$ExtTypeRn, simm8_32b:$ExtTypeRt)>; } // HasCMPBR diff --git a/llvm/test/CodeGen/AArch64/cmpbr-early-ifcvt.mir b/llvm/test/CodeGen/AArch64/cmpbr-early-ifcvt.mir index c3377164f357e..a173780935ae6 100644 --- a/llvm/test/CodeGen/AArch64/cmpbr-early-ifcvt.mir +++ b/llvm/test/CodeGen/AArch64/cmpbr-early-ifcvt.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=arm64-apple-ios -mattr +cmpbr -run-pass=early-ifcvt -simplify-mir -o - %s | FileCheck %s -# CHECK: cb_diamond +# RUN: llc -mtriple=arm64-apple-ios -mattr +cmpbr -run-pass=early-ifcvt -verify-machineinstrs -simplify-mir -o - %s | FileCheck %s --- name: cb_diamond alignment: 4 @@ -114,3 +113,435 @@ body: | $x0 = COPY %4 RET_ReallyLR implicit $x0 ... +--- +name: cbb_diamond_no_ext +alignment: 4 +tracksRegLiveness: true +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32 } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cbb_diamond_no_ext + ; CHECK: bb.0: + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY]], [[COPY1]], $wzr + ; CHECK-NEXT: $wzr = SUBSWrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[ADDWrr]], [[MADDWrrr]], 11, implicit $nzcv + ; CHECK-NEXT: [[ADDWrr1:%[0-9]+]]:gpr32 = ADDWrr killed [[CSELWr]], [[COPY]] + ; CHECK-NEXT: $w0 = 
COPY [[ADDWrr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + bb.0: + successors: %bb.1, %bb.2 + liveins: $w0, $w1 + + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + CBBAssertExt 11, %0, %1, %bb.1, -1, -1 + B %bb.2 + + bb.1: + successors: %bb.3 + %2:gpr32 = ADDWrr %0, %1 + B %bb.3 + + bb.2: + successors: %bb.3 + %3:gpr32 = MADDWrrr %0, %1, $wzr + B %bb.3 + + bb.3: + %4:gpr32 = PHI %2, %bb.1, %3, %bb.2 + %5:gpr32 = ADDWrr killed %4, %0 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 +... +--- +name: cbb_diamond_zext +alignment: 4 +tracksRegLiveness: true +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32 } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cbb_diamond_zext + ; CHECK: bb.0: + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY]], [[COPY1]], $wzr + ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[COPY1]], 7 + ; CHECK-NEXT: $wzr = SUBSWrx [[COPY]], [[ANDWri]], 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[ADDWrr]], [[MADDWrrr]], 11, implicit $nzcv + ; CHECK-NEXT: [[ADDWrr1:%[0-9]+]]:gpr32 = ADDWrr killed [[CSELWr]], [[COPY]] + ; CHECK-NEXT: $w0 = COPY [[ADDWrr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + bb.0: + successors: %bb.1, %bb.2 + liveins: $w0, $w1 + + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + CBBAssertExt 11, %0, %1, %bb.1, 0, 0 + B %bb.2 + + bb.1: + successors: %bb.3 + %2:gpr32 = ADDWrr %0, %1 + B %bb.3 + + bb.2: + successors: %bb.3 + %3:gpr32 = 
MADDWrrr %0, %1, $wzr + B %bb.3 + + bb.3: + %4:gpr32 = PHI %2, %bb.1, %3, %bb.2 + %5:gpr32 = ADDWrr killed %4, %0 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 +... +--- +name: cbb_diamond_sext +alignment: 4 +tracksRegLiveness: true +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32 } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cbb_diamond_sext + ; CHECK: bb.0: + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY]], [[COPY1]], $wzr + ; CHECK-NEXT: [[SBFMWri:%[0-9]+]]:gpr32common = SBFMWri [[COPY1]], 0, 7 + ; CHECK-NEXT: $wzr = SUBSWrx [[COPY]], [[SBFMWri]], 32, implicit-def $nzcv + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[ADDWrr]], [[MADDWrrr]], 11, implicit $nzcv + ; CHECK-NEXT: [[ADDWrr1:%[0-9]+]]:gpr32 = ADDWrr killed [[CSELWr]], [[COPY]] + ; CHECK-NEXT: $w0 = COPY [[ADDWrr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + bb.0: + successors: %bb.1, %bb.2 + liveins: $w0, $w1 + + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + CBBAssertExt 11, %0, %1, %bb.1, 4, 4 + B %bb.2 + + bb.1: + successors: %bb.3 + %2:gpr32 = ADDWrr %0, %1 + B %bb.3 + + bb.2: + successors: %bb.3 + %3:gpr32 = MADDWrrr %0, %1, $wzr + B %bb.3 + + bb.3: + %4:gpr32 = PHI %2, %bb.1, %3, %bb.2 + %5:gpr32 = ADDWrr killed %4, %0 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 +... 
+--- +name: cbh_diamond_zext +alignment: 4 +tracksRegLiveness: true +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32 } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cbh_diamond_zext + ; CHECK: bb.0: + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY]], [[COPY1]], $wzr + ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[COPY1]], 15 + ; CHECK-NEXT: $wzr = SUBSWrx [[COPY]], [[ANDWri]], 8, implicit-def $nzcv + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[ADDWrr]], [[MADDWrrr]], 11, implicit $nzcv + ; CHECK-NEXT: [[ADDWrr1:%[0-9]+]]:gpr32 = ADDWrr killed [[CSELWr]], [[COPY]] + ; CHECK-NEXT: $w0 = COPY [[ADDWrr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + bb.0: + successors: %bb.1, %bb.2 + liveins: $w0, $w1 + + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + CBHAssertExt 11, %0, %1, %bb.1, 1, 1 + B %bb.2 + + bb.1: + successors: %bb.3 + %2:gpr32 = ADDWrr %0, %1 + B %bb.3 + + bb.2: + successors: %bb.3 + %3:gpr32 = MADDWrrr %0, %1, $wzr + B %bb.3 + + bb.3: + %4:gpr32 = PHI %2, %bb.1, %3, %bb.2 + %5:gpr32 = ADDWrr killed %4, %0 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 +... 
+--- +name: cbh_diamond_sext +alignment: 4 +tracksRegLiveness: true +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32 } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cbh_diamond_sext + ; CHECK: bb.0: + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY]], [[COPY1]], $wzr + ; CHECK-NEXT: [[SBFMWri:%[0-9]+]]:gpr32common = SBFMWri [[COPY1]], 0, 15 + ; CHECK-NEXT: $wzr = SUBSWrx [[COPY]], [[SBFMWri]], 40, implicit-def $nzcv + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[ADDWrr]], [[MADDWrrr]], 11, implicit $nzcv + ; CHECK-NEXT: [[ADDWrr1:%[0-9]+]]:gpr32 = ADDWrr killed [[CSELWr]], [[COPY]] + ; CHECK-NEXT: $w0 = COPY [[ADDWrr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + bb.0: + successors: %bb.1, %bb.2 + liveins: $w0, $w1 + + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + CBHAssertExt 11, %0, %1, %bb.1, 5, 5 + B %bb.2 + + bb.1: + successors: %bb.3 + %2:gpr32 = ADDWrr %0, %1 + B %bb.3 + + bb.2: + successors: %bb.3 + %3:gpr32 = MADDWrrr %0, %1, $wzr + B %bb.3 + + bb.3: + %4:gpr32 = PHI %2, %bb.1, %3, %bb.2 + %5:gpr32 = ADDWrr killed %4, %0 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 +... 
+--- +name: cbh_diamond_lhs_sext +alignment: 4 +tracksRegLiveness: true +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32 } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cbh_diamond_lhs_sext + ; CHECK: bb.0: + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY]], [[COPY1]], $wzr + ; CHECK-NEXT: [[SBFMWri:%[0-9]+]]:gpr32common = SBFMWri [[COPY1]], 0, 15 + ; CHECK-NEXT: $wzr = SUBSWrr [[COPY]], [[SBFMWri]], implicit-def $nzcv + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[ADDWrr]], [[MADDWrrr]], 11, implicit $nzcv + ; CHECK-NEXT: [[ADDWrr1:%[0-9]+]]:gpr32 = ADDWrr killed [[CSELWr]], [[COPY]] + ; CHECK-NEXT: $w0 = COPY [[ADDWrr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + bb.0: + successors: %bb.1, %bb.2 + liveins: $w0, $w1 + + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + CBHAssertExt 11, %0, %1, %bb.1, 5, -1 + B %bb.2 + + bb.1: + successors: %bb.3 + %2:gpr32 = ADDWrr %0, %1 + B %bb.3 + + bb.2: + successors: %bb.3 + %3:gpr32 = MADDWrrr %0, %1, $wzr + B %bb.3 + + bb.3: + %4:gpr32 = PHI %2, %bb.1, %3, %bb.2 + %5:gpr32 = ADDWrr killed %4, %0 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 +... 
+--- +name: cbh_diamond_rhs_sext +alignment: 4 +tracksRegLiveness: true +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32 } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cbh_diamond_rhs_sext + ; CHECK: bb.0: + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY]], [[COPY1]], $wzr + ; CHECK-NEXT: $wzr = SUBSWrx [[COPY]], [[COPY1]], 40, implicit-def $nzcv + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[ADDWrr]], [[MADDWrrr]], 11, implicit $nzcv + ; CHECK-NEXT: [[ADDWrr1:%[0-9]+]]:gpr32 = ADDWrr killed [[CSELWr]], [[COPY]] + ; CHECK-NEXT: $w0 = COPY [[ADDWrr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + bb.0: + successors: %bb.1, %bb.2 + liveins: $w0, $w1 + + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + CBHAssertExt 11, %0, %1, %bb.1, -1, 5 + B %bb.2 + + bb.1: + successors: %bb.3 + %2:gpr32 = ADDWrr %0, %1 + B %bb.3 + + bb.2: + successors: %bb.3 + %3:gpr32 = MADDWrrr %0, %1, $wzr + B %bb.3 + + bb.3: + %4:gpr32 = PHI %2, %bb.1, %3, %bb.2 + %5:gpr32 = ADDWrr killed %4, %0 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 +... 
diff --git a/llvm/test/CodeGen/AArch64/cmpbr-reg-reg.ll b/llvm/test/CodeGen/AArch64/cmpbr-reg-reg.ll index 9e95434564f02..f520456235147 100644 --- a/llvm/test/CodeGen/AArch64/cmpbr-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/cmpbr-reg-reg.ll @@ -2,23 +2,624 @@ ; RUN: llc -mtriple arm64-apple-ios -mattr +cmpbr -verify-machineinstrs -o - < %s | FileCheck %s --check-prefix=CHECK-CMPBR ; RUN: llc -mtriple arm64-apple-ios -mattr -cmpbr -verify-machineinstrs -o - < %s | FileCheck %s --check-prefix=CHECK-NO-CMPBR +define void @cbgt_i8(i8 %a, i8 %b) { +; CHECK-CMPBR-LABEL: cbgt_i8: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbbgt w0, w1, LBB0_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbgt_i8: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: sxtb w8, w0 +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, sxtb +; CHECK-NO-CMPBR-NEXT: b.gt LBB0_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sgt i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbge_i8(i8 %a, i8 %b) { +; CHECK-CMPBR-LABEL: cbge_i8: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbbge w0, w1, LBB1_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbge_i8: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: sxtb w8, w0 +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, sxtb +; CHECK-NO-CMPBR-NEXT: b.ge LBB1_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sge i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + 
tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + + +define void @cbhi_i8(i8 %a, i8 %b) { +; CHECK-CMPBR-LABEL: cbhi_i8: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbbhi w0, w1, LBB2_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhi_i8: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: and w8, w0, #0xff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxtb +; CHECK-NO-CMPBR-NEXT: b.hi LBB2_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ugt i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbhs_i8(i8 %a, i8 %b) { +; CHECK-CMPBR-LABEL: cbhs_i8: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbbhs w0, w1, LBB3_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhs_i8: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: and w8, w0, #0xff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxtb +; CHECK-NO-CMPBR-NEXT: b.hs LBB3_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp uge i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbeq_i8(i8 %a, i8 %b) { +; CHECK-CMPBR-LABEL: cbeq_i8: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbbeq w0, w1, LBB4_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbeq_i8: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; 
CHECK-NO-CMPBR-NEXT: and w8, w0, #0xff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxtb +; CHECK-NO-CMPBR-NEXT: b.eq LBB4_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp eq i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbne_i8(i8 %a, i8 %b) { +; CHECK-CMPBR-LABEL: cbne_i8: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbbne w0, w1, LBB5_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbne_i8: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: and w8, w0, #0xff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxtb +; CHECK-NO-CMPBR-NEXT: b.ne LBB5_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ne i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cble_ge_swap_i8(i8 %a, i8 %b) { +; CHECK-CMPBR-LABEL: cble_ge_swap_i8: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbbge w1, w0, LBB6_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB6_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cble_ge_swap_i8: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: sxtb w8, w0 +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, sxtb +; CHECK-NO-CMPBR-NEXT: b.le LBB6_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB6_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sle i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret 
void +} + +define void @cblo_hi_swap_i8(i8 %a, i8 %b) { +; CHECK-CMPBR-LABEL: cblo_hi_swap_i8: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbbhi w1, w0, LBB7_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cblo_hi_swap_i8: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: and w8, w0, #0xff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxtb +; CHECK-NO-CMPBR-NEXT: b.lo LBB7_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ult i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbls_hs_swap_i8(i8 %a, i8 %b) { +; CHECK-CMPBR-LABEL: cbls_hs_swap_i8: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbbhs w1, w0, LBB8_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB8_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbls_hs_swap_i8: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: and w8, w0, #0xff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxtb +; CHECK-NO-CMPBR-NEXT: b.ls LBB8_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB8_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ule i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cblt_gt_swap_i8(i8 %a, i8 %b) { +; CHECK-CMPBR-LABEL: cblt_gt_swap_i8: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbbgt w1, w0, LBB9_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB9_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cblt_gt_swap_i8: +; CHECK-NO-CMPBR: ; %bb.0: ; 
%entry +; CHECK-NO-CMPBR-NEXT: sxtb w8, w0 +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, sxtb +; CHECK-NO-CMPBR-NEXT: b.lt LBB9_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB9_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp slt i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbgt_i16(i16 %a, i16 %b) { +; CHECK-CMPBR-LABEL: cbgt_i16: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhgt w0, w1, LBB10_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB10_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbgt_i16: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: sxth w8, w0 +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, sxth +; CHECK-NO-CMPBR-NEXT: b.gt LBB10_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB10_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sgt i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbge_i16(i16 %a, i16 %b) { +; CHECK-CMPBR-LABEL: cbge_i16: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhge w0, w1, LBB11_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB11_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbge_i16: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: sxth w8, w0 +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, sxth +; CHECK-NO-CMPBR-NEXT: b.ge LBB11_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB11_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sge i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void 
+} + + +define void @cbhi_i16(i16 %a, i16 %b) { +; CHECK-CMPBR-LABEL: cbhi_i16: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhhi w0, w1, LBB12_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB12_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhi_i16: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: and w8, w0, #0xffff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxth +; CHECK-NO-CMPBR-NEXT: b.hi LBB12_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB12_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ugt i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbhs_i16(i16 %a, i16 %b) { +; CHECK-CMPBR-LABEL: cbhs_i16: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhhs w0, w1, LBB13_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB13_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhs_i16: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: and w8, w0, #0xffff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxth +; CHECK-NO-CMPBR-NEXT: b.hs LBB13_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB13_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp uge i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbeq_i16(i16 %a, i16 %b) { +; CHECK-CMPBR-LABEL: cbeq_i16: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbheq w0, w1, LBB14_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB14_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbeq_i16: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: and w8, w0, 
#0xffff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxth +; CHECK-NO-CMPBR-NEXT: b.eq LBB14_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB14_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp eq i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbne_i16(i16 %a, i16 %b) { +; CHECK-CMPBR-LABEL: cbne_i16: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhne w0, w1, LBB15_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB15_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbne_i16: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: and w8, w0, #0xffff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxth +; CHECK-NO-CMPBR-NEXT: b.ne LBB15_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB15_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ne i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cble_ge_swap_i16(i16 %a, i16 %b) { +; CHECK-CMPBR-LABEL: cble_ge_swap_i16: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhge w1, w0, LBB16_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB16_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cble_ge_swap_i16: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: sxth w8, w0 +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, sxth +; CHECK-NO-CMPBR-NEXT: b.le LBB16_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB16_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sle i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} 
+ +define void @cblo_hi_swap_i16(i16 %a, i16 %b) { +; CHECK-CMPBR-LABEL: cblo_hi_swap_i16: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhhi w1, w0, LBB17_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB17_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cblo_hi_swap_i16: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: and w8, w0, #0xffff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxth +; CHECK-NO-CMPBR-NEXT: b.lo LBB17_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB17_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ult i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbls_hs_swap_i16(i16 %a, i16 %b) { +; CHECK-CMPBR-LABEL: cbls_hs_swap_i16: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhhs w1, w0, LBB18_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB18_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbls_hs_swap_i16: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: and w8, w0, #0xffff +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, uxth +; CHECK-NO-CMPBR-NEXT: b.ls LBB18_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB18_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ule i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cblt_gt_swap_i16(i16 %a, i16 %b) { +; CHECK-CMPBR-LABEL: cblt_gt_swap_i16: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhgt w1, w0, LBB19_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB19_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cblt_gt_swap_i16: +; 
CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: sxth w8, w0 +; CHECK-NO-CMPBR-NEXT: cmp w8, w1, sxth +; CHECK-NO-CMPBR-NEXT: b.lt LBB19_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB19_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp slt i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} define void @cbgt_i32(i32 %a, i32 %b) { ; CHECK-CMPBR-LABEL: cbgt_i32: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbgt w0, w1, LBB0_2 +; CHECK-CMPBR-NEXT: cbgt w0, w1, LBB20_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB20_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbgt_i32: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp w0, w1 -; CHECK-NO-CMPBR-NEXT: b.gt LBB0_2 +; CHECK-NO-CMPBR-NEXT: b.gt LBB20_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB20_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp sgt i32 %a, %b @@ -35,19 +636,19 @@ if.end: define void @cbge_i32(i32 %a, i32 %b) { ; CHECK-CMPBR-LABEL: cbge_i32: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbge w0, w1, LBB1_2 +; CHECK-CMPBR-NEXT: cbge w0, w1, LBB21_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB21_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbge_i32: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp w0, w1 -; CHECK-NO-CMPBR-NEXT: b.ge LBB1_2 +; CHECK-NO-CMPBR-NEXT: b.ge LBB21_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB21_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp sge 
i32 %a, %b @@ -65,19 +666,19 @@ if.end: define void @cbhi_i32(i32 %a, i32 %b) { ; CHECK-CMPBR-LABEL: cbhi_i32: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbhi w0, w1, LBB2_2 +; CHECK-CMPBR-NEXT: cbhi w0, w1, LBB22_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB22_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbhi_i32: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp w0, w1 -; CHECK-NO-CMPBR-NEXT: b.hi LBB2_2 +; CHECK-NO-CMPBR-NEXT: b.hi LBB22_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB22_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp ugt i32 %a, %b @@ -94,19 +695,19 @@ if.end: define void @cbhs_i32(i32 %a, i32 %b) { ; CHECK-CMPBR-LABEL: cbhs_i32: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbhs w0, w1, LBB3_2 +; CHECK-CMPBR-NEXT: cbhs w0, w1, LBB23_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB23_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbhs_i32: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp w0, w1 -; CHECK-NO-CMPBR-NEXT: b.hs LBB3_2 +; CHECK-NO-CMPBR-NEXT: b.hs LBB23_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB23_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp uge i32 %a, %b @@ -123,19 +724,19 @@ if.end: define void @cbeq_i32(i32 %a, i32 %b) { ; CHECK-CMPBR-LABEL: cbeq_i32: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbeq w0, w1, LBB4_2 +; CHECK-CMPBR-NEXT: cbeq w0, w1, LBB24_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB24_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; 
CHECK-NO-CMPBR-LABEL: cbeq_i32: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp w0, w1 -; CHECK-NO-CMPBR-NEXT: b.eq LBB4_2 +; CHECK-NO-CMPBR-NEXT: b.eq LBB24_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB24_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp eq i32 %a, %b @@ -152,19 +753,19 @@ if.end: define void @cbne_i32(i32 %a, i32 %b) { ; CHECK-CMPBR-LABEL: cbne_i32: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbne w0, w1, LBB5_2 +; CHECK-CMPBR-NEXT: cbne w0, w1, LBB25_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB25_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbne_i32: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp w0, w1 -; CHECK-NO-CMPBR-NEXT: b.ne LBB5_2 +; CHECK-NO-CMPBR-NEXT: b.ne LBB25_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB25_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp ne i32 %a, %b @@ -181,19 +782,19 @@ if.end: define void @cble_ge_swap_i32(i32 %a, i32 %b) { ; CHECK-CMPBR-LABEL: cble_ge_swap_i32: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbge w1, w0, LBB6_2 +; CHECK-CMPBR-NEXT: cbge w1, w0, LBB26_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB6_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB26_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cble_ge_swap_i32: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp w0, w1 -; CHECK-NO-CMPBR-NEXT: b.le LBB6_2 +; CHECK-NO-CMPBR-NEXT: b.le LBB26_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB6_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB26_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp sle i32 %a, 
%b @@ -210,19 +811,19 @@ if.end: define void @cblo_hi_swap_i32(i32 %a, i32 %b) { ; CHECK-CMPBR-LABEL: cblo_hi_swap_i32: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbhi w1, w0, LBB7_2 +; CHECK-CMPBR-NEXT: cbhi w1, w0, LBB27_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB27_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cblo_hi_swap_i32: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp w0, w1 -; CHECK-NO-CMPBR-NEXT: b.lo LBB7_2 +; CHECK-NO-CMPBR-NEXT: b.lo LBB27_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB27_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp ult i32 %a, %b @@ -239,19 +840,19 @@ if.end: define void @cbls_hs_swap_i32(i32 %a, i32 %b) { ; CHECK-CMPBR-LABEL: cbls_hs_swap_i32: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbhs w1, w0, LBB8_2 +; CHECK-CMPBR-NEXT: cbhs w1, w0, LBB28_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB8_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB28_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbls_hs_swap_i32: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp w0, w1 -; CHECK-NO-CMPBR-NEXT: b.ls LBB8_2 +; CHECK-NO-CMPBR-NEXT: b.ls LBB28_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB8_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB28_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp ule i32 %a, %b @@ -268,19 +869,19 @@ if.end: define void @cblt_gt_swap_i32(i32 %a, i32 %b) { ; CHECK-CMPBR-LABEL: cblt_gt_swap_i32: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbgt w1, w0, LBB9_2 +; CHECK-CMPBR-NEXT: cbgt w1, w0, LBB29_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB9_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB29_2: 
; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cblt_gt_swap_i32: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp w0, w1 -; CHECK-NO-CMPBR-NEXT: b.lt LBB9_2 +; CHECK-NO-CMPBR-NEXT: b.lt LBB29_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB9_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB29_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp slt i32 %a, %b @@ -297,19 +898,19 @@ if.end: define void @cbgt_i64(i64 %a, i64 %b) { ; CHECK-CMPBR-LABEL: cbgt_i64: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbgt x0, x1, LBB10_2 +; CHECK-CMPBR-NEXT: cbgt x0, x1, LBB30_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB10_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB30_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbgt_i64: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp x0, x1 -; CHECK-NO-CMPBR-NEXT: b.gt LBB10_2 +; CHECK-NO-CMPBR-NEXT: b.gt LBB30_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB10_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB30_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp sgt i64 %a, %b @@ -326,19 +927,19 @@ if.end: define void @cbge_i64(i64 %a, i64 %b) { ; CHECK-CMPBR-LABEL: cbge_i64: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbge x0, x1, LBB11_2 +; CHECK-CMPBR-NEXT: cbge x0, x1, LBB31_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB11_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB31_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbge_i64: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp x0, x1 -; CHECK-NO-CMPBR-NEXT: b.ge LBB11_2 +; CHECK-NO-CMPBR-NEXT: b.ge LBB31_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB11_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB31_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk 
#0x1 entry: %cmp = icmp sge i64 %a, %b @@ -356,19 +957,19 @@ if.end: define void @cbhi_i64(i64 %a, i64 %b) { ; CHECK-CMPBR-LABEL: cbhi_i64: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbhi x0, x1, LBB12_2 +; CHECK-CMPBR-NEXT: cbhi x0, x1, LBB32_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB12_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB32_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbhi_i64: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp x0, x1 -; CHECK-NO-CMPBR-NEXT: b.hi LBB12_2 +; CHECK-NO-CMPBR-NEXT: b.hi LBB32_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB12_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB32_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp ugt i64 %a, %b @@ -385,19 +986,19 @@ if.end: define void @cbhs_i64(i64 %a, i64 %b) { ; CHECK-CMPBR-LABEL: cbhs_i64: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbhs x0, x1, LBB13_2 +; CHECK-CMPBR-NEXT: cbhs x0, x1, LBB33_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB13_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB33_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbhs_i64: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp x0, x1 -; CHECK-NO-CMPBR-NEXT: b.hs LBB13_2 +; CHECK-NO-CMPBR-NEXT: b.hs LBB33_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB13_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB33_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp uge i64 %a, %b @@ -414,19 +1015,19 @@ if.end: define void @cbeq_i64(i64 %a, i64 %b) { ; CHECK-CMPBR-LABEL: cbeq_i64: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbeq x0, x1, LBB14_2 +; CHECK-CMPBR-NEXT: cbeq x0, x1, LBB34_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB14_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB34_2: ; %if.then ; 
CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbeq_i64: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp x0, x1 -; CHECK-NO-CMPBR-NEXT: b.eq LBB14_2 +; CHECK-NO-CMPBR-NEXT: b.eq LBB34_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB14_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB34_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp eq i64 %a, %b @@ -443,19 +1044,19 @@ if.end: define void @cbne_i64(i64 %a, i64 %b) { ; CHECK-CMPBR-LABEL: cbne_i64: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbne x0, x1, LBB15_2 +; CHECK-CMPBR-NEXT: cbne x0, x1, LBB35_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB15_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB35_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbne_i64: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp x0, x1 -; CHECK-NO-CMPBR-NEXT: b.ne LBB15_2 +; CHECK-NO-CMPBR-NEXT: b.ne LBB35_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB15_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB35_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp ne i64 %a, %b @@ -472,19 +1073,19 @@ if.end: define void @cble_ge_swap_i64(i64 %a, i64 %b) { ; CHECK-CMPBR-LABEL: cble_ge_swap_i64: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbge x1, x0, LBB16_2 +; CHECK-CMPBR-NEXT: cbge x1, x0, LBB36_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB16_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB36_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cble_ge_swap_i64: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp x0, x1 -; CHECK-NO-CMPBR-NEXT: b.le LBB16_2 +; CHECK-NO-CMPBR-NEXT: b.le LBB36_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB16_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB36_2: ; %if.then ; 
CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp sle i64 %a, %b @@ -501,19 +1102,19 @@ if.end: define void @cblo_hi_swap_i64(i64 %a, i64 %b) { ; CHECK-CMPBR-LABEL: cblo_hi_swap_i64: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbhi x1, x0, LBB17_2 +; CHECK-CMPBR-NEXT: cbhi x1, x0, LBB37_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB17_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB37_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cblo_hi_swap_i64: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp x0, x1 -; CHECK-NO-CMPBR-NEXT: b.lo LBB17_2 +; CHECK-NO-CMPBR-NEXT: b.lo LBB37_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB17_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB37_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp ult i64 %a, %b @@ -530,19 +1131,19 @@ if.end: define void @cbls_hs_swap_i64(i64 %a, i64 %b) { ; CHECK-CMPBR-LABEL: cbls_hs_swap_i64: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbhs x1, x0, LBB18_2 +; CHECK-CMPBR-NEXT: cbhs x1, x0, LBB38_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: ret -; CHECK-CMPBR-NEXT: LBB18_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB38_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cbls_hs_swap_i64: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp x0, x1 -; CHECK-NO-CMPBR-NEXT: b.ls LBB18_2 +; CHECK-NO-CMPBR-NEXT: b.ls LBB38_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB18_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB38_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp ule i64 %a, %b @@ -559,19 +1160,19 @@ if.end: define void @cblt_gt_swap_i64(i64 %a, i64 %b) { ; CHECK-CMPBR-LABEL: cblt_gt_swap_i64: ; CHECK-CMPBR: ; %bb.0: ; %entry -; CHECK-CMPBR-NEXT: cbgt x1, x0, LBB19_2 +; CHECK-CMPBR-NEXT: cbgt x1, x0, LBB39_2 ; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-CMPBR-NEXT: 
ret -; CHECK-CMPBR-NEXT: LBB19_2: ; %if.then +; CHECK-CMPBR-NEXT: LBB39_2: ; %if.then ; CHECK-CMPBR-NEXT: brk #0x1 ; ; CHECK-NO-CMPBR-LABEL: cblt_gt_swap_i64: ; CHECK-NO-CMPBR: ; %bb.0: ; %entry ; CHECK-NO-CMPBR-NEXT: cmp x0, x1 -; CHECK-NO-CMPBR-NEXT: b.lt LBB19_2 +; CHECK-NO-CMPBR-NEXT: b.lt LBB39_2 ; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end ; CHECK-NO-CMPBR-NEXT: ret -; CHECK-NO-CMPBR-NEXT: LBB19_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: LBB39_2: ; %if.then ; CHECK-NO-CMPBR-NEXT: brk #0x1 entry: %cmp = icmp slt i64 %a, %b diff --git a/llvm/test/CodeGen/AArch64/cmpbr-zext-sext.ll b/llvm/test/CodeGen/AArch64/cmpbr-zext-sext.ll new file mode 100644 index 0000000000000..e80db59ad5451 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cmpbr-zext-sext.ll @@ -0,0 +1,230 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple arm64-apple-ios -mattr +cmpbr -verify-machineinstrs -o - < %s | FileCheck %s --check-prefix=CHECK-CMPBR +; RUN: llc -mtriple arm64-apple-ios -mattr -cmpbr -verify-machineinstrs -o - < %s | FileCheck %s --check-prefix=CHECK-NO-CMPBR + +define void @cbb_assertsext_eq(i8 signext %a, i8 signext %b) { +; CHECK-CMPBR-LABEL: cbb_assertsext_eq: +; CHECK-CMPBR: ; %bb.0: +; CHECK-CMPBR-NEXT: cbbeq w0, w1, LBB0_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbb_assertsext_eq: +; CHECK-NO-CMPBR: ; %bb.0: +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.eq LBB0_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 + %cmp = icmp eq i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbb_assertsext_sgt(i8 signext %a, i8 signext %b) { +; CHECK-CMPBR-LABEL: cbb_assertsext_sgt: +; 
CHECK-CMPBR: ; %bb.0: +; CHECK-CMPBR-NEXT: cbbgt w0, w1, LBB1_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbb_assertsext_sgt: +; CHECK-NO-CMPBR: ; %bb.0: +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.gt LBB1_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 + %cmp = icmp sgt i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbh_assertsext_slt(i16 signext %a, i16 signext %b) { +; CHECK-CMPBR-LABEL: cbh_assertsext_slt: +; CHECK-CMPBR: ; %bb.0: +; CHECK-CMPBR-NEXT: cbhgt w1, w0, LBB2_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbh_assertsext_slt: +; CHECK-NO-CMPBR: ; %bb.0: +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.lt LBB2_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 + %cmp = icmp slt i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbb_assertzext_eq(i8 zeroext %a, i8 zeroext %b) { +; CHECK-CMPBR-LABEL: cbb_assertzext_eq: +; CHECK-CMPBR: ; %bb.0: +; CHECK-CMPBR-NEXT: cbbeq w0, w1, LBB3_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbb_assertzext_eq: +; CHECK-NO-CMPBR: ; %bb.0: +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.eq LBB3_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: 
brk #0x1 + %cmp = icmp eq i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbb_assertzext_ugt(i8 zeroext %a, i8 zeroext %b) { +; CHECK-CMPBR-LABEL: cbb_assertzext_ugt: +; CHECK-CMPBR: ; %bb.0: +; CHECK-CMPBR-NEXT: cbbhi w0, w1, LBB4_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbb_assertzext_ugt: +; CHECK-NO-CMPBR: ; %bb.0: +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.hi LBB4_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 + %cmp = icmp ugt i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbh_assertzext_ule(i16 zeroext %a, i16 zeroext %b) { +; CHECK-CMPBR-LABEL: cbh_assertzext_ule: +; CHECK-CMPBR: ; %bb.0: +; CHECK-CMPBR-NEXT: cbhhs w1, w0, LBB5_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbh_assertzext_ule: +; CHECK-NO-CMPBR: ; %bb.0: +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.ls LBB5_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 + %cmp = icmp ule i16 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbb_mixed_assertsext_only_first(i8 signext %a, i8 %b) { +; CHECK-CMPBR-LABEL: cbb_mixed_assertsext_only_first: +; CHECK-CMPBR: ; %bb.0: +; CHECK-CMPBR-NEXT: cbbgt w0, w1, LBB6_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB6_2: ; %if.then +; CHECK-CMPBR-NEXT: 
brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbb_mixed_assertsext_only_first: +; CHECK-NO-CMPBR: ; %bb.0: +; CHECK-NO-CMPBR-NEXT: cmp w0, w1, sxtb +; CHECK-NO-CMPBR-NEXT: b.gt LBB6_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB6_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 + %cmp = icmp sgt i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbb_mixed_assertzext_only_second(i8 %a, i8 zeroext %b) { +; CHECK-CMPBR-LABEL: cbb_mixed_assertzext_only_second: +; CHECK-CMPBR: ; %bb.0: +; CHECK-CMPBR-NEXT: cbbhi w0, w1, LBB7_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbb_mixed_assertzext_only_second: +; CHECK-NO-CMPBR: ; %bb.0: +; CHECK-NO-CMPBR-NEXT: cmp w1, w0, uxtb +; CHECK-NO-CMPBR-NEXT: b.lo LBB7_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 + %cmp = icmp ugt i8 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + + +declare void @llvm.trap() From c40a69439e277873343e63fd9912ded1c0790ee1 Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Fri, 14 Nov 2025 13:47:36 -0800 Subject: [PATCH 42/56] Remove instr-ref-target-hooks-sp-clobber.mir (#168125) This test is failing on the chromium x64 mac build because of invalid MIR. The rest of the patch is okay, so I am just deleting the test for now. 
--- .../instr-ref-target-hooks-sp-clobber.mir | 190 ------------------ 1 file changed, 190 deletions(-) delete mode 100644 llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir diff --git a/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir b/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir deleted file mode 100644 index 4b3e8eab3e1e4..0000000000000 --- a/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir +++ /dev/null @@ -1,190 +0,0 @@ -# Test to ensure that variable "__last" is properly recovered at the end of the livedebugvalues pass when Instruction Referencing-based LiveDebugValues is used. -# This testcase was obtained by looking at FileCheck.cpp and reducing it down via llvm-reduce. -# This test is the same as llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll, however, the output is taken just before the livedebugvalues pass, and then a clobber -# to the stack slot has been added after the first LDRXui in bb.2.if.then13, the livedebugvalues pass should still recover the value, as it was loaded into $x8 before the clobber. 
- -# REQUIRES: system-darwin - -# RUN: llc -o - %s -run-pass=livedebugvalues | FileCheck %s - -# CHECK: ![[LOC:[0-9]+]] = !DILocalVariable(name: "__last", -# CHECK-LABEL: bb.2.if.then13 -# CHECK: STRXui $xzr, $sp, 1 -# CHECK-NEXT: DBG_VALUE_LIST ![[LOC]], !DIExpression(DW_OP_LLVM_arg, 0), $x8 - ---- | - ; ModuleID = '/Users/srastogi/Development/llvm-project/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll' - source_filename = "/Users/srastogi/Development/llvm-project/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll" - target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" - - declare void @_ZdlPvm() - - define fastcc void @"_ZNSt3__111__introsortINS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_Lb0EEEvT1_SJ_T0_NS_15iterator_traitsISJ_E15difference_typeEb"(ptr %__first, ptr %__last, i1 %cmp, ptr %__first.addr.0, ptr %Label3.i.i.i241, ptr %__pivot.sroa.9113.8.copyload.i, ptr %0, ptr %1) !dbg !4 { - br label %while.cond - - while.cond: ; preds = %if.end16, %2 - br i1 %cmp, label %if.then13, label %if.end16 - - if.then13: ; preds = %while.cond - %cmp.i = icmp eq ptr %__first, %__last - %or.cond.i = select i1 %cmp.i, i1 false, i1 false - #dbg_value(ptr %__last, !10, !DIExpression(), !16) - br i1 %or.cond.i, label %common.ret, label %for.body.i, !dbg !20 - - common.ret: ; preds = %for.body.i, %if.then13 - ret void - - for.body.i: ; preds = %if.then13 - %InputLine.i.i = getelementptr i8, ptr %__first.addr.0, i64 132 - br label %common.ret - - if.end16: ; preds = %while.cond - %__pivot.sroa.13.8.copyload.i = load i64, ptr null, align 8 - call void @_ZdlPvm() - store ptr %__pivot.sroa.9113.8.copyload.i, ptr %0, align 8 - store i64 %__pivot.sroa.13.8.copyload.i, ptr %1, align 8 - store i64 0, ptr %__first, align 8 - store i32 0, ptr %__first.addr.0, align 8 - store i32 1, ptr 
%Label3.i.i.i241, align 4 - br label %while.cond - } - - !llvm.module.flags = !{!0} - !llvm.dbg.cu = !{!1} - - !0 = !{i32 2, !"Debug Info Version", i32 3} - !1 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !2, producer: "clang version 22.0.0git (git@github.com:llvm/llvm-project.git 46a3b4d5dc6dd9449ec7c0c9065552368cdf41d6)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, retainedTypes: !3, globals: !3, imports: !3, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/Library/Developer/CommandLineTools/SDKs/MacOSX15.3.sdk", sdk: "MacOSX15.3.sdk") - !2 = !DIFile(filename: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project/llvm/utils/FileCheck/FileCheck.cpp", directory: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project/build-instr-ref-stage2", checksumkind: CSK_MD5, checksum: "fa5f53f1b5782eb8b92fadec416b8941") - !3 = !{} - !4 = distinct !DISubprogram(name: "__introsort", linkageName: "_ZNSt3__111__introsortINS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_Lb0EEEvT1_SJ_T0_NS_15iterator_traitsISJ_E15difference_typeEb", scope: !6, file: !5, line: 758, type: !8, scopeLine: 762, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3, keyInstructions: true) - !5 = !DIFile(filename: "/Library/Developer/CommandLineTools/SDKs/MacOSX15.3.sdk/usr/include/c++/v1/__algorithm/sort.h", directory: "") - !6 = !DINamespace(name: "__1", scope: !7, exportSymbols: true) - !7 = !DINamespace(name: "std", scope: null) - !8 = !DISubroutineType(cc: DW_CC_nocall, types: !9) - !9 = !{null} - !10 = !DILocalVariable(name: "__last", arg: 2, scope: !11, file: !5, line: 284, type: !13) - !11 = distinct !DISubprogram(name: "__insertion_sort", linkageName: 
"_ZNSt3__116__insertion_sortB8nn180100INS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_EEvT1_SJ_T0_", scope: !6, file: !5, line: 284, type: !12, scopeLine: 284, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3, keyInstructions: true) - !12 = distinct !DISubroutineType(types: !9) - !13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64) - !14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InputAnnotation", file: !15, line: 323, size: 768, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !3, identifier: "_ZTS15InputAnnotation") - !15 = !DIFile(filename: "llvm/utils/FileCheck/FileCheck.cpp", directory: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project", checksumkind: CSK_MD5, checksum: "fa5f53f1b5782eb8b92fadec416b8941") - !16 = !DILocation(line: 0, scope: !11, inlinedAt: !17) - !17 = distinct !DILocation(line: 800, column: 9, scope: !18) - !18 = distinct !DILexicalBlock(scope: !19, file: !5, line: 799, column: 23) - !19 = distinct !DILexicalBlock(scope: !4, file: !5, line: 770, column: 16) - !20 = !DILocation(line: 288, column: 15, scope: !21, inlinedAt: !17, atomGroup: 1, atomRank: 1) - !21 = distinct !DILexicalBlock(scope: !11, file: !5, line: 288, column: 7) -... 
---- -name: '_ZNSt3__111__introsortINS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_Lb0EEEvT1_SJ_T0_NS_15iterator_traitsISJ_E15difference_typeEb' -alignment: 4 -tracksRegLiveness: true -noPhis: true -isSSA: false -noVRegs: true -hasFakeUses: false -debugInstrRef: true -tracksDebugUserValues: true -liveins: - - { reg: '$x0' } - - { reg: '$x1' } - - { reg: '$w2' } - - { reg: '$x3' } - - { reg: '$x4' } - - { reg: '$x5' } - - { reg: '$x6' } - - { reg: '$x7' } -frameInfo: - stackSize: 112 - maxAlignment: 8 - adjustsStack: true - hasCalls: true - maxCallFrameSize: 0 - isCalleeSavedInfoValid: true -stack: - - { id: 0, type: spill-slot, offset: -104, size: 8, alignment: 8 } - - { id: 1, type: spill-slot, offset: -8, size: 8, alignment: 8, callee-saved-register: '$lr' } - - { id: 2, type: spill-slot, offset: -16, size: 8, alignment: 8, callee-saved-register: '$fp' } - - { id: 3, type: spill-slot, offset: -24, size: 8, alignment: 8, callee-saved-register: '$x19' } - - { id: 4, type: spill-slot, offset: -32, size: 8, alignment: 8, callee-saved-register: '$x20' } - - { id: 5, type: spill-slot, offset: -40, size: 8, alignment: 8, callee-saved-register: '$x21' } - - { id: 6, type: spill-slot, offset: -48, size: 8, alignment: 8, callee-saved-register: '$x22' } - - { id: 7, type: spill-slot, offset: -56, size: 8, alignment: 8, callee-saved-register: '$x23' } - - { id: 8, type: spill-slot, offset: -64, size: 8, alignment: 8, callee-saved-register: '$x24' } - - { id: 9, type: spill-slot, offset: -72, size: 8, alignment: 8, callee-saved-register: '$x25' } - - { id: 10, type: spill-slot, offset: -80, size: 8, alignment: 8, callee-saved-register: '$x26' } - - { id: 11, type: spill-slot, offset: -88, size: 8, alignment: 8, callee-saved-register: '$x27' } - - { id: 12, type: spill-slot, offset: -96, size: 8, alignment: 8, callee-saved-register: 
'$x28' } -machineFunctionInfo: - hasRedZone: false - stackSizeZPR: 0 - stackSizePPR: 0 - hasStackFrame: true -body: | - bb.0 (%ir-block.2): - successors: %bb.2(0x04000000), %bb.3(0x7c000000) - liveins: $w2, $x0, $x1, $x3, $x4, $x5, $x6, $x7, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20, $lr - - $sp = frame-setup SUBXri $sp, 112, 0 - frame-setup STPXi killed $x28, killed $x27, $sp, 2 :: (store (s64) into %stack.12), (store (s64) into %stack.11) - frame-setup STPXi killed $x26, killed $x25, $sp, 4 :: (store (s64) into %stack.10), (store (s64) into %stack.9) - frame-setup STPXi killed $x24, killed $x23, $sp, 6 :: (store (s64) into %stack.8), (store (s64) into %stack.7) - frame-setup STPXi killed $x22, killed $x21, $sp, 8 :: (store (s64) into %stack.6), (store (s64) into %stack.5) - frame-setup STPXi killed $x20, killed $x19, $sp, 10 :: (store (s64) into %stack.4), (store (s64) into %stack.3) - frame-setup STPXi $fp, killed $lr, $sp, 12 :: (store (s64) into %stack.2), (store (s64) into %stack.1) - frame-setup CFI_INSTRUCTION def_cfa_offset 112 - frame-setup CFI_INSTRUCTION offset $w30, -8 - frame-setup CFI_INSTRUCTION offset $w29, -16 - frame-setup CFI_INSTRUCTION offset $w19, -24 - frame-setup CFI_INSTRUCTION offset $w20, -32 - frame-setup CFI_INSTRUCTION offset $w21, -40 - frame-setup CFI_INSTRUCTION offset $w22, -48 - frame-setup CFI_INSTRUCTION offset $w23, -56 - frame-setup CFI_INSTRUCTION offset $w24, -64 - frame-setup CFI_INSTRUCTION offset $w25, -72 - frame-setup CFI_INSTRUCTION offset $w26, -80 - frame-setup CFI_INSTRUCTION offset $w27, -88 - frame-setup CFI_INSTRUCTION offset $w28, -96 - DBG_PHI $x1, 1 - $x19 = ORRXrs $xzr, killed $x7, 0 - $x20 = ORRXrs $xzr, killed $x6, 0 - $x21 = ORRXrs $xzr, killed $x5, 0 - $x22 = ORRXrs $xzr, killed $x4, 0 - $x23 = ORRXrs $xzr, killed $x3, 0 - $w25 = ORRWrs $wzr, killed $w2, 0 - $x26 = ORRXrs $xzr, killed $x0, 0 - renamable $w27 = MOVZWi 1, 0 - STRXui killed $x1, $sp, 1 :: (store (s64) into %stack.0) - TBNZW 
renamable $w25, 0, %bb.2 - - bb.3.if.end16: - successors: %bb.2(0x04000000), %bb.3(0x7c000000) - liveins: $w25, $w27, $x19, $x20, $x21, $x22, $x23, $x26 - - $x28 = ORRXrs $xzr, $xzr, 0 - renamable $x24 = LDRXui killed renamable $x28, 0 :: (load (s64) from `ptr null`) - BL @_ZdlPvm, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp - STRXui renamable $x21, renamable $x20, 0 :: (store (s64) into %ir.0) - STRXui killed renamable $x24, renamable $x19, 0 :: (store (s64) into %ir.1) - STRXui $xzr, renamable $x26, 0 :: (store (s64) into %ir.__first) - STRWui $wzr, renamable $x23, 0 :: (store (s32) into %ir.__first.addr.0, align 8) - STRWui renamable $w27, renamable $x22, 0 :: (store (s32) into %ir.Label3.i.i.i241) - TBZW renamable $w25, 0, %bb.3 - - bb.2.if.then13: - liveins: $x26 - - DBG_INSTR_REF !10, !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref(1, 0), debug-location !16 - renamable $x8 = LDRXui $sp, 1 :: (load (s64) from %stack.0) - ; Clobber the stack slot that contains the value we care about, to ensure that LDV can still recover it from $x8 above - STRXui $xzr, $sp, 1 :: (store (s64) into %stack.0) - $fp, $lr = frame-destroy LDPXi $sp, 12 :: (load (s64) from %stack.2), (load (s64) from %stack.1) - $x20, $x19 = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.4), (load (s64) from %stack.3) - $xzr = SUBSXrs killed renamable $x26, killed renamable $x8, 0, implicit-def $nzcv, debug-location !20 - $x22, $x21 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.6), (load (s64) from %stack.5) - $x24, $x23 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.8), (load (s64) from %stack.7) - $x26, $x25 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.10), (load (s64) from %stack.9) - $x28, $x27 = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.12), (load (s64) from %stack.11) - $sp = frame-destroy ADDXri $sp, 112, 0 - RET undef $lr -... 
From 14296285f92a1a169cf47e30e15e2936e26ada2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Susan=20Tan=20=28=E3=82=B9-=E3=82=B6=E3=83=B3=E3=80=80?= =?UTF-8?q?=E3=82=BF=E3=83=B3=29?= Date: Fri, 14 Nov 2025 16:49:56 -0500 Subject: [PATCH 43/56] [flang][cuf] Add to cuf.alloc/cuf.allocate mem alloc effect (#167414) Add MemAlloc effect to the result so that cuf.alloc/cuf.allocate can be recognized by FIR alias analysis. --- .../flang/Optimizer/Dialect/CUF/CUFOps.td | 7 +++--- .../AliasAnalysis/cuf-alloc-source-kind.mlir | 22 +++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 flang/test/Analysis/AliasAnalysis/cuf-alloc-source-kind.mlir diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index e38738230ffbc..07bb47e26b968 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -26,8 +26,7 @@ include "mlir/IR/BuiltinAttributes.td" class cuf_Op traits> : Op; -def cuf_AllocOp : cuf_Op<"alloc", [AttrSizedOperandSegments, - MemoryEffects<[MemAlloc]>]> { +def cuf_AllocOp : cuf_Op<"alloc", [AttrSizedOperandSegments]> { let summary = "Allocate an object on device"; let description = [{ @@ -47,7 +46,9 @@ def cuf_AllocOp : cuf_Op<"alloc", [AttrSizedOperandSegments, cuf_DataAttributeAttr:$data_attr ); - let results = (outs fir_ReferenceType:$ptr); + // Value-scoped Allocate on the returned reference + let results = + (outs Res]>:$ptr); let assemblyFormat = [{ $in_type (`(` $typeparams^ `:` type($typeparams) `)`)? 
diff --git a/flang/test/Analysis/AliasAnalysis/cuf-alloc-source-kind.mlir b/flang/test/Analysis/AliasAnalysis/cuf-alloc-source-kind.mlir new file mode 100644 index 0000000000000..f062dcb3a3360 --- /dev/null +++ b/flang/test/Analysis/AliasAnalysis/cuf-alloc-source-kind.mlir @@ -0,0 +1,22 @@ +// REQUIRES: asserts +// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' -debug-only=fir-alias-analysis --mlir-disable-threading 2>&1 | FileCheck %s + +// Verify that a CUF allocation is recognized as SourceKind::Allocate by +// fir::AliasAnalysis::getSource. + +module { + func.func @_QQmain() attributes {fir.bindc_name = "TEST"} { + // Allocate two independent device arrays and tag the results; with + // value-scoped MemAlloc handling in AA, these should be classified as + // Allocate and not alias. + %a = cuf.alloc !fir.box>> {bindc_name = "a1", data_attr = #cuf.cuda, uniq_name = "_QFEa1", test.ptr = "cuf_alloc_a"} -> !fir.ref>>> + %b = cuf.alloc !fir.box>> {bindc_name = "a2", data_attr = #cuf.cuda, uniq_name = "_QFEa2", test.ptr = "cuf_alloc_b"} -> !fir.ref>>> + return + } +} + +// CHECK-LABEL: Testing : "_QQmain" +// Distinct allocations should not alias. +// CHECK: cuf_alloc_a#0 <-> cuf_alloc_b#0: NoAlias + + From ab08fbd92cb7557c198be1f9de0b59b23f8e92e1 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 6 Oct 2025 16:01:34 -0700 Subject: [PATCH 44/56] [clang] Switch warning suppression multi-match rule to "last match takes precedence" The current "longest match takes precedence" rule for warning suppression mappings can be confusing, especially in long suppression files where tracking the length relationship between globs is difficult. For example, with the following rules, it's not immediately obvious why the first one should currently take precedence: ``` src:*test/* src:*lld/*=emit ``` This commit changes the multi-match behavior so the last match takes precedence. 
This rule is easier to understand and consistent with the approach used by sanitizers, simplifying the mechanism by providing a uniform experience across different tools. This is potentially breaking, but very unlikely. An investigation of known uses showed they do not rely on the length. Reviewers: thurstond, kadircet, fmayer Pull Request: https://github.com/llvm/llvm-project/pull/162237 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/docs/WarningSuppressionMappings.rst | 4 ++-- clang/include/clang/Basic/Diagnostic.h | 2 +- clang/lib/Basic/Diagnostic.cpp | 14 +++++--------- clang/unittests/Basic/DiagnosticTest.cpp | 8 +++----- 5 files changed, 13 insertions(+), 17 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 1b7896ec87119..ad54872913d55 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -69,6 +69,8 @@ Potentially Breaking Changes call the member ``operator delete`` instead of the expected global delete operator. The old behavior is retained under ``-fclang-abi-compat=21`` flag. +- Clang warning suppressions file, ``--warning-suppression-mappings=``, now will + use the last matching entry instead of the longest one. - Trailing null statements in GNU statement expressions are no longer ignored by Clang; they now result in a void type. Clang previously matched GCC's behavior, which was recently clarified to be incorrect. diff --git a/clang/docs/WarningSuppressionMappings.rst b/clang/docs/WarningSuppressionMappings.rst index d96341ac6e563..d8af856f64ef0 100644 --- a/clang/docs/WarningSuppressionMappings.rst +++ b/clang/docs/WarningSuppressionMappings.rst @@ -63,7 +63,7 @@ Format Warning suppression mappings uses the same format as :doc:`SanitizerSpecialCaseList`. -Sections describe which diagnostic group's behaviour to change, e.g. +Sections describe which diagnostic group's behavior to change, e.g. ``[unused]``. When a diagnostic is matched by multiple sections, the latest section takes precedence. 
@@ -76,7 +76,7 @@ Source files are matched against these globs either: - as paths relative to the current working directory - as absolute paths. -When a source file matches multiple globs in a section, the longest one takes +When a source file matches multiple globs in a section, the last one takes precedence. .. code-block:: bash diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h index e540040ddc524..c6e931d0c9517 100644 --- a/clang/include/clang/Basic/Diagnostic.h +++ b/clang/include/clang/Basic/Diagnostic.h @@ -971,7 +971,7 @@ class DiagnosticsEngine : public RefCountedBase { /// diagnostics in specific files. /// Mapping file is expected to be a special case list with sections denoting /// diagnostic groups and `src` entries for globs to suppress. `emit` category - /// can be used to disable suppression. Longest glob that matches a filepath + /// can be used to disable suppression. The last glob that matches a filepath /// takes precedence. For example: /// [unused] /// src:clang/* diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp index 5e9da245e2b43..4802478c379bb 100644 --- a/clang/lib/Basic/Diagnostic.cpp +++ b/clang/lib/Basic/Diagnostic.cpp @@ -525,8 +525,7 @@ std::unique_ptr WarningsSpecialCaseList::create(const llvm::MemoryBuffer &Input, std::string &Err) { auto WarningSuppressionList = std::make_unique(); - if (!WarningSuppressionList->createInternal(&Input, Err, - /*OrderBySize=*/true)) + if (!WarningSuppressionList->createInternal(&Input, Err)) return nullptr; return WarningSuppressionList; } @@ -588,15 +587,12 @@ bool WarningsSpecialCaseList::isDiagSuppressed(diag::kind DiagId, StringRef F = llvm::sys::path::remove_leading_dotslash(PLoc.getFilename()); - StringRef LongestSup = DiagSection->getLongestMatch("src", F, ""); - if (LongestSup.empty()) + unsigned LastSup = DiagSection->getLastMatch("src", F, ""); + if (LastSup == 0) return false; - StringRef LongestEmit = 
DiagSection->getLongestMatch("src", F, "emit"); - if (LongestEmit.empty()) - return true; - - return LongestSup.size() > LongestEmit.size(); + unsigned LastEmit = DiagSection->getLastMatch("src", F, "emit"); + return LastSup > LastEmit; } bool DiagnosticsEngine::isSuppressedViaMapping(diag::kind DiagId, diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp index de090864e5095..5492146f40fa9 100644 --- a/clang/unittests/Basic/DiagnosticTest.cpp +++ b/clang/unittests/Basic/DiagnosticTest.cpp @@ -294,7 +294,7 @@ TEST_F(SuppressionMappingTest, EmitCategoryIsExcluded) { locForFile("foo.cpp"))); } -TEST_F(SuppressionMappingTest, LongestMatchWins) { +TEST_F(SuppressionMappingTest, LastMatchWins) { llvm::StringLiteral SuppressionMappingFile = R"( [unused] src:*clang/* @@ -327,10 +327,8 @@ TEST_F(SuppressionMappingTest, LongShortMatch) { EXPECT_TRUE(Diags.isSuppressedViaMapping(diag::warn_unused_function, locForFile("test/t1.cpp"))); - - // FIXME: This is confusing. - EXPECT_TRUE(Diags.isSuppressedViaMapping(diag::warn_unused_function, - locForFile("lld/test/t2.cpp"))); + EXPECT_FALSE(Diags.isSuppressedViaMapping(diag::warn_unused_function, + locForFile("lld/test/t2.cpp"))); } TEST_F(SuppressionMappingTest, ShortLongMatch) { From 0a58e49c44ae7cca39b3eb219efed9f0581b8b0f Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 15 Nov 2025 01:05:57 +0300 Subject: [PATCH 45/56] [VE] TableGen-erate SDNode descriptions (#168120) This allows SDNodes to be validated against their expected type profiles and reduces the number of changes required to add a new node. There are a couple of nodes that are missing descriptions and one node that fails validation. Part of #119709. 
Pull Request: https://github.com/llvm/llvm-project/pull/168120 --- llvm/lib/Target/VE/CMakeLists.txt | 2 + llvm/lib/Target/VE/VECustomDAG.cpp | 1 + llvm/lib/Target/VE/VEISelDAGToDAG.cpp | 1 + llvm/lib/Target/VE/VEISelLowering.cpp | 42 +-------------------- llvm/lib/Target/VE/VEISelLowering.h | 46 ----------------------- llvm/lib/Target/VE/VESelectionDAGInfo.cpp | 44 ++++++++++++++++++++++ llvm/lib/Target/VE/VESelectionDAGInfo.h | 45 ++++++++++++++++++++++ llvm/lib/Target/VE/VESubtarget.cpp | 11 +++++- llvm/lib/Target/VE/VESubtarget.h | 10 ++--- llvm/lib/Target/VE/VVPISelLowering.cpp | 1 + 10 files changed, 110 insertions(+), 93 deletions(-) create mode 100644 llvm/lib/Target/VE/VESelectionDAGInfo.cpp create mode 100644 llvm/lib/Target/VE/VESelectionDAGInfo.h diff --git a/llvm/lib/Target/VE/CMakeLists.txt b/llvm/lib/Target/VE/CMakeLists.txt index d1bb4f32fcba7..b06072ddf8519 100644 --- a/llvm/lib/Target/VE/CMakeLists.txt +++ b/llvm/lib/Target/VE/CMakeLists.txt @@ -9,6 +9,7 @@ tablegen(LLVM VEGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM VEGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM VEGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM VEGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM VEGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM VEGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM VEGenCallingConv.inc -gen-callingconv) add_public_tablegen_target(VECommonTableGen) @@ -24,6 +25,7 @@ add_llvm_target(VECodeGen VEMachineFunctionInfo.cpp VEMCInstLower.cpp VERegisterInfo.cpp + VESelectionDAGInfo.cpp VESubtarget.cpp VETargetMachine.cpp VVPISelLowering.cpp diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp index 2855a65f654c9..74c21edb3d514 100644 --- a/llvm/lib/Target/VE/VECustomDAG.cpp +++ b/llvm/lib/Target/VE/VECustomDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "VECustomDAG.h" +#include "VESelectionDAGInfo.h" #ifndef DEBUG_TYPE #define DEBUG_TYPE "vecustomdag" 
diff --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp index 4e1bac0e91734..823bfbcb34a07 100644 --- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp +++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "VE.h" +#include "VESelectionDAGInfo.h" #include "VETargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index a068138791cb4..e1735424a776b 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -17,6 +17,7 @@ #include "VEInstrBuilder.h" #include "VEMachineFunctionInfo.h" #include "VERegisterInfo.h" +#include "VESelectionDAGInfo.h" #include "VETargetMachine.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -914,47 +915,6 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM, computeRegisterProperties(Subtarget->getRegisterInfo()); } -const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const { -#define TARGET_NODE_CASE(NAME) \ - case VEISD::NAME: \ - return "VEISD::" #NAME; - switch ((VEISD::NodeType)Opcode) { - case VEISD::FIRST_NUMBER: - break; - TARGET_NODE_CASE(CMPI) - TARGET_NODE_CASE(CMPU) - TARGET_NODE_CASE(CMPF) - TARGET_NODE_CASE(CMPQ) - TARGET_NODE_CASE(CMOV) - TARGET_NODE_CASE(CALL) - TARGET_NODE_CASE(EH_SJLJ_LONGJMP) - TARGET_NODE_CASE(EH_SJLJ_SETJMP) - TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH) - TARGET_NODE_CASE(GETFUNPLT) - TARGET_NODE_CASE(GETSTACKTOP) - TARGET_NODE_CASE(GETTLSADDR) - TARGET_NODE_CASE(GLOBAL_BASE_REG) - TARGET_NODE_CASE(Hi) - TARGET_NODE_CASE(Lo) - TARGET_NODE_CASE(RET_GLUE) - TARGET_NODE_CASE(TS1AM) - TARGET_NODE_CASE(VEC_UNPACK_LO) - TARGET_NODE_CASE(VEC_UNPACK_HI) - TARGET_NODE_CASE(VEC_PACK) - TARGET_NODE_CASE(VEC_BROADCAST) - TARGET_NODE_CASE(REPL_I32) - 
TARGET_NODE_CASE(REPL_F32) - - TARGET_NODE_CASE(LEGALAVL) - - // Register the VVP_* SDNodes. -#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME) -#include "VVPNodes.def" - } -#undef TARGET_NODE_CASE - return nullptr; -} - EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const { if (VT.isVector()) diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h index 04274b14baa1f..ad7cf3e902fb6 100644 --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -20,51 +20,6 @@ namespace llvm { class VESubtarget; -namespace VEISD { -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - CMPI, // Compare between two signed integer values. - CMPU, // Compare between two unsigned integer values. - CMPF, // Compare between two floating-point values. - CMPQ, // Compare between two quad floating-point values. - CMOV, // Select between two values using the result of comparison. - - CALL, // A call instruction. - EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. - EH_SJLJ_SETJMP, // SjLj exception handling setjmp. - EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch. - GETFUNPLT, // Load function address through %plt insturction. - GETTLSADDR, // Load address for TLS access. - GETSTACKTOP, // Retrieve address of stack top (first address of - // locals and temporaries). - GLOBAL_BASE_REG, // Global base reg for PIC. - Hi, // Hi/Lo operations, typically on a global address. - Lo, // Hi/Lo operations, typically on a global address. - RET_GLUE, // Return with a flag operand. - TS1AM, // A TS1AM instruction used for 1/2 bytes swap. - VEC_UNPACK_LO, // unpack the lo v256 slice of a packed v512 vector. - VEC_UNPACK_HI, // unpack the hi v256 slice of a packed v512 vector. 
- // 0: v512 vector, 1: AVL - VEC_PACK, // pack a lo and a hi vector into one v512 vector - // 0: v256 lo vector, 1: v256 hi vector, 2: AVL - - VEC_BROADCAST, // A vector broadcast instruction. - // 0: scalar value, 1: VL - REPL_I32, - REPL_F32, // Replicate subregister to other half. - - // Annotation as a wrapper. LEGALAVL(VL) means that VL refers to 64bit of - // data, whereas the raw EVL coming in from VP nodes always refers to number - // of elements, regardless of their size. - LEGALAVL, - -// VVP_* nodes. -#define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME, -#include "VVPNodes.def" -}; -} - /// Convert a DAG integer condition code to a VE ICC condition. inline static VECC::CondCode intCondCode2Icc(ISD::CondCode CC) { switch (CC) { @@ -167,7 +122,6 @@ class VETargetLowering : public TargetLowering { public: VETargetLowering(const TargetMachine &TM, const VESubtarget &STI); - const char *getTargetNodeName(unsigned Opcode) const override; MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; } diff --git a/llvm/lib/Target/VE/VESelectionDAGInfo.cpp b/llvm/lib/Target/VE/VESelectionDAGInfo.cpp new file mode 100644 index 0000000000000..d4a33f4ca837e --- /dev/null +++ b/llvm/lib/Target/VE/VESelectionDAGInfo.cpp @@ -0,0 +1,44 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "VESelectionDAGInfo.h" + +#define GET_SDNODE_DESC +#include "VEGenSDNodeInfo.inc" + +using namespace llvm; + +VESelectionDAGInfo::VESelectionDAGInfo() + : SelectionDAGGenTargetInfo(VEGenSDNodeInfo) {} + +VESelectionDAGInfo::~VESelectionDAGInfo() = default; + +const char *VESelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { +#define TARGET_NODE_CASE(NAME) \ + case VEISD::NAME: \ + return "VEISD::" #NAME; + + switch (static_cast(Opcode)) { + TARGET_NODE_CASE(GLOBAL_BASE_REG) + TARGET_NODE_CASE(LEGALAVL) + } +#undef TARGET_NODE_CASE + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + +void VESelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + case VEISD::GETSTACKTOP: + // result #0 has invalid type; expected ch, got i64 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); +} diff --git a/llvm/lib/Target/VE/VESelectionDAGInfo.h b/llvm/lib/Target/VE/VESelectionDAGInfo.h new file mode 100644 index 0000000000000..98ead1b651fb0 --- /dev/null +++ b/llvm/lib/Target/VE/VESelectionDAGInfo.h @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VESELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_VE_VESELECTIONDAGINFO_H + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +#define GET_SDNODE_ENUM +#include "VEGenSDNodeInfo.inc" + +namespace llvm { +namespace VEISD { + +enum NodeType : unsigned { + GLOBAL_BASE_REG = GENERATED_OPCODE_END, // Global base reg for PIC. + + // Annotation as a wrapper. LEGALAVL(VL) means that VL refers to 64bit of + // data, whereas the raw EVL coming in from VP nodes always refers to number + // of elements, regardless of their size. + LEGALAVL, +}; + +} // namespace VEISD + +class VESelectionDAGInfo : public SelectionDAGGenTargetInfo { +public: + VESelectionDAGInfo(); + + ~VESelectionDAGInfo() override; + + const char *getTargetNodeName(unsigned Opcode) const override; + + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_VE_VESELECTIONDAGINFO_H diff --git a/llvm/lib/Target/VE/VESubtarget.cpp b/llvm/lib/Target/VE/VESubtarget.cpp index 197bffe2b55b7..9c9b1b43d1a04 100644 --- a/llvm/lib/Target/VE/VESubtarget.cpp +++ b/llvm/lib/Target/VE/VESubtarget.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "VESubtarget.h" +#include "VESelectionDAGInfo.h" #include "llvm/MC/TargetRegistry.h" using namespace llvm; @@ -43,7 +44,15 @@ VESubtarget::VESubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) : VEGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - FrameLowering(*this) {} + FrameLowering(*this) { + TSInfo = std::make_unique(); +} + +VESubtarget::~VESubtarget() = default; + +const SelectionDAGTargetInfo *VESubtarget::getSelectionDAGInfo() 
const { + return TSInfo.get(); +} uint64_t VESubtarget::getAdjustedFrameSize(uint64_t FrameSize) const { // Calculate adjusted frame size by adding the size of RSA frame, diff --git a/llvm/lib/Target/VE/VESubtarget.h b/llvm/lib/Target/VE/VESubtarget.h index 0c3dc0a080723..bc1c9faca5b5f 100644 --- a/llvm/lib/Target/VE/VESubtarget.h +++ b/llvm/lib/Target/VE/VESubtarget.h @@ -16,7 +16,6 @@ #include "VEFrameLowering.h" #include "VEISelLowering.h" #include "VEInstrInfo.h" -#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -41,13 +40,15 @@ class VESubtarget : public VEGenSubtargetInfo { VEInstrInfo InstrInfo; VETargetLowering TLInfo; - SelectionDAGTargetInfo TSInfo; + std::unique_ptr TSInfo; VEFrameLowering FrameLowering; public: VESubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM); + ~VESubtarget() override; + const VEInstrInfo *getInstrInfo() const override { return &InstrInfo; } const VEFrameLowering *getFrameLowering() const override { return &FrameLowering; @@ -56,9 +57,8 @@ class VESubtarget : public VEGenSubtargetInfo { return &InstrInfo.getRegisterInfo(); } const VETargetLowering *getTargetLowering() const override { return &TLInfo; } - const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } + + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override; bool enableMachineScheduler() const override; diff --git a/llvm/lib/Target/VE/VVPISelLowering.cpp b/llvm/lib/Target/VE/VVPISelLowering.cpp index f1e2d7f717016..2b84529cf3dd1 100644 --- a/llvm/lib/Target/VE/VVPISelLowering.cpp +++ b/llvm/lib/Target/VE/VVPISelLowering.cpp @@ -13,6 +13,7 @@ #include "VECustomDAG.h" #include "VEISelLowering.h" +#include "VESelectionDAGInfo.h" using namespace llvm; From 488151254fdeb5803f950060b218a867bf9ee296 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Fri, 14 Nov 
2025 14:06:36 -0800 Subject: [PATCH 46/56] Don't check frame base as varies if registers are available from targets. (#168124) Fixes a buildbot issue stemming from https://github.com/llvm/llvm-project/pull/167986 --- llvm/test/DebugInfo/dwarfdump-dwp-str-offsets-64.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/test/DebugInfo/dwarfdump-dwp-str-offsets-64.yaml b/llvm/test/DebugInfo/dwarfdump-dwp-str-offsets-64.yaml index 3820ca7184d62..07f12cb84503d 100644 --- a/llvm/test/DebugInfo/dwarfdump-dwp-str-offsets-64.yaml +++ b/llvm/test/DebugInfo/dwarfdump-dwp-str-offsets-64.yaml @@ -20,8 +20,7 @@ # CHECK: 0x0000001a: DW_TAG_subprogram # CHECK-NEXT: DW_AT_low_pc (indexed (00000000) address = ) # CHECK-NEXT: DW_AT_high_pc (0x0000000f) -# CHECK-NEXT: DW_AT_frame_base (DW_OP_reg6 RBP) -# CHECK-NEXT: DW_AT_name ("main") +# CHECK: DW_AT_name ("main") # CHECK-NEXT: DW_AT_decl_file (0x00) # CHECK-NEXT: DW_AT_decl_line (1) # CHECK-NEXT: DW_AT_type (0x00000029 "int") From 944278fef441ec3458c1271f6637154762556935 Mon Sep 17 00:00:00 2001 From: Prabhu Rajasekaran Date: Fri, 14 Nov 2025 14:14:54 -0800 Subject: [PATCH 47/56] Revert "[Clang][OpenMP] Bug fix Default clause variable category" (#168130) Reverts llvm/llvm-project#168112 --- clang/lib/Sema/SemaOpenMP.cpp | 6 +- ...allel_default_variableCategory_codegen.cpp | 91 ------------------- 2 files changed, 3 insertions(+), 94 deletions(-) delete mode 100644 clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 31c8f0cd30c56..81c591a00cfc6 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1364,15 +1364,15 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(const_iterator &Iter, DefaultDataSharingAttributes IterDA = Iter->DefaultAttr; switch (Iter->DefaultVCAttr) { case DSA_VC_aggregate: - if (!D->getType()->isAggregateType()) + if (!VD->getType()->isAggregateType()) IterDA = DSA_none; 
break; case DSA_VC_pointer: - if (!D->getType()->isPointerType()) + if (!VD->getType()->isPointerType()) IterDA = DSA_none; break; case DSA_VC_scalar: - if (!D->getType()->isScalarType()) + if (!VD->getType()->isScalarType()) IterDA = DSA_none; break; case DSA_VC_all: diff --git a/clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp b/clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp deleted file mode 100644 index ffafc9a9410b7..0000000000000 --- a/clang/test/OpenMP/parallel_default_variableCategory_codegen.cpp +++ /dev/null @@ -1,91 +0,0 @@ -// RUN: %clangxx -Xclang -verify -Wno-vla -fopenmp -fopenmp-version=60 -x c++ -S -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics -#ifndef HEADER -#define HEADER - -int global; -#define VECTOR_SIZE 4 - -int main (int argc, char **argv) { - int i,n; - int x; - - n = VECTOR_SIZE; - - #pragma omp parallel masked firstprivate(x) num_threads(2) - { - int *xPtr = nullptr; - // scalar - #pragma omp task default(shared:scalar) - { - xPtr = &x; - } - #pragma omp taskwait - - // pointer - #pragma omp task default(shared:pointer) shared(x) - { - xPtr = &x; - } - #pragma omp taskwait - } - - int *aggregate[VECTOR_SIZE] = {0,0,0,0}; - - #pragma omp parallel masked num_threads(2) - { - // aggregate - #pragma omp task default(shared:aggregate) - for(i=0;i Date: Fri, 14 Nov 2025 14:17:37 -0800 Subject: [PATCH 48/56] [mlir][bazel] Add apfloat test library (#168115) The apfloat code was added in #167848, and some bazel was added in #167916 but the runtime library for test-apfloat-emulation.mlir was missed. This patch adds the appropriate target. 
--- .../llvm-project-overlay/mlir/BUILD.bazel | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 89413d6ccbaac..effcd615786bf 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -9794,6 +9794,29 @@ cc_binary( deps = [":mlir_float16_utils"], ) +cc_library( + name = "_mlir_apfloat_utils", + srcs = ["lib/ExecutionEngine/APFloatWrappers.cpp"], + defines = ["mlir_apfloat_wrappers_EXPORTS"], + includes = ["include"], + deps = [ + "//llvm:Support", + ], +) + +# Indirection to avoid 'libmlir_apfloat_utils.so' filename clash. +alias( + name = "mlir_apfloat_utils", + actual = "_mlir_apfloat_utils", +) + +cc_binary( + name = "libmlir_apfloat_utils.so", + linkshared = True, + linkstatic = False, + deps = [":mlir_apfloat_utils"], +) + # Unlike mlir_float16_utils, mlir_c_runner_utils, etc, we do *not* make # this a shared library: because on the CMake side, doing so causes issues # when building on Windows. In particular, various functions take/return From 825ebef51ad83492698cd8ac59c12375fc25636b Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 14 Nov 2025 14:30:28 -0800 Subject: [PATCH 49/56] [NFC][Support] Remove unused getLongestMatch from SpecialCaseList (#167193) This method is not used anywhere. Remove it. --- llvm/include/llvm/Support/SpecialCaseList.h | 10 +-- llvm/lib/Support/SpecialCaseList.cpp | 83 +++++++-------------- 2 files changed, 31 insertions(+), 62 deletions(-) diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index 5a012cf0c0264..28e264f078d89 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -110,8 +110,7 @@ class SpecialCaseList { // classes. 
LLVM_ABI bool createInternal(const std::vector &Paths, vfs::FileSystem &VFS, std::string &Error); - LLVM_ABI bool createInternal(const MemoryBuffer *MB, std::string &Error, - bool OrderBySize = false); + LLVM_ABI bool createInternal(const MemoryBuffer *MB, std::string &Error); SpecialCaseList() = default; SpecialCaseList(SpecialCaseList const &) = delete; @@ -137,11 +136,6 @@ class SpecialCaseList { LLVM_ABI unsigned getLastMatch(StringRef Prefix, StringRef Query, StringRef Category) const; - // Helper method to search by Prefix, Query, and Category. Returns - // matching rule, or empty string if there is no match. - LLVM_ABI StringRef getLongestMatch(StringRef Prefix, StringRef Query, - StringRef Category) const; - /// Returns true if the section has any entries for the given prefix. LLVM_ABI bool hasPrefix(StringRef Prefix) const; @@ -166,7 +160,7 @@ class SpecialCaseList { /// Parses just-constructed SpecialCaseList entries from a memory buffer. LLVM_ABI bool parse(unsigned FileIdx, const MemoryBuffer *MB, - std::string &Error, bool OrderBySize); + std::string &Error); }; } // namespace llvm diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 91f98cf7fac6c..b3bc6ea3bcb30 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -38,16 +38,13 @@ namespace llvm { namespace { -using Match = std::pair; -static constexpr Match NotMatched = {"", 0}; - // Lagacy v1 matcher. 
class RegexMatcher { public: Error insert(StringRef Pattern, unsigned LineNumber); - void preprocess(bool BySize); + void preprocess(); - Match match(StringRef Query) const; + unsigned match(StringRef Query) const; struct Reg { Reg(StringRef Name, unsigned LineNo, Regex &&Rg) @@ -63,9 +60,9 @@ class RegexMatcher { class GlobMatcher { public: Error insert(StringRef Pattern, unsigned LineNumber); - void preprocess(bool BySize); + void preprocess(); - Match match(StringRef Query) const; + unsigned match(StringRef Query) const; struct Glob { Glob(StringRef Name, unsigned LineNo, GlobPattern &&Pattern) @@ -92,10 +89,10 @@ class Matcher { Matcher(bool UseGlobs, bool RemoveDotSlash); Error insert(StringRef Pattern, unsigned LineNumber); - void preprocess(bool BySize); - Match match(StringRef Query) const; + void preprocess(); + unsigned match(StringRef Query) const; - bool matchAny(StringRef Query) const { return match(Query).second > 0; } + bool matchAny(StringRef Query) const { return match(Query); } std::variant M; bool RemoveDotSlash; @@ -125,19 +122,13 @@ Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber) { return Error::success(); } -void RegexMatcher::preprocess(bool BySize) { - if (BySize) { - llvm::stable_sort(RegExes, [](const Reg &A, const Reg &B) { - return A.Name.size() < B.Name.size(); - }); - } -} +void RegexMatcher::preprocess() {} -Match RegexMatcher::match(StringRef Query) const { +unsigned RegexMatcher::match(StringRef Query) const { for (const auto &R : reverse(RegExes)) if (R.Rg.match(Query)) - return {R.Name, R.LineNo}; - return NotMatched; + return R.LineNo; + return 0; } Error GlobMatcher::insert(StringRef Pattern, unsigned LineNumber) { @@ -151,13 +142,7 @@ Error GlobMatcher::insert(StringRef Pattern, unsigned LineNumber) { return Error::success(); } -void GlobMatcher::preprocess(bool BySize) { - if (BySize) { - llvm::stable_sort(Globs, [](const Glob &A, const Glob &B) { - return A.Name.size() < B.Name.size(); - }); - } - +void 
GlobMatcher::preprocess() { for (const auto &[Idx, G] : enumerate(Globs)) { StringRef Prefix = G.Pattern.prefix(); StringRef Suffix = G.Pattern.suffix(); @@ -181,7 +166,7 @@ void GlobMatcher::preprocess(bool BySize) { } } -Match GlobMatcher::match(StringRef Query) const { +unsigned GlobMatcher::match(StringRef Query) const { int Best = -1; if (!PrefixSuffixToGlob.empty()) { for (const auto &[_, SToGlob] : PrefixSuffixToGlob.find_prefixes(Query)) { @@ -224,9 +209,7 @@ Match GlobMatcher::match(StringRef Query) const { } } } - if (Best < 0) - return NotMatched; - return {Globs[Best].Name, Globs[Best].LineNo}; + return Best < 0 ? 0 : Globs[Best].LineNo; } Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) @@ -241,20 +224,20 @@ Error Matcher::insert(StringRef Pattern, unsigned LineNumber) { return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M); } -void Matcher::preprocess(bool BySize) { - return std::visit([&](auto &V) { return V.preprocess(BySize); }, M); +void Matcher::preprocess() { + return std::visit([&](auto &V) { return V.preprocess(); }, M); } -Match Matcher::match(StringRef Query) const { +unsigned Matcher::match(StringRef Query) const { if (RemoveDotSlash) Query = llvm::sys::path::remove_leading_dotslash(Query); - return std::visit([&](auto &V) -> Match { return V.match(Query); }, M); + return std::visit([&](auto &V) -> unsigned { return V.match(Query); }, M); } } // namespace class SpecialCaseList::Section::SectionImpl { public: - void preprocess(bool OrderBySize); + void preprocess(); const Matcher *findMatcher(StringRef Prefix, StringRef Category) const; using SectionEntries = StringMap>; @@ -304,7 +287,7 @@ bool SpecialCaseList::createInternal(const std::vector &Paths, return false; } std::string ParseError; - if (!parse(i, FileOrErr.get().get(), ParseError, /*OrderBySize=*/false)) { + if (!parse(i, FileOrErr.get().get(), ParseError)) { Error = (Twine("error parsing file '") + Path + "': " + ParseError).str(); return false; } @@ 
-312,9 +295,9 @@ bool SpecialCaseList::createInternal(const std::vector &Paths, return true; } -bool SpecialCaseList::createInternal(const MemoryBuffer *MB, std::string &Error, - bool OrderBySize) { - if (!parse(0, MB, Error, OrderBySize)) +bool SpecialCaseList::createInternal(const MemoryBuffer *MB, + std::string &Error) { + if (!parse(0, MB, Error)) return false; return true; } @@ -337,7 +320,7 @@ SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo, } bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, - std::string &Error, bool OrderBySize) { + std::string &Error) { unsigned long long Version = 2; StringRef Header = MB->getBuffer(); @@ -413,7 +396,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, } for (Section &S : Sections) - S.Impl->preprocess(OrderBySize); + S.Impl->preprocess(); return true; } @@ -465,29 +448,21 @@ SpecialCaseList::Section::SectionImpl::findMatcher(StringRef Prefix, return &II->second; } -void SpecialCaseList::Section::SectionImpl::preprocess(bool OrderBySize) { - SectionMatcher.preprocess(false); +void SpecialCaseList::Section::SectionImpl::preprocess() { + SectionMatcher.preprocess(); for (auto &[K1, E] : Entries) for (auto &[K2, M] : E) - M.preprocess(OrderBySize); + M.preprocess(); } unsigned SpecialCaseList::Section::getLastMatch(StringRef Prefix, StringRef Query, StringRef Category) const { if (const Matcher *M = Impl->findMatcher(Prefix, Category)) - return M->match(Query).second; + return M->match(Query); return 0; } -StringRef SpecialCaseList::Section::getLongestMatch(StringRef Prefix, - StringRef Query, - StringRef Category) const { - if (const Matcher *M = Impl->findMatcher(Prefix, Category)) - return M->match(Query).first; - return {}; -} - bool SpecialCaseList::Section::hasPrefix(StringRef Prefix) const { return Impl->Entries.find(Prefix) != Impl->Entries.end(); } From 6dad2c2cfb9255bb8b4fec3565f99ffda32dfb1a Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 14 
Nov 2025 14:43:01 -0800 Subject: [PATCH 50/56] [lldb] Add a test for capturing stdout/stderr from Python commands (#168138) --- .../TestCommandScriptOutput.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 lldb/test/API/python_api/command_script_output/TestCommandScriptOutput.py diff --git a/lldb/test/API/python_api/command_script_output/TestCommandScriptOutput.py b/lldb/test/API/python_api/command_script_output/TestCommandScriptOutput.py new file mode 100644 index 0000000000000..abe0eec1cb42c --- /dev/null +++ b/lldb/test/API/python_api/command_script_output/TestCommandScriptOutput.py @@ -0,0 +1,47 @@ +""" +Test that HandleCommand captures stdout and stderr from script commands. +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * + + +class CommandScriptOutputTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def test_script_command_stdout_stderr(self): + """Test that HandleCommand captures stdout and stderr from script commands.""" + ci = self.dbg.GetCommandInterpreter() + self.assertTrue(ci, VALID_COMMAND_INTERPRETER) + + res = lldb.SBCommandReturnObject() + + # Execute a script command that writes to stdout. + ci.HandleCommand("script print('Hello stdout')", res) + self.assertTrue(res.Succeeded()) + self.assertIn("Hello stdout", res.GetOutput()) + + # Execute a script command that writes to stderr. + ci.HandleCommand("script import sys; sys.stderr.write('Hello stderr\\n')", res) + self.assertTrue(res.Succeeded()) + self.assertIn("Hello stderr", res.GetOutput()) + + # Execute a script command that writes to both stdout and stderr. + ci.HandleCommand( + "script import sys; print('Output line'); sys.stderr.write('Error line\\n')", + res, + ) + self.assertTrue(res.Succeeded()) + self.assertIn("Output line", res.GetOutput()) + self.assertIn("Error line", res.GetOutput()) + + # Test that multiple print statements are captured. 
+ ci.HandleCommand( + "script print('Line 1'); print('Line 2'); print('Line 3')", res + ) + self.assertTrue(res.Succeeded()) + output = res.GetOutput() + self.assertIn("Line 1", output) + self.assertIn("Line 2", output) + self.assertIn("Line 3", output) From 72a6ae6844752f2232c64b41e4eccf979289da72 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 14 Nov 2025 17:50:17 -0500 Subject: [PATCH 51/56] [AMDGPU] Fix wrong MSB encoding for V_FMAMK instructions (#168107) These instructions use `src0`, `imm`, `src1` as operand. Fixes SWDEV-566579. --- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 27 +++++++++++-- .../AMDGPU/vgpr-lowering-gfx1250-t16.mir | 32 +++++++++++++++ .../CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir | 39 ++++++++++++++++--- 3 files changed, 89 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 37bf2d2463ae2..aff4cfe1dc70e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -3439,17 +3439,36 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) { AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y, AMDGPU::OpName::vdstY}; + // VOP2 MADMK instructions use src0, imm, src1 scheme. + static const AMDGPU::OpName VOP2MADMKOps[4] = { + AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES, + AMDGPU::OpName::src1, AMDGPU::OpName::vdst}; + unsigned TSFlags = Desc.TSFlags; if (TSFlags & (SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::VOPC | SIInstrFlags::DPP)) { + switch (Desc.getOpcode()) { // LD_SCALE operands ignore MSB. 
- if (Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32 || - Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250 || - Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64 || - Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250) + case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32: + case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250: + case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64: + case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250: return {}; + case AMDGPU::V_FMAMK_F16: + case AMDGPU::V_FMAMK_F16_t16: + case AMDGPU::V_FMAMK_F16_t16_gfx12: + case AMDGPU::V_FMAMK_F16_fake16: + case AMDGPU::V_FMAMK_F16_fake16_gfx12: + case AMDGPU::V_FMAMK_F32: + case AMDGPU::V_FMAMK_F32_gfx12: + case AMDGPU::V_FMAMK_F64: + case AMDGPU::V_FMAMK_F64_gfx1250: + return {VOP2MADMKOps, nullptr}; + default: + break; + } return {VOPOps, nullptr}; } diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir index 32cc398740d62..d524aad01a902 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir @@ -64,3 +64,35 @@ body: | ; GCN-NEXT: v_add_f16_e64 v128.l /*v896.l*/, v129.l /*v897.l*/, v130.l /*v898.l*/ $vgpr896_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr897_lo16, 0, undef $vgpr898_lo16, 0, 0, 0, implicit $exec, implicit $mode ... + +# ASM-LABEL: {{^}}fmaak_fmamk: +# DIS-LABEL: : +--- +name: fmaak_fmamk +tracksRegLiveness: true +body: | + bb.0: + ; ASM: %bb.0: + + ; We use an extra instruction to set the MSB, and then we expect it to be reset to 0 (lower 16-bit). 
+ + ; GCN: s_set_vgpr_msb 0xcf + ; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/ + $vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: s_set_vgpr_msb 0xcf00 + ; GCN-NEXT: v_fmamk_f16 v26.l, v56.l, 0x1, v58.l + $vgpr26_lo16 = V_FMAMK_F16_t16 undef $vgpr56_lo16, 1, undef $vgpr58_lo16, implicit $exec, implicit $mode + + ; GCN-NEXT: v_fmamk_f16 v0.l, v35.l, 0x1, v2.l + $vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr35_lo16, 1, undef $vgpr2_lo16, implicit $exec, implicit $mode + + ; GCN-NEXT: v_fmamk_f16 v0.l, v2.l, 0x1, v6.l + $vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr6_lo16, implicit $exec, implicit $mode + + ; GCN-NEXT: v_fmamk_f16 v5.l, v2.l, 0x1, v4.l + $vgpr5_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr4_lo16, implicit $exec, implicit $mode + + ; ASM: NumVgprs: 771 + +... diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir index 7e1c28f8e7bbb..e8c27f2eb3685 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir @@ -332,23 +332,52 @@ body: | ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1, v2 /*v258*/, 0x1 $vgpr256 = V_FMAAK_F32 undef $vgpr1, undef $vgpr258, 1, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x4445 + ; GCN-NEXT: s_set_vgpr_msb 0x4451 ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 /*v258*/ $vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x4505 + ; GCN-NEXT: s_set_vgpr_msb 0x5111 ; GCN-NEXT: v_fmamk_f32 v0, v1 /*v257*/, 0x1, v2 /*v258*/ $vgpr0 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x541 + ; GCN-NEXT: s_set_vgpr_msb 0x1141 ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 $vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef 
$vgpr2, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x4144 + ; GCN-NEXT: s_set_vgpr_msb 0x4150 ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1, 0x1, v2 /*v258*/ $vgpr256 = V_FMAMK_F32 undef $vgpr1, 1, undef $vgpr258, implicit $exec, implicit $mode - ; ASM: NumVgprs: 259 + ; GCN-NEXT: s_set_vgpr_msb 0x5051 + ; GCN-NEXT: v_fmamk_f64 v[4:5] /*v[260:261]*/, v[100:101] /*v[356:357]*/, 0x1, v[2:3] /*v[258:259]*/ + $vgpr260_vgpr261 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr258_vgpr259, implicit $exec, implicit $mode + + ; GCN-NEXT: s_set_vgpr_msb 0x5101 + ; GCN-NEXT: v_fmamk_f64 v[0:1], v[100:101] /*v[356:357]*/, 0x1, v[2:3] + $vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr2_vgpr3, implicit $exec, implicit $mode + + ; GCN-NEXT: s_set_vgpr_msb 0x110 + ; GCN-NEXT: v_fmamk_f64 v[0:1], v[2:3], 0x1, v[100:101] /*v[356:357]*/ + $vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr356_vgpr357, implicit $exec, implicit $mode + + ; GCN-NEXT: s_set_vgpr_msb 0x1040 + ; GCN-NEXT: v_fmamk_f64 v[0:1] /*v[256:257]*/, v[2:3], 0x1, v[4:5] + $vgpr256_vgpr257 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr4_vgpr5, implicit $exec, implicit $mode + + ; GCN-NEXT: s_set_vgpr_msb 0x4000 + ; GCN-NEXT: v_fmamk_f16 v26, v56, 0x1, v58 + $vgpr26 = V_FMAMK_F16_fake16 undef $vgpr56, 1, undef $vgpr58, implicit $exec, implicit $mode + + ; GCN-NEXT: v_fmamk_f16 v0, v35, 0x1, v2 + $vgpr0 = V_FMAMK_F16_fake16 undef $vgpr35, 1, undef $vgpr2, implicit $exec, implicit $mode + + ; GCN-NEXT: v_fmamk_f16 v0, v2, 0x1, v6 + $vgpr0 = V_FMAMK_F16_fake16 undef $vgpr2, 1, undef $vgpr6, implicit $exec, implicit $mode + + ; GCN-NEXT: v_fmamk_f16 v5, v2, 0x1, v4 + $vgpr5 = V_FMAMK_F16_fake16 undef $vgpr2, 1, undef $vgpr4, implicit $exec, implicit $mode + + ; ASM: NumVgprs: 358 ... 
From ddf5bb0a2e2d2dd77bce66173387d62ab7174d9f Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 14 Nov 2025 14:14:54 -0800 Subject: [PATCH 52/56] [SLP]Check if the copyable element is a sub instruciton with abs in isCommutable Need to check if the non-copyable element is an instruction before actually trying to check its NSW attribute. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 11 ++++--- .../non-inst-abs-sub-copyable-value.ll | 29 +++++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/AArch64/non-inst-abs-sub-copyable-value.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e61eb0fcfe492..938eacde7548d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -537,7 +537,8 @@ static bool isSplat(ArrayRef VL) { /// \param I The instruction to check for commutativity /// \param ValWithUses The value whose uses are analyzed for special /// patterns -static bool isCommutative(Instruction *I, Value *ValWithUses) { +static bool isCommutative(Instruction *I, Value *ValWithUses, + bool IsCopyable = false) { if (auto *Cmp = dyn_cast(I)) return Cmp->isCommutative(); if (auto *BO = dyn_cast(I)) @@ -546,7 +547,7 @@ static bool isCommutative(Instruction *I, Value *ValWithUses) { !ValWithUses->hasNUsesOrMore(UsesLimit) && all_of( ValWithUses->uses(), - [](const Use &U) { + [&](const Use &U) { // Commutative, if icmp eq/ne sub, 0 CmpPredicate Pred; if (match(U.getUser(), @@ -555,10 +556,11 @@ static bool isCommutative(Instruction *I, Value *ValWithUses) { return true; // Commutative, if abs(sub nsw, true) or abs(sub, false). 
ConstantInt *Flag; + auto *I = dyn_cast(U.get()); return match(U.getUser(), m_Intrinsic( m_Specific(U.get()), m_ConstantInt(Flag))) && - (!cast(U.get())->hasNoSignedWrap() || + ((!IsCopyable && I && !I->hasNoSignedWrap()) || Flag->isOne()); })) || (BO->getOpcode() == Instruction::FSub && @@ -3164,7 +3166,8 @@ class BoUpSLP { bool IsInverseOperation = false; if (S.isCopyableElement(VL[Lane])) { // The value is a copyable element. - IsInverseOperation = !isCommutative(MainOp, VL[Lane]); + IsInverseOperation = + !isCommutative(MainOp, VL[Lane], /*IsCopyable=*/true); } else { assert(I && "Expected instruction"); auto [SelectedOp, Ops] = convertTo(I, S); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/non-inst-abs-sub-copyable-value.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/non-inst-abs-sub-copyable-value.ll new file mode 100644 index 0000000000000..d90873f1895e4 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/non-inst-abs-sub-copyable-value.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=arm64-apple-macosx15.0.0 < %s | FileCheck %s + +define i1 @test(i32 %shr.i.i90, i32 %x) { +; CHECK-LABEL: define i1 @test( +; CHECK-SAME: i32 [[SHR_I_I90:%.*]], i32 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SUB32_I_I:%.*]] = sub i32 [[X]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[SHR_I_I90]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[SUB32_I_I]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP1]], i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt <2 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: ret i1 [[TMP5]] +; +entry: + %cond.i.i = tail call i32 @llvm.abs.i32(i32 %shr.i.i90, i1 true) + 
%conv.i.i91 = zext i32 %cond.i.i to i64 + %sub32.i.i = sub i32 %x, 2 + %cond41.i.i = tail call i32 @llvm.abs.i32(i32 %sub32.i.i, i1 true) + %conv42.i.i = zext i32 %cond41.i.i to i64 + %cmp.not.i.2.i.i = icmp ugt i64 %conv.i.i91, 300 + %cmp.not.i.3.i.i = icmp ugt i64 %conv42.i.i, 100 + ret i1 %cmp.not.i.3.i.i +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.abs.i32(i32, i1 immarg) #0 From 459a64b9045481cce345e2ec7812bfc8c50e4a44 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 14 Nov 2025 15:17:01 -0800 Subject: [PATCH 53/56] [lldb] Diagnose unsupported configurations when targeting the Limited C API (#168145) Diagnose unsupported configurations when targeting the Python Limited C API. I used SEND_ERROR so that if there's multiple issues, you don't need to keep reconfiguring. --- lldb/cmake/modules/LLDBConfig.cmake | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 4b568d27c4709..89da2341839bc 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -180,13 +180,25 @@ if (LLDB_ENABLE_PYTHON) "Path to use as PYTHONHOME in lldb. If a relative path is specified, it will be resolved at runtime relative to liblldb directory.") endif() - if (SWIG_VERSION VERSION_GREATER_EQUAL "4.2" AND NOT LLDB_EMBED_PYTHON_HOME) + # Enable targeting the Python Limited C API. + set(PYTHON_LIMITED_API_MIN_SWIG_VERSION "4.2") + if (SWIG_VERSION VERSION_GREATER_EQUAL PYTHON_LIMITED_API_MIN_SWIG_VERSION + AND NOT LLDB_EMBED_PYTHON_HOME) set(default_enable_python_limited_api ON) else() set(default_enable_python_limited_api OFF) endif() option(LLDB_ENABLE_PYTHON_LIMITED_API "Force LLDB to only use the Python Limited API (requires SWIG 4.2 or later)" ${default_enable_python_limited_api}) + + # Diagnose unsupported configurations. 
+ if (LLDB_ENABLE_PYTHON_LIMITED_API AND LLDB_EMBED_PYTHON_HOME) + message(SEND_ERROR "LLDB_ENABLE_PYTHON_LIMITED_API is not compatible with LLDB_EMBED_PYTHON_HOME") + endif() + if (LLDB_ENABLE_PYTHON_LIMITED_API AND SWIG_VERSION VERSION_LESS PYTHON_LIMITED_API_MIN_SWIG_VERSION) + message(SEND_ERROR "LLDB_ENABLE_PYTHON_LIMITED_API is not compatible with SWIG ${SWIG_VERSION} (requires SWIG ${PYTHON_LIMITED_API_MIN_SWIG_VERSION})") + endif() + else() # Even if Python scripting is disabled, we still need a Python interpreter to # build, for example to generate SBLanguages.h. From e8cc0d22073fd5f59e0b9e1f940dc7a9d4e82218 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 14 Nov 2025 15:21:49 -0800 Subject: [PATCH 54/56] Revert "[SLP]Check if the copyable element is a sub instruciton with abs in isCommutable" This reverts commit ddf5bb0a2e2d2dd77bce66173387d62ab7174d9f to fix buildbots https://lab.llvm.org/buildbot/#/builders/11/builds/28083. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 11 +++---- .../non-inst-abs-sub-copyable-value.ll | 29 ------------------- 2 files changed, 4 insertions(+), 36 deletions(-) delete mode 100644 llvm/test/Transforms/SLPVectorizer/AArch64/non-inst-abs-sub-copyable-value.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 938eacde7548d..e61eb0fcfe492 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -537,8 +537,7 @@ static bool isSplat(ArrayRef VL) { /// \param I The instruction to check for commutativity /// \param ValWithUses The value whose uses are analyzed for special /// patterns -static bool isCommutative(Instruction *I, Value *ValWithUses, - bool IsCopyable = false) { +static bool isCommutative(Instruction *I, Value *ValWithUses) { if (auto *Cmp = dyn_cast(I)) return Cmp->isCommutative(); if (auto *BO = dyn_cast(I)) @@ -547,7 +546,7 @@ static bool isCommutative(Instruction *I, Value 
*ValWithUses, !ValWithUses->hasNUsesOrMore(UsesLimit) && all_of( ValWithUses->uses(), - [&](const Use &U) { + [](const Use &U) { // Commutative, if icmp eq/ne sub, 0 CmpPredicate Pred; if (match(U.getUser(), @@ -556,11 +555,10 @@ static bool isCommutative(Instruction *I, Value *ValWithUses, return true; // Commutative, if abs(sub nsw, true) or abs(sub, false). ConstantInt *Flag; - auto *I = dyn_cast(U.get()); return match(U.getUser(), m_Intrinsic( m_Specific(U.get()), m_ConstantInt(Flag))) && - ((!IsCopyable && I && !I->hasNoSignedWrap()) || + (!cast(U.get())->hasNoSignedWrap() || Flag->isOne()); })) || (BO->getOpcode() == Instruction::FSub && @@ -3166,8 +3164,7 @@ class BoUpSLP { bool IsInverseOperation = false; if (S.isCopyableElement(VL[Lane])) { // The value is a copyable element. - IsInverseOperation = - !isCommutative(MainOp, VL[Lane], /*IsCopyable=*/true); + IsInverseOperation = !isCommutative(MainOp, VL[Lane]); } else { assert(I && "Expected instruction"); auto [SelectedOp, Ops] = convertTo(I, S); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/non-inst-abs-sub-copyable-value.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/non-inst-abs-sub-copyable-value.ll deleted file mode 100644 index d90873f1895e4..0000000000000 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/non-inst-abs-sub-copyable-value.ll +++ /dev/null @@ -1,29 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 -; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=arm64-apple-macosx15.0.0 < %s | FileCheck %s - -define i1 @test(i32 %shr.i.i90, i32 %x) { -; CHECK-LABEL: define i1 @test( -; CHECK-SAME: i32 [[SHR_I_I90:%.*]], i32 [[X:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[SUB32_I_I:%.*]] = sub i32 [[X]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[SHR_I_I90]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[SUB32_I_I]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = 
call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP1]], i1 true) -; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt <2 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 -; CHECK-NEXT: ret i1 [[TMP5]] -; -entry: - %cond.i.i = tail call i32 @llvm.abs.i32(i32 %shr.i.i90, i1 true) - %conv.i.i91 = zext i32 %cond.i.i to i64 - %sub32.i.i = sub i32 %x, 2 - %cond41.i.i = tail call i32 @llvm.abs.i32(i32 %sub32.i.i, i1 true) - %conv42.i.i = zext i32 %cond41.i.i to i64 - %cmp.not.i.2.i.i = icmp ugt i64 %conv.i.i91, 300 - %cmp.not.i.3.i.i = icmp ugt i64 %conv42.i.i, 100 - ret i1 %cmp.not.i.3.i.i -} - -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.abs.i32(i32, i1 immarg) #0 From 4c4ffd30cd94469a940397815e08c1b058fdb8f3 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 14 Nov 2025 23:25:40 +0000 Subject: [PATCH 55/56] [ProfCheck] Refactor Select Instrumentation to use Early Exits (#168086) I think this is quite a bit more readable than the nested conditionals. From review feedback that was not addressed precommit in #167973.
--- llvm/lib/Transforms/Utils/ProfileVerify.cpp | 26 +++++++++++++-------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Utils/ProfileVerify.cpp b/llvm/lib/Transforms/Utils/ProfileVerify.cpp index 149c0879edcdd..c7cf8256d393c 100644 --- a/llvm/lib/Transforms/Utils/ProfileVerify.cpp +++ b/llvm/lib/Transforms/Utils/ProfileVerify.cpp @@ -102,11 +102,14 @@ bool ProfileInjector::inject() { for (auto &BB : F) { if (AnnotateSelect) { for (auto &I : BB) { - if (auto *SI = dyn_cast(&I)) - if (!SI->getCondition()->getType()->isVectorTy() && - !I.getMetadata(LLVMContext::MD_prof)) - setBranchWeights(I, {SelectTrueWeight, SelectFalseWeight}, - /*IsExpected=*/false); + if (auto *SI = dyn_cast(&I)) { + if (SI->getCondition()->getType()->isVectorTy()) + continue; + if (I.getMetadata(LLVMContext::MD_prof)) + continue; + setBranchWeights(I, {SelectTrueWeight, SelectFalseWeight}, + /*IsExpected=*/false); + } } } auto *Term = getTerminatorBenefitingFromMDProf(BB); @@ -187,11 +190,14 @@ PreservedAnalyses ProfileVerifierPass::run(Function &F, for (const auto &BB : F) { if (AnnotateSelect) { for (const auto &I : BB) - if (auto *SI = dyn_cast(&I)) - if (!SI->getCondition()->getType()->isVectorTy() && - !I.getMetadata(LLVMContext::MD_prof)) - F.getContext().emitError( - "Profile verification failed: select annotation missing"); + if (auto *SI = dyn_cast(&I)) { + if (SI->getCondition()->getType()->isVectorTy()) + continue; + if (I.getMetadata(LLVMContext::MD_prof)) + continue; + F.getContext().emitError( + "Profile verification failed: select annotation missing"); + } } if (const auto *Term = ProfileInjector::getTerminatorBenefitingFromMDProf(BB)) From e02fdf0fcea3c02dc0eb692bc38042f9681e4afd Mon Sep 17 00:00:00 2001 From: Thibault Monnier <97551402+Thibault-Monnier@users.noreply.github.com> Date: Sat, 15 Nov 2025 00:35:30 +0100 Subject: [PATCH 56/56] [CIR] Upstream CIR codegen for vec_ext x86 builtins (#167942) This PR upstreams the codegen for the x86 
vec_ext builtins from the incubator. It is part of #167752. --- clang/lib/CIR/CodeGen/CIRGenBuilder.h | 6 ++++-- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 16 ++++++++++++++ clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 25 ++++++++++++++++++---- clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 22 ------------------- clang/lib/CIR/CodeGen/CIRGenFunction.h | 6 +++--- clang/test/CIR/CodeGen/X86/sse2-builtins.c | 15 +++++++++++++ 6 files changed, 59 insertions(+), 31 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index e5066fac19185..a0af38ceab74c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -315,8 +315,10 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { return getConstantInt(loc, getUInt32Ty(), c); } cir::ConstantOp getSInt64(uint64_t c, mlir::Location loc) { - cir::IntType sInt64Ty = getSInt64Ty(); - return cir::ConstantOp::create(*this, loc, cir::IntAttr::get(sInt64Ty, c)); + return getConstantInt(loc, getSInt64Ty(), c); + } + cir::ConstantOp getUInt64(uint64_t c, mlir::Location loc) { + return getConstantInt(loc, getUInt64Ty(), c); } mlir::Value createNeg(mlir::Value value) { diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index eec4d10bb49b8..77f19343653db 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -630,6 +630,22 @@ CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID, const CallExpr *e, getTarget().getTriple().getArch()); } +mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg( + const unsigned iceArguments, const unsigned idx, const Expr *argExpr) { + mlir::Value arg = {}; + if ((iceArguments & (1 << idx)) == 0) { + arg = emitScalarExpr(argExpr); + } else { + // If this is required to be a constant, constant fold it so that we + // know that the generated intrinsic gets a ConstantInt. 
+ const std::optional result = + argExpr->getIntegerConstantExpr(getContext()); + assert(result && "Expected argument to be a constant"); + arg = builder.getConstInt(getLoc(argExpr->getSourceRange()), *result); + } + return arg; +} + /// Given a builtin id for a function like "__builtin_fabsf", return a Function* /// for "fabsf". cir::FuncOp CIRGenModule::getBuiltinLibFunction(const FunctionDecl *fd, diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 2d6cf30fa2ded..ba160373ec77e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -16,7 +16,6 @@ #include "clang/Basic/Builtins.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/CIR/MissingFeatures.h" -#include "llvm/IR/IntrinsicsX86.h" using namespace clang; using namespace clang::CIRGen; @@ -66,9 +65,8 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, getContext().GetBuiltinType(builtinID, error, &iceArguments); assert(error == ASTContext::GE_None && "Error while getting builtin type."); - for (auto [idx, arg] : llvm::enumerate(e->arguments())) { + for (auto [idx, arg] : llvm::enumerate(e->arguments())) ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg)); - } CIRGenBuilderTy &builder = getBuilder(); mlir::Type voidTy = builder.getVoidTy(); @@ -98,6 +96,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: + cgm.errorNYI(e->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI__builtin_ia32_vec_ext_v4hi: case X86::BI__builtin_ia32_vec_ext_v16qi: case X86::BI__builtin_ia32_vec_ext_v8hi: @@ -107,7 +109,22 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_vec_ext_v32qi: case 
X86::BI__builtin_ia32_vec_ext_v16hi: case X86::BI__builtin_ia32_vec_ext_v8si: - case X86::BI__builtin_ia32_vec_ext_v4di: + case X86::BI__builtin_ia32_vec_ext_v4di: { + unsigned numElts = cast(ops[0].getType()).getSize(); + + uint64_t index = + ops[1].getDefiningOp().getIntValue().getZExtValue(); + + index &= numElts - 1; + + cir::ConstantOp indexVal = + builder.getUInt64(index, getLoc(e->getExprLoc())); + + // These builtins exist so we can ensure the index is an ICE and in range. + // Otherwise we could just do this in the header file. + return cir::VecExtractOp::create(builder, getLoc(e->getExprLoc()), ops[0], + indexVal); + } case X86::BI__builtin_ia32_vec_set_v4hi: case X86::BI__builtin_ia32_vec_set_v16qi: case X86::BI__builtin_ia32_vec_set_v8hi: diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 15d96c0a340ff..5d9188777741d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -1442,28 +1442,6 @@ mlir::Value CIRGenFunction::emitPromotedScalarExpr(const Expr *e, return ScalarExprEmitter(*this, builder).Visit(const_cast(e)); } -mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg(unsigned iceArguments, - unsigned index, - const Expr *arg) { - mlir::Value result{}; - - // The bit at the specified index indicates whether the argument is required - // to be a constant integer expression. - bool isArgRequiredToBeConstant = (iceArguments & (1 << index)); - - if (!isArgRequiredToBeConstant) { - result = emitScalarExpr(arg); - } else { - // If this is required to be a constant, constant fold it so that we - // know that the generated intrinsic gets a ConstantInt. 
- std::optional iceOpt = - arg->getIntegerConstantExpr(getContext()); - assert(iceOpt && "Expected argument to be a constant"); - result = builder.getConstInt(getLoc(arg->getSourceRange()), *iceOpt); - } - return result; -} - [[maybe_unused]] static bool mustVisitNullValue(const Expr *e) { // If a null pointer expression's type is the C++0x nullptr_t and // the expression is not a simple literal, it must be evaluated diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index b81706668b431..00f289bcd1bb2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1546,9 +1546,6 @@ class CIRGenFunction : public CIRGenTypeCache { mlir::Value emitScalarExpr(const clang::Expr *e, bool ignoreResultAssign = false); - mlir::Value emitScalarOrConstFoldImmArg(unsigned iceArguments, unsigned index, - const Expr *arg); - mlir::Value emitScalarPrePostIncDec(const UnaryOperator *e, LValue lv, cir::UnaryOpKind kind, bool isPre); @@ -1721,6 +1718,9 @@ class CIRGenFunction : public CIRGenTypeCache { void emitScalarInit(const clang::Expr *init, mlir::Location loc, LValue lvalue, bool capturedByInit = false); + mlir::Value emitScalarOrConstFoldImmArg(unsigned iceArguments, unsigned idx, + const Expr *argExpr); + void emitStaticVarDecl(const VarDecl &d, cir::GlobalLinkageKind linkage); void emitStoreOfComplex(mlir::Location loc, mlir::Value v, LValue dest, diff --git a/clang/test/CIR/CodeGen/X86/sse2-builtins.c b/clang/test/CIR/CodeGen/X86/sse2-builtins.c index 144ca143fbf15..9ec8f7a9bf4fe 100644 --- a/clang/test/CIR/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CIR/CodeGen/X86/sse2-builtins.c @@ -16,6 +16,21 @@ #include +// Lowering to pextrw requires optimization. 
+int test_mm_extract_epi16(__m128i A) { + // CIR-LABEL: test_mm_extract_epi16 + // CIR %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector + // CIR %{{.*}} = cir.cast integral %{{.*}} : !u16i -> !s32i + + // LLVM-LABEL: test_mm_extract_epi16 + // LLVM: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1 + // LLVM: zext i16 %{{.*}} to i32 + + // OGCG-LABEL: test_mm_extract_epi16 + // OGCG: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1 + // OGCG: zext i16 %{{.*}} to i32 + return _mm_extract_epi16(A, 1); +} void test_mm_clflush(void* A) { // CIR-LABEL: test_mm_clflush