diff --git a/.ci/metrics/metrics.py b/.ci/metrics/metrics.py index a6d6edbd547e7..ac39a47d43c07 100644 --- a/.ci/metrics/metrics.py +++ b/.ci/metrics/metrics.py @@ -370,6 +370,13 @@ def github_get_metrics( started_at = job.started_at completed_at = job.completed_at + if completed_at is None: + logging.info( + f"Workflow {task.id} is marked completed but has a job without a " + "completion timestamp." + ) + continue + # GitHub API can return results where the started_at is slightly # later then the created_at (or completed earlier than started). # This would cause a -23h59mn delta, which will show up as +24h diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td index d923d2a90e908..ad448766e665f 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.td +++ b/clang/include/clang/Basic/BuiltinsNVPTX.td @@ -615,7 +615,7 @@ def __nvvm_f2bf16_rz : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>; def __nvvm_f2bf16_rz_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>; def __nvvm_f2tf32_rna : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_80, PTX70>; -def __nvvm_f2tf32_rna_satfinite : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_89, PTX81>; +def __nvvm_f2tf32_rna_satfinite : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_80, PTX81>; def __nvvm_f2tf32_rn : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_90, PTX78>; def __nvvm_f2tf32_rn_relu : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_90, PTX78>; def __nvvm_f2tf32_rn_satfinite : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_100, PTX86>; diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index e3be262622844..c0ed799970122 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -28,6 +28,9 @@ // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_89 -target-feature +ptx81 -DPTX=81\ // RUN: -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \ // RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX81_SM89 %s +// RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_80 -target-feature +ptx81 -DPTX=81 \ +// RUN: -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \ +// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX81_SM80 %s // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_90 -target-feature +ptx78 -DPTX=78 \ // RUN: -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \ // RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX78_SM90 %s @@ -1025,6 +1028,10 @@ __device__ void nvvm_cvt_sm80() { // CHECK_PTX70_SM80: call i32 @llvm.nvvm.f2tf32.rna(float 1.000000e+00) __nvvm_f2tf32_rna(1); + #if PTX >= 81 + // CHECK_PTX81_SM80: call i32 @llvm.nvvm.f2tf32.rna.satfinite(float 1.000000e+00) + __nvvm_f2tf32_rna_satfinite(1.0f); + #endif #endif // CHECK: ret void } @@ -1058,9 +1065,6 @@ __device__ void nvvm_cvt_sm89() { __nvvm_e5m2x2_to_f16x2_rn(0x4c4c); // CHECK_PTX81_SM89: call <2 x half> @llvm.nvvm.e5m2x2.to.f16x2.rn.relu(i16 19532) __nvvm_e5m2x2_to_f16x2_rn_relu(0x4c4c); - - // CHECK_PTX81_SM89: call i32 @llvm.nvvm.f2tf32.rna.satfinite(float 1.000000e+00) - __nvvm_f2tf32_rna_satfinite(1.0f); #endif // CHECK: ret void } diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index 0f0f87915bafe..3f7dd8e402b78 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -113,9 +113,6 @@ def push_dynamic_library_lookup_path(config, new_path): 
config.environment[dynamic_library_lookup_var] = new_ld_library_path_64 -# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites. -# See https://github.com/llvm/llvm-project/issues/106636 for more details. -# # Choose between lit's internal shell pipeline runner and a real shell. If # LIT_USE_INTERNAL_SHELL is in the environment, we use that as an override. use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL") @@ -123,8 +120,9 @@ def push_dynamic_library_lookup_path(config, new_path): # 0 is external, "" is default, and everything else is internal. execute_external = use_lit_shell == "0" else: - # Otherwise we default to internal everywhere. - execute_external = False + # Otherwise we default to internal on Windows and external elsewhere, as + # bash on Windows is usually very slow. + execute_external = not sys.platform in ["win32"] # Allow expanding substitutions that are based on other substitutions config.recursiveExpansionLimit = 10 diff --git a/compiler-rt/test/orc/TestCases/Darwin/arm64/objc-imageinfo.S b/compiler-rt/test/orc/TestCases/Darwin/arm64/objc-imageinfo.S index 78454e33f7356..d5524020f2636 100644 --- a/compiler-rt/test/orc/TestCases/Darwin/arm64/objc-imageinfo.S +++ b/compiler-rt/test/orc/TestCases/Darwin/arm64/objc-imageinfo.S @@ -5,7 +5,9 @@ // RUN: rm -rf %t // RUN: split-file %s %t -// RUN: (cd %t; %clang -c *.S) +// RUN: pushd %t +// RUN: %clang -c *.S +// RUN: popd // Check individual versions are loadable. diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 42571862b24b2..acfd4c8a14acb 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -945,6 +945,7 @@ if(LLVM_LIBC_FULL_BUILD) # arpa/inet.h entrypoints libc.src.arpa.inet.htonl libc.src.arpa.inet.htons + libc.src.arpa.inet.inet_addr libc.src.arpa.inet.inet_aton libc.src.arpa.inet.ntohl libc.src.arpa.inet.ntohs diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index b62a46b7178d5..83d5c42816632 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -1078,6 +1078,7 @@ if(LLVM_LIBC_FULL_BUILD) # arpa/inet.h entrypoints libc.src.arpa.inet.htonl libc.src.arpa.inet.htons + libc.src.arpa.inet.inet_addr libc.src.arpa.inet.inet_aton libc.src.arpa.inet.ntohl libc.src.arpa.inet.ntohs diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 910bdc53cbbc5..bbff4969cb413 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1119,6 +1119,7 @@ if(LLVM_LIBC_FULL_BUILD) # arpa/inet.h entrypoints libc.src.arpa.inet.htonl libc.src.arpa.inet.htons + libc.src.arpa.inet.inet_addr libc.src.arpa.inet.inet_aton libc.src.arpa.inet.ntohl libc.src.arpa.inet.ntohs diff --git a/libc/include/arpa/inet.yaml b/libc/include/arpa/inet.yaml index 6e0629072b6ef..350a4d74e5bec 100644 --- a/libc/include/arpa/inet.yaml +++ b/libc/include/arpa/inet.yaml @@ -3,6 +3,7 @@ header_template: inet.h.def macros: [] types: - type_name: in_addr + - type_name: in_addr_t enums: [] objects: [] functions: @@ -18,6 +19,12 @@ functions: return_type: uint16_t arguments: - type: uint16_t + - name: inet_addr + standards: + - POSIX + return_type: in_addr_t + arguments: + - type: const char * - name: inet_aton standards: - llvm_libc_ext diff --git a/libc/include/llvm-libc-macros/netinet-in-macros.h 
b/libc/include/llvm-libc-macros/netinet-in-macros.h index 2011c34e288cd..7a4d26d832114 100644 --- a/libc/include/llvm-libc-macros/netinet-in-macros.h +++ b/libc/include/llvm-libc-macros/netinet-in-macros.h @@ -29,6 +29,7 @@ #define INADDR_ANY __LLVM_LIBC_CAST(static_cast, in_addr_t, 0x00000000) #define INADDR_BROADCAST __LLVM_LIBC_CAST(static_cast, in_addr_t, 0xffffffff) +#define INADDR_NONE __LLVM_LIBC_CAST(static_cast, in_addr_t, 0xffffffff) #define INET_ADDRSTRLEN 16 #define INET6_ADDRSTRLEN 46 diff --git a/libc/src/arpa/inet/CMakeLists.txt b/libc/src/arpa/inet/CMakeLists.txt index bb43e24ec9d0b..3b3d0f43b8586 100644 --- a/libc/src/arpa/inet/CMakeLists.txt +++ b/libc/src/arpa/inet/CMakeLists.txt @@ -35,6 +35,21 @@ add_entrypoint_object( libc.src.__support.str_to_integer ) +add_entrypoint_object( + inet_addr + SRCS + inet_addr.cpp + HDRS + inet_addr.h + DEPENDS + libc.include.arpa_inet + libc.include.llvm-libc-macros.netinet_in_macros + libc.include.llvm-libc-types.in_addr + libc.include.llvm-libc-types.in_addr_t + libc.src.__support.common + libc.src.arpa.inet.inet_aton +) + add_entrypoint_object( ntohl SRCS diff --git a/libc/src/arpa/inet/inet_addr.cpp b/libc/src/arpa/inet/inet_addr.cpp new file mode 100644 index 0000000000000..8ce88c0df8aec --- /dev/null +++ b/libc/src/arpa/inet/inet_addr.cpp @@ -0,0 +1,23 @@ +//===-- Implementation of inet_addr function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/arpa/inet/inet_addr.h" +#include "include/llvm-libc-macros/netinet-in-macros.h" +#include "include/llvm-libc-types/in_addr.h" +#include "include/llvm-libc-types/in_addr_t.h" +#include "src/__support/common.h" +#include "src/arpa/inet/inet_aton.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(in_addr_t, inet_addr, (const char *cp)) { + in_addr addr; + return inet_aton(cp, &addr) ? addr.s_addr : INADDR_NONE; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/arpa/inet/inet_addr.h b/libc/src/arpa/inet/inet_addr.h new file mode 100644 index 0000000000000..66f1ae80dd5a0 --- /dev/null +++ b/libc/src/arpa/inet/inet_addr.h @@ -0,0 +1,21 @@ +//===-- Implementation header of inet_addr ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_ARPA_INET_INET_ADDR_H +#define LLVM_LIBC_SRC_ARPA_INET_INET_ADDR_H + +#include "include/llvm-libc-types/in_addr_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +in_addr_t inet_addr(const char *cp); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_ARPA_INET_INET_ADDR_H diff --git a/libc/test/src/arpa/inet/CMakeLists.txt b/libc/test/src/arpa/inet/CMakeLists.txt index 690f751bef5e1..1d400c4374f70 100644 --- a/libc/test/src/arpa/inet/CMakeLists.txt +++ b/libc/test/src/arpa/inet/CMakeLists.txt @@ -22,6 +22,17 @@ add_libc_unittest( libc.src.arpa.inet.ntohs ) +add_libc_unittest( + inet_addr + SUITE + libc_arpa_inet_unittests + SRCS + inet_addr_test.cpp + DEPENDS + libc.src.arpa.inet.htonl + libc.src.arpa.inet.inet_addr +) + add_libc_unittest( inet_aton SUITE diff --git a/libc/test/src/arpa/inet/inet_addr_test.cpp b/libc/test/src/arpa/inet/inet_addr_test.cpp new file mode 100644 index 0000000000000..3f0ea0fc3b3f3 --- /dev/null +++ b/libc/test/src/arpa/inet/inet_addr_test.cpp @@ -0,0 +1,25 @@ +//===-- Unittests for inet_addr -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/arpa/inet/htonl.h" +#include "src/arpa/inet/inet_addr.h" +#include "test/UnitTest/Test.h" + +namespace LIBC_NAMESPACE_DECL { + +TEST(LlvmLibcInetAddr, ValidTest) { + ASSERT_EQ(htonl(0x7f010204), inet_addr("127.1.2.4")); + ASSERT_EQ(htonl(0x7f010004), inet_addr("127.1.4")); +} + +TEST(LlvmLibcInetAddr, InvalidTest) { + ASSERT_EQ(htonl(0xffffffff), inet_addr("")); + ASSERT_EQ(htonl(0xffffffff), inet_addr("x")); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libcxx/include/span b/libcxx/include/span index 3d4f9e4ba7831..1911badd88cb1 100644 --- a/libcxx/include/span +++ b/libcxx/include/span @@ -310,30 +310,32 @@ public: } template - _LIBCPP_HIDE_FROM_ABI constexpr span first() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span first() const noexcept { static_assert(_Count <= _Extent, "span::first(): Count out of range"); return span{data(), _Count}; } template - _LIBCPP_HIDE_FROM_ABI constexpr span last() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span last() const noexcept { static_assert(_Count <= _Extent, "span::last(): Count out of range"); return span{data() + size() - _Count, _Count}; } - _LIBCPP_HIDE_FROM_ABI constexpr span first(size_type __count) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span + first(size_type __count) const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__count <= size(), "span::first(count): count out of range"); return {data(), __count}; } - _LIBCPP_HIDE_FROM_ABI constexpr span last(size_type __count) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span + last(size_type __count) const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__count <= size(), "span::last(count): count out of range"); return {data() + size() - __count, __count}; } template - _LIBCPP_HIDE_FROM_ABI constexpr auto - subspan() const noexcept -> span { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto subspan() const 
noexcept + -> span { static_assert(_Offset <= _Extent, "span::subspan(): Offset out of range"); static_assert(_Count == dynamic_extent || _Count <= _Extent - _Offset, "span::subspan(): Offset + Count out of range"); @@ -342,7 +344,7 @@ public: return _ReturnType{data() + _Offset, _Count == dynamic_extent ? size() - _Offset : _Count}; } - _LIBCPP_HIDE_FROM_ABI constexpr span + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span subspan(size_type __offset, size_type __count = dynamic_extent) const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__offset <= size(), "span::subspan(offset, count): offset out of range"); if (__count == dynamic_extent) @@ -352,52 +354,58 @@ public: return {data() + __offset, __count}; } - _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept { return _Extent; } - _LIBCPP_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept { return _Extent * sizeof(element_type); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept { return _Extent; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept { + return _Extent * sizeof(element_type); + } [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const noexcept { return _Extent == 0; } - _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](size_type __idx) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](size_type __idx) const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__idx < size(), "span::operator[](index): index out of range"); return __data_[__idx]; } # if _LIBCPP_STD_VER >= 26 - _LIBCPP_HIDE_FROM_ABI constexpr reference at(size_type __index) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference at(size_type __index) const { if (__index >= size()) std::__throw_out_of_range("span"); return __data_[__index]; } # endif - _LIBCPP_HIDE_FROM_ABI constexpr reference front() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference front() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "span::front() on empty span"); return __data_[0]; } - _LIBCPP_HIDE_FROM_ABI constexpr reference back() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference back() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "span::back() on empty span"); return __data_[size() - 1]; } - _LIBCPP_HIDE_FROM_ABI constexpr pointer data() const noexcept { return __data_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr pointer data() const noexcept { return __data_; } // [span.iter], span iterator support - _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() const noexcept { # ifdef _LIBCPP_ABI_BOUNDED_ITERATORS return std::__make_bounded_iter(data(), data(), data() + size()); # else return iterator(data()); # endif } - _LIBCPP_HIDE_FROM_ABI constexpr iterator end() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator end() const noexcept { # ifdef _LIBCPP_ABI_BOUNDED_ITERATORS return std::__make_bounded_iter(data() + size(), data(), data() + size()); # else return iterator(data() + size()); # endif } - _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rbegin() const noexcept { + return reverse_iterator(end()); + } + [[nodiscard]] 
_LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rend() const noexcept { + return reverse_iterator(begin()); + } _LIBCPP_HIDE_FROM_ABI span __as_bytes() const noexcept { return span{reinterpret_cast(data()), size_bytes()}; @@ -478,36 +486,38 @@ public: : __data_{__other.data()}, __size_{__other.size()} {} template - _LIBCPP_HIDE_FROM_ABI constexpr span first() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span first() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(_Count <= size(), "span::first(): Count out of range"); return span{data(), _Count}; } template - _LIBCPP_HIDE_FROM_ABI constexpr span last() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span last() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(_Count <= size(), "span::last(): Count out of range"); return span{data() + size() - _Count, _Count}; } - _LIBCPP_HIDE_FROM_ABI constexpr span first(size_type __count) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span + first(size_type __count) const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__count <= size(), "span::first(count): count out of range"); return {data(), __count}; } - _LIBCPP_HIDE_FROM_ABI constexpr span last(size_type __count) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span + last(size_type __count) const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__count <= size(), "span::last(count): count out of range"); return {data() + size() - __count, __count}; } template - _LIBCPP_HIDE_FROM_ABI constexpr span subspan() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span subspan() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(_Offset <= size(), "span::subspan(): Offset out of range"); _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(_Count == dynamic_extent || _Count <= size() - _Offset, "span::subspan(): Offset + Count out of range"); return span{data() + _Offset, _Count == dynamic_extent ? 
size() - _Offset : _Count}; } - constexpr span _LIBCPP_HIDE_FROM_ABI + [[nodiscard]] constexpr span _LIBCPP_HIDE_FROM_ABI subspan(size_type __offset, size_type __count = dynamic_extent) const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__offset <= size(), "span::subspan(offset, count): offset out of range"); if (__count == dynamic_extent) @@ -517,52 +527,58 @@ public: return {data() + __offset, __count}; } - _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept { return __size_; } - _LIBCPP_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept { return __size_ * sizeof(element_type); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept { return __size_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept { + return __size_ * sizeof(element_type); + } [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const noexcept { return __size_ == 0; } - _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](size_type __idx) const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](size_type __idx) const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__idx < size(), "span::operator[](index): index out of range"); return __data_[__idx]; } # if _LIBCPP_STD_VER >= 26 - _LIBCPP_HIDE_FROM_ABI constexpr reference at(size_type __index) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference at(size_type __index) const { if (__index >= size()) std::__throw_out_of_range("span"); return __data_[__index]; } # endif - _LIBCPP_HIDE_FROM_ABI constexpr reference front() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference front() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "span::front() on empty span"); return __data_[0]; } - _LIBCPP_HIDE_FROM_ABI constexpr reference back() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference back() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "span::back() on empty span"); return __data_[size() - 1]; } - _LIBCPP_HIDE_FROM_ABI constexpr pointer data() const noexcept { return __data_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr pointer data() const noexcept { return __data_; } // [span.iter], span iterator support - _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() const noexcept { # ifdef _LIBCPP_ABI_BOUNDED_ITERATORS return std::__make_bounded_iter(data(), data(), data() + size()); # else return iterator(data()); # endif } - _LIBCPP_HIDE_FROM_ABI constexpr iterator end() const noexcept { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator end() const noexcept { # ifdef _LIBCPP_ABI_BOUNDED_ITERATORS return std::__make_bounded_iter(data() + size(), data(), data() + size()); # else return iterator(data() + size()); # endif } - _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rbegin() const noexcept { + return reverse_iterator(end()); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rend() const noexcept { + return reverse_iterator(begin()); + } _LIBCPP_HIDE_FROM_ABI span __as_bytes() const noexcept { return {reinterpret_cast(data()), size_bytes()}; @@ -585,13 +601,13 @@ inline constexpr bool ranges::enable_view> = true; // as_bytes & 
as_writable_bytes template -_LIBCPP_HIDE_FROM_ABI auto as_bytes(span<_Tp, _Extent> __s) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto as_bytes(span<_Tp, _Extent> __s) noexcept { return __s.__as_bytes(); } template requires(!is_const_v<_Tp>) -_LIBCPP_HIDE_FROM_ABI auto as_writable_bytes(span<_Tp, _Extent> __s) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto as_writable_bytes(span<_Tp, _Extent> __s) noexcept { return __s.__as_writable_bytes(); } diff --git a/libcxx/test/libcxx/containers/views/views.span/nodiscard.verify.cpp b/libcxx/test/libcxx/containers/views/views.span/nodiscard.verify.cpp new file mode 100644 index 0000000000000..666680597f147 --- /dev/null +++ b/libcxx/test/libcxx/containers/views/views.span/nodiscard.verify.cpp @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++20 + +// + +// Check that functions are marked [[nodiscard]] + +#include + +#include "test_macros.h" + +void test() { + { // Test with a static extent + std::span sp; + + sp.first<0>(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.last<0>(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.first(0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.last(0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.subspan<0, 0>(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.subspan(0, 0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.size(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.size_bytes(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.empty(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp[0]; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +#if TEST_STD_VER >= 26 + sp.at(0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +#endif + sp.front(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.back(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.data(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.begin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.end(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.rbegin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.rend(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + std::as_bytes(sp); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} 
+ std::as_writable_bytes(sp); + } + { // Test with a dynamic extent + std::span sp; + + sp.first<0>(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.last<0>(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.first(0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.last(0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.subspan<0, 0>(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.subspan(0, 0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.size(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.size_bytes(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.empty(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp[0]; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +#if TEST_STD_VER >= 26 + sp.at(0); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +#endif + sp.front(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.back(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.data(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.begin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.end(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.rbegin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + sp.rend(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + std::as_bytes(sp); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::as_writable_bytes(sp); + } +} diff --git a/lldb/bindings/lua/lua-typemaps.swig b/lldb/bindings/lua/lua-typemaps.swig index f2a7401419368..a4a77b1a214c9 100644 --- a/lldb/bindings/lua/lua-typemaps.swig +++ b/lldb/bindings/lua/lua-typemaps.swig @@ -122,9 +122,9 @@ LLDB_NUMBER_TYPEMAP(enum SWIGTYPE); } // Disable default type checking for this method to avoid SWIG dispatch issues. -// +// // Problem: SBThread::GetStopDescription has two overloads: -// 1. GetStopDescription(char* dst_or_null, size_t dst_len) +// 1. GetStopDescription(char* dst_or_null, size_t dst_len) // 2. GetStopDescription(lldb::SBStream& stream) // // SWIG generates a dispatch function to select the correct overload based on argument types. @@ -132,9 +132,9 @@ LLDB_NUMBER_TYPEMAP(enum SWIGTYPE); // However, this dispatcher doesn't consider typemaps that transform function signatures. // // In lua, our typemap converts GetStopDescription(char*, size_t) to GetStopDescription(int). -// The dispatcher still checks against the original (char*, size_t) signature instead of +// The dispatcher still checks against the original (char*, size_t) signature instead of // the transformed (int) signature, causing type matching to fail. 
-// This only affects SBThread::GetStopDescription since the type check also matches +// This only affects SBThread::GetStopDescription since the type check also matches // the argument name, which is unique to this function. %typemap(typecheck, precedence=SWIG_TYPECHECK_POINTER) (char *dst_or_null, size_t dst_len) "" @@ -251,7 +251,8 @@ LLDB_NUMBER_TYPEMAP(enum SWIGTYPE); %typemap(in) lldb::FileSP { luaL_Stream *p = (luaL_Stream *)luaL_checkudata(L, $input, LUA_FILEHANDLE); lldb::FileSP file_sp; - file_sp = std::make_shared(p->f, false); + file_sp = std::make_shared( + p->f, lldb_private::NativeFile::eOpenOptionReadWrite, false); if (!file_sp->IsValid()) return luaL_error(L, "Invalid file"); $1 = file_sp; diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index bd2884481ee95..c28d6ce792184 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -5896,7 +5896,7 @@ The fields used by CP for code objects before V3 also match those specified in GFX950 roundup(lds-size / (320 * 4)) GFX125* - roundup(lds-size / (256 * 4)) + roundup(lds-size / (512 * 4)) 24 1 bit ENABLE_EXCEPTION_IEEE_754_FP Wavefront starts execution _INVALID_OPERATION with specified exceptions diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index 438a84d39ed66..7e95545425f2d 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -1200,3 +1200,14 @@ Windows When compiling with clang-cl, CMake may use ``llvm-mt`` as the Manifest Tool when available. ```llvm-mt``` is only present when libxml2 is found at build-time. To ensure using Microsoft's Manifest Tool set `CMAKE_MT=mt`. + +Apple/OSX +--------- + +**CMAKE_OSX_SYSROOT**:STRING + When compiling for OSX, in order for the test suite to find libSystem to link + dylib tests you'll need to run CMake with ```xcrun --show-sdk-path``` as the + string to pass in so that the testsuite can find your os libraries. + + This will show up as ```ld: library not found for -lSystem``` when running + tests. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/GetDylibInterface.h b/llvm/include/llvm/ExecutionEngine/Orc/GetDylibInterface.h deleted file mode 100644 index 077d88d1758a0..0000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/GetDylibInterface.h +++ /dev/null @@ -1,41 +0,0 @@ -//===---- GetDylibInterface.h - Get interface for real dylib ----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Get symbol interface from a real dynamic library or TAPI file. These -// interfaces can be used to simulate weak linking (ld64 -weak-lx / -// -weak_library) against a library that is absent at runtime. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_GETDYLIBINTERFACE_H -#define LLVM_EXECUTIONENGINE_ORC_GETDYLIBINTERFACE_H - -#include "llvm/ExecutionEngine/Orc/Core.h" -#include "llvm/Support/Compiler.h" - -namespace llvm::orc { - -/// Returns a SymbolNameSet containing the exported symbols defined in the -/// given dylib. -LLVM_ABI Expected -getDylibInterfaceFromDylib(ExecutionSession &ES, Twine Path); - -/// Returns a SymbolNameSet containing the exported symbols defined in the -/// relevant slice of the TapiUniversal file. 
-LLVM_ABI Expected -getDylibInterfaceFromTapiFile(ExecutionSession &ES, Twine Path); - -/// Returns a SymbolNameSet containing the exported symbols defined in the -/// relevant slice of the given file, which may be either a dylib or a tapi -/// file. -LLVM_ABI Expected getDylibInterface(ExecutionSession &ES, - Twine Path); - -} // namespace llvm::orc - -#endif // LLVM_EXECUTIONENGINE_ORC_GETDYLIBINTERFACE_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachO.h b/llvm/include/llvm/ExecutionEngine/Orc/MachO.h index a0342d8d75bc2..0e789b5e05a75 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachO.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachO.h @@ -13,6 +13,7 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_MACHO_H #define LLVM_EXECUTIONENGINE_ORC_MACHO_H +#include "llvm/ExecutionEngine/Orc/CoreContainers.h" #include "llvm/ExecutionEngine/Orc/LoadLinkableFile.h" #include "llvm/Object/Archive.h" #include "llvm/Support/Compiler.h" @@ -31,6 +32,7 @@ class MachOUniversalBinary; namespace orc { +class ExecutionSession; class JITDylib; class ObjectLayer; @@ -93,6 +95,37 @@ class ForceLoadMachOArchiveMembers { bool ObjCOnly; }; +using GetFallbackArchsFn = + unique_function>( + uint32_t CPUType, uint32_t CPUSubType)>; + +/// Match the exact CPU type/subtype only. +LLVM_ABI SmallVector> +noFallbackArchs(uint32_t CPUType, uint32_t CPUSubType); + +/// Match standard dynamic loader fallback rules. +LLVM_ABI SmallVector> +standardMachOFallbackArchs(uint32_t CPUType, uint32_t CPUSubType); + +/// Returns a SymbolNameSet containing the exported symbols defined in the +/// given dylib. +LLVM_ABI Expected getDylibInterfaceFromDylib( + ExecutionSession &ES, Twine Path, + GetFallbackArchsFn GetFallbackArchs = standardMachOFallbackArchs); + +/// Returns a SymbolNameSet containing the exported symbols defined in the +/// relevant slice of the TapiUniversal file. +LLVM_ABI Expected getDylibInterfaceFromTapiFile( + ExecutionSession &ES, Twine Path, + GetFallbackArchsFn GetFallbackArchs = standardMachOFallbackArchs); + +/// Returns a SymbolNameSet containing the exported symbols defined in the +/// relevant slice of the given file, which may be either a dylib or a tapi +/// file. +LLVM_ABI Expected getDylibInterface( + ExecutionSession &ES, Twine Path, + GetFallbackArchsFn GetFallbackArchs = standardMachOFallbackArchs); + } // namespace orc } // namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt index f34392538a7cb..db16a3005f6c1 100644 --- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -26,7 +26,6 @@ add_llvm_component_library(LLVMOrcJIT ExecutionUtils.cpp ExecutorResolutionGenerator.cpp ObjectFileInterface.cpp - GetDylibInterface.cpp IndirectionUtils.cpp InProcessMemoryAccess.cpp IRCompileLayer.cpp diff --git a/llvm/lib/ExecutionEngine/Orc/GetDylibInterface.cpp b/llvm/lib/ExecutionEngine/Orc/GetDylibInterface.cpp deleted file mode 100644 index 9ccb211931a5b..0000000000000 --- a/llvm/lib/ExecutionEngine/Orc/GetDylibInterface.cpp +++ /dev/null @@ -1,128 +0,0 @@ -//===-------- GetDylibInterface.cpp - Get interface for real dylib --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/ExecutionEngine/Orc/GetDylibInterface.h" - -#include "llvm/BinaryFormat/Magic.h" -#include "llvm/Object/MachO.h" -#include "llvm/Object/MachOUniversal.h" -#include "llvm/Object/TapiUniversal.h" - -#define DEBUG_TYPE "orc" - -namespace llvm::orc { - -Expected getDylibInterfaceFromDylib(ExecutionSession &ES, - Twine Path) { - auto CPUType = MachO::getCPUType(ES.getTargetTriple()); - if (!CPUType) - return CPUType.takeError(); - - auto CPUSubType = MachO::getCPUSubType(ES.getTargetTriple()); - if (!CPUSubType) - return CPUSubType.takeError(); - - auto Buf = MemoryBuffer::getFile(Path); - if (!Buf) - return createFileError(Path, Buf.getError()); - - auto BinFile = object::createBinary((*Buf)->getMemBufferRef()); - if (!BinFile) - return BinFile.takeError(); - - std::unique_ptr MachOFile; - if (isa(**BinFile)) - MachOFile.reset(dyn_cast(BinFile->release())); - else if (auto *MachOUni = - dyn_cast(BinFile->get())) { - for (auto &O : MachOUni->objects()) { - if (O.getCPUType() == *CPUType && - (O.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) == *CPUSubType) { - if (auto Obj = O.getAsObjectFile()) - MachOFile = std::move(*Obj); - else - return Obj.takeError(); - break; - } - } - if (!MachOFile) - return make_error("MachO universal binary at " + Path + - " does not contain a slice for " + - ES.getTargetTriple().str(), - inconvertibleErrorCode()); - } else - return make_error("File at " + Path + " is not a MachO", - inconvertibleErrorCode()); - - if (MachOFile->getHeader().filetype != MachO::MH_DYLIB) - return make_error("MachO at " + Path + " is not a dylib", - inconvertibleErrorCode()); - - SymbolNameSet Symbols; - for (auto &Sym : MachOFile->symbols()) { - if (auto Name = Sym.getName()) - Symbols.insert(ES.intern(*Name)); - else - return Name.takeError(); - } - - return std::move(Symbols); -} - -Expected getDylibInterfaceFromTapiFile(ExecutionSession &ES, - Twine Path) { - SymbolNameSet Symbols; - - auto TapiFileBuffer = MemoryBuffer::getFile(Path); - if (!TapiFileBuffer) - return createFileError(Path, TapiFileBuffer.getError()); - - auto Tapi = - object::TapiUniversal::create((*TapiFileBuffer)->getMemBufferRef()); - if (!Tapi) - return Tapi.takeError(); - - auto CPUType = MachO::getCPUType(ES.getTargetTriple()); - if (!CPUType) - return CPUType.takeError(); - - auto CPUSubType = MachO::getCPUSubType(ES.getTargetTriple()); - if (!CPUSubType) - return CPUSubType.takeError(); - - auto &IF = (*Tapi)->getInterfaceFile(); - auto Interface = - IF.extract(MachO::getArchitectureFromCpuType(*CPUType, *CPUSubType)); - if (!Interface) - return Interface.takeError(); - - for (auto *Sym : (*Interface)->exports()) - Symbols.insert(ES.intern(Sym->getName())); - - return Symbols; -} - -Expected getDylibInterface(ExecutionSession &ES, Twine Path) { - file_magic Magic; - if (auto EC = identify_magic(Path, Magic)) - return createFileError(Path, EC); - - switch (Magic) { - case file_magic::macho_universal_binary: - case file_magic::macho_dynamically_linked_shared_lib: - return getDylibInterfaceFromDylib(ES, Path); - case file_magic::tapi_file: - return getDylibInterfaceFromTapiFile(ES, Path); - default: - return make_error("Cannot get interface for " + Path + - " unrecognized file type", - inconvertibleErrorCode()); - } -} - -} // namespace llvm::orc diff --git a/llvm/lib/ExecutionEngine/Orc/MachO.cpp b/llvm/lib/ExecutionEngine/Orc/MachO.cpp index 
89721d16930c0..731d24d1272d4 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachO.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachO.cpp @@ -10,9 +10,11 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" #include "llvm/ExecutionEngine/Orc/Layer.h" #include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/TapiUniversal.h" #include "llvm/Support/FileSystem.h" #define DEBUG_TYPE "orc" @@ -280,5 +282,177 @@ Expected ForceLoadMachOArchiveMembers::operator()( return true; } +LLVM_ABI SmallVector> +noFallbackArchs(uint32_t CPUType, uint32_t CPUSubType) { + SmallVector> Result; + Result.push_back({CPUType, CPUSubType}); + return Result; +} + +SmallVector> +standardMachOFallbackArchs(uint32_t CPUType, uint32_t CPUSubType) { + SmallVector> Archs; + + // Match given CPU type/subtype first. + Archs.push_back({CPUType, CPUSubType}); + + switch (CPUType) { + case MachO::CPU_TYPE_ARM64: + // Handle arm64 variants. + switch (CPUSubType) { + case MachO::CPU_SUBTYPE_ARM64_ALL: + Archs.push_back({CPUType, MachO::CPU_SUBTYPE_ARM64E}); + break; + default: + break; + } + break; + default: + break; + } + + return Archs; +} + +Expected +getDylibInterfaceFromDylib(ExecutionSession &ES, Twine Path, + GetFallbackArchsFn GetFallbackArchs) { + auto InitCPUType = MachO::getCPUType(ES.getTargetTriple()); + if (!InitCPUType) + return InitCPUType.takeError(); + + auto InitCPUSubType = MachO::getCPUSubType(ES.getTargetTriple()); + if (!InitCPUSubType) + return InitCPUSubType.takeError(); + + auto Buf = MemoryBuffer::getFile(Path); + if (!Buf) + return createFileError(Path, Buf.getError()); + + auto BinFile = object::createBinary((*Buf)->getMemBufferRef()); + if (!BinFile) + return BinFile.takeError(); + + std::unique_ptr MachOFile; + if (isa(**BinFile)) { + MachOFile.reset(dyn_cast(BinFile->release())); + + // TODO: Check that dylib arch is compatible. + } else if (auto *MachOUni = + dyn_cast(BinFile->get())) { + SmallVector> ArchsToTry; + if (GetFallbackArchs) + ArchsToTry = GetFallbackArchs(*InitCPUType, *InitCPUSubType); + else + ArchsToTry.push_back({*InitCPUType, *InitCPUSubType}); + + for (auto &[CPUType, CPUSubType] : ArchsToTry) { + for (auto &O : MachOUni->objects()) { + if (O.getCPUType() == CPUType && + (O.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) == CPUSubType) { + if (auto Obj = O.getAsObjectFile()) + MachOFile = std::move(*Obj); + else + return Obj.takeError(); + break; + } + } + if (MachOFile) // If found, break out. 
+ break; + } + if (!MachOFile) + return make_error( + "MachO universal binary at " + Path + + " does not contain a compatible slice for " + + ES.getTargetTriple().str(), + inconvertibleErrorCode()); + } else + return make_error("File at " + Path + " is not a MachO", + inconvertibleErrorCode()); + + if (MachOFile->getHeader().filetype != MachO::MH_DYLIB) + return make_error("MachO at " + Path + " is not a dylib", + inconvertibleErrorCode()); + + SymbolNameSet Symbols; + for (auto &Sym : MachOFile->symbols()) { + if (auto Name = Sym.getName()) + Symbols.insert(ES.intern(*Name)); + else + return Name.takeError(); + } + + return std::move(Symbols); +} + +Expected +getDylibInterfaceFromTapiFile(ExecutionSession &ES, Twine Path, + GetFallbackArchsFn GetFallbackArchs) { + SymbolNameSet Symbols; + + auto TapiFileBuffer = MemoryBuffer::getFile(Path); + if (!TapiFileBuffer) + return createFileError(Path, TapiFileBuffer.getError()); + + auto Tapi = + object::TapiUniversal::create((*TapiFileBuffer)->getMemBufferRef()); + if (!Tapi) + return Tapi.takeError(); + + auto InitCPUType = MachO::getCPUType(ES.getTargetTriple()); + if (!InitCPUType) + return InitCPUType.takeError(); + + auto InitCPUSubType = MachO::getCPUSubType(ES.getTargetTriple()); + if (!InitCPUSubType) + return InitCPUSubType.takeError(); + + SmallVector> ArchsToTry; + if (GetFallbackArchs) + ArchsToTry = GetFallbackArchs(*InitCPUType, *InitCPUSubType); + else + ArchsToTry.push_back({*InitCPUType, *InitCPUSubType}); + + auto &IF = (*Tapi)->getInterfaceFile(); + + auto ArchSet = IF.getArchitectures(); + for (auto [CPUType, CPUSubType] : ArchsToTry) { + auto A = MachO::getArchitectureFromCpuType(CPUType, CPUSubType); + if (ArchSet.has(A)) { + if (auto Interface = IF.extract(A)) { + for (auto *Sym : (*Interface)->exports()) + Symbols.insert(ES.intern(Sym->getName())); + return Symbols; + } else + return Interface.takeError(); + } + } + + return make_error( + "MachO interface file at " + Path + + " does not contain a compatible slice for " + + ES.getTargetTriple().str(), + inconvertibleErrorCode()); +} + +Expected getDylibInterface(ExecutionSession &ES, Twine Path, + GetFallbackArchsFn GetFallbackArchs) { + file_magic Magic; + if (auto EC = identify_magic(Path, Magic)) + return createFileError(Path, EC); + + switch (Magic) { + case file_magic::macho_universal_binary: + case file_magic::macho_dynamically_linked_shared_lib: + return getDylibInterfaceFromDylib(ES, Path, std::move(GetFallbackArchs)); + case file_magic::tapi_file: + return getDylibInterfaceFromTapiFile(ES, Path, std::move(GetFallbackArchs)); + default: + return make_error("Cannot get interface for " + Path + + " unrecognized file type", + inconvertibleErrorCode()); + } +} + } // End namespace orc. } // End namespace llvm. diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp index 97298f9d74171..d0c4b1b9f83fd 100644 --- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp @@ -662,12 +662,15 @@ Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) { Function *Thunk = buildExitThunk(F->getFunctionType(), F->getAttributes()); CallInst *GuardCheck = B.CreateCall( GuardFnType, GuardCheckLoad, {F, Thunk}); + Value *GuardCheckDest = B.CreateExtractValue(GuardCheck, 0); + Value *GuardFinalDest = B.CreateExtractValue(GuardCheck, 1); // Ensure that the first argument is passed in the correct register. 
GuardCheck->setCallingConv(CallingConv::CFGuard_Check); SmallVector Args(llvm::make_pointer_range(GuestExit->args())); - CallInst *Call = B.CreateCall(Arm64Ty, GuardCheck, Args); + OperandBundleDef OB("cfguardtarget", GuardFinalDest); + CallInst *Call = B.CreateCall(Arm64Ty, GuardCheckDest, Args, OB); Call->setTailCallKind(llvm::CallInst::TCK_MustTail); if (Call->getType()->isVoidTy()) @@ -767,11 +770,21 @@ void AArch64Arm64ECCallLowering::lowerCall(CallBase *CB) { CallInst *GuardCheck = B.CreateCall(GuardFnType, GuardCheckLoad, {CalledOperand, Thunk}, Bundles); + Value *GuardCheckDest = B.CreateExtractValue(GuardCheck, 0); + Value *GuardFinalDest = B.CreateExtractValue(GuardCheck, 1); // Ensure that the first argument is passed in the correct register. GuardCheck->setCallingConv(CallingConv::CFGuard_Check); - CB->setCalledOperand(GuardCheck); + // Update the call: set the callee, and add a bundle with the final + // destination, + CB->setCalledOperand(GuardCheckDest); + OperandBundleDef OB("cfguardtarget", GuardFinalDest); + auto *NewCall = CallBase::addOperandBundle(CB, LLVMContext::OB_cfguardtarget, + OB, CB->getIterator()); + NewCall->copyMetadata(*CB); + CB->replaceAllUsesWith(NewCall); + CB->eraseFromParent(); } bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) { @@ -789,7 +802,8 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) { I64Ty = Type::getInt64Ty(M->getContext()); VoidTy = Type::getVoidTy(M->getContext()); - GuardFnType = FunctionType::get(PtrTy, {PtrTy, PtrTy}, false); + GuardFnType = + FunctionType::get(StructType::get(PtrTy, PtrTy), {PtrTy, PtrTy}, false); DispatchFnType = FunctionType::get(PtrTy, {PtrTy, PtrTy, PtrTy}, false); GuardFnCFGlobal = M->getOrInsertGlobal("__os_arm64x_check_icall_cfg", PtrTy); GuardFnGlobal = M->getOrInsertGlobal("__os_arm64x_check_icall", PtrTy); diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 34c85d588f9c4..e2a79a49a9e92 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -162,7 +162,13 @@ def RetCC_AArch64_AAPCS : CallingConv<[ ]>; let Entry = 1 in -def CC_AArch64_Win64PCS : CallingConv; +def CC_AArch64_Win64PCS : CallingConv> + ], + AArch64_Common) +>; // Vararg functions on windows pass floats in integer registers let Entry = 1 in @@ -177,6 +183,9 @@ def CC_AArch64_Win64_VarArg : CallingConv<[ // a stack layout compatible with the x64 calling convention. let Entry = 1 in def CC_AArch64_Arm64EC_VarArg : CallingConv<[ + // 'CFGuardTarget' is used for Arm64EC; it passes its parameter in X9. + CCIfCFGuardTarget>, + CCIfNest>, // Convert small floating-point values to integer. @@ -345,7 +354,7 @@ def CC_AArch64_Arm64EC_CFGuard_Check : CallingConv<[ let Entry = 1 in def RetCC_AArch64_Arm64EC_CFGuard_Check : CallingConv<[ - CCIfType<[i64], CCAssignToReg<[X11]>> + CCIfType<[i64], CCAssignToReg<[X11, X9]>> ]>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 8bfdbb7c5c310..33cef795b111d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1186,21 +1186,20 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // Make clamp modifier on NaN input returns 0. ProgInfo.DX10Clamp = Mode.DX10Clamp; - unsigned LDSAlignShift; - if (STM.getFeatureBits().test(FeatureAddressableLocalMemorySize327680)) { - // LDS is allocated in 256 dword blocks. 
- LDSAlignShift = 10; - } else if (STM.getFeatureBits().test( - FeatureAddressableLocalMemorySize163840)) { - // LDS is allocated in 320 dword blocks. + unsigned LDSAlignShift = 8; + switch (getLdsDwGranularity(STM)) { + case 512: + case 320: LDSAlignShift = 11; - } else if (STM.getFeatureBits().test( - FeatureAddressableLocalMemorySize65536)) { - // LDS is allocated in 128 dword blocks. + break; + case 128: LDSAlignShift = 9; - } else { - // LDS is allocated in 64 dword blocks. + break; + case 64: LDSAlignShift = 8; + break; + default: + llvm_unreachable("invalid LDS block size"); } ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs(); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index aff4cfe1dc70e..998a2b5b36a87 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -3565,8 +3565,15 @@ bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, } unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) { - return ST.hasFeature(AMDGPU::FeatureAddressableLocalMemorySize327680) ? 256 - : 128; + if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768)) + return 64; + if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536)) + return 128; + if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840)) + return 320; + if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680)) + return 512; + return 64; // In sync with getAddressableLocalMemorySize } bool isPackedFP32Inst(unsigned Opc) { diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index f0bdf472b96ed..ff9d9723dddea 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -683,7 +683,7 @@ let hasSideEffects = false in { defm CVT_to_tf32_rn_relu : CVT_TO_TF32<"rn.relu">; defm CVT_to_tf32_rz_relu : CVT_TO_TF32<"rz.relu">; defm CVT_to_tf32_rna : CVT_TO_TF32<"rna", [hasPTX<70>, hasSM<80>]>; - defm CVT_to_tf32_rna_satf : CVT_TO_TF32<"rna.satfinite", [hasPTX<81>, hasSM<89>]>; + defm CVT_to_tf32_rna_satf : CVT_TO_TF32<"rna.satfinite", [hasPTX<81>, hasSM<80>]>; defm CVT_to_tf32_rn_satf : CVT_TO_TF32<"rn.satfinite", [hasPTX<86>, hasSM<100>]>; defm CVT_to_tf32_rz_satf : CVT_TO_TF32<"rz.satfinite", [hasPTX<86>, hasSM<100>]>; diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 10588b9739188..75ce1b144a2e7 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -222,7 +222,6 @@ class RISCVAsmParser : public MCTargetAsmParser { ParseStatus parseRegReg(OperandVector &Operands); ParseStatus parseXSfmmVType(OperandVector &Operands); - ParseStatus parseRetval(OperandVector &Operands); ParseStatus parseZcmpStackAdj(OperandVector &Operands, bool ExpectNegative = false); ParseStatus parseZcmpNegStackAdj(OperandVector &Operands) { @@ -1655,10 +1654,6 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return generateImmOutOfRangeError( Operands, ErrorInfo, -1, (1 << 5) - 1, "immediate must be non-zero in the range"); - case Match_InvalidXSfmmVType: { - SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); - return generateXSfmmVTypeError(ErrorLoc); - } case Match_InvalidVTypeI: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); return generateVTypeError(ErrorLoc); diff --git
a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td index d77a44a0d9e01..a3e02ee4fc430 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td @@ -13,7 +13,6 @@ def XSfmmVTypeAsmOperand : AsmOperandClass { let Name = "XSfmmVType"; let ParserMethod = "parseXSfmmVType"; - let DiagnosticType = "InvalidXSfmmVType"; let RenderMethod = "addVTypeIOperands"; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index fba1ccf2c8c9b..f153db177cac1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2638,16 +2638,6 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, if (Shr->isExact()) return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal)); - if (C.isZero()) { - // == 0 is u< 1. - if (Pred == CmpInst::ICMP_EQ) - return new ICmpInst(CmpInst::ICMP_ULT, X, - ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal))); - else - return new ICmpInst(CmpInst::ICMP_UGT, X, - ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal) - 1)); - } - if (Shr->hasOneUse()) { // Canonicalize the shift into an 'and': // icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt) diff --git a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll index 55c3e7779478e..afabf6ce0fdf2 100644 --- a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll +++ b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll @@ -228,7 +228,7 @@ define i64 @known_power_of_two_urem_loop_lshr(i64 %size, i64 %a) { ; CHECK-NEXT: [[UREM:%.*]] = and i64 [[SIZE:%.*]], [[TMP0]] ; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]] ; CHECK-NEXT: [[I]] = lshr i64 [[PHI]], 1 -; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp ult i64 [[PHI]], 2 +; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp eq i64 [[I]], 0 ; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i64 [[SUM]] @@ -328,7 +328,7 @@ define i64 @known_power_of_two_urem_loop_ashr_negative_2(i64 %size, i64 %a) { ; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] ; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[UREM]] ; CHECK-NEXT: [[I]] = ashr i64 [[PHI]], 2 -; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp ult i64 [[PHI]], 4 +; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp eq i64 [[I]], 0 ; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i64 [[SUM]] diff --git a/llvm/test/CodeGen/AArch64/arm64ec-indirect-call.ll b/llvm/test/CodeGen/AArch64/arm64ec-indirect-call.ll new file mode 100644 index 0000000000000..e6a42c382e4f6 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64ec-indirect-call.ll @@ -0,0 +1,50 @@ +; RUN: llc -mtriple=arm64ec-pc-windows-msvc < %s | FileCheck %s + +define void @simple(ptr %g) { +; CHECK-LABEL: "#simple": +; CHECK: str x30, [sp, #-16]! 
+; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_check_icall +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$v$v +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$v$v +; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] +; CHECK-NEXT: mov x11, x0 +; CHECK-NEXT: blr x8 +; CHECK-NEXT: blr x11 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr x30, [sp], #16 +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret + +entry: + call void %g() + ret void +} + +; Make sure the check for the security cookie doesn't use x9. +define void @stackguard(ptr %g) sspreq { +; CHECK-LABEL: "#stackguard": +; CHECK: adrp x8, __os_arm64x_check_icall +; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] +; CHECK-NEXT: blr x8 +; CHECK-NEXT: adrp x8, __security_cookie +; CHECK-NEXT: ldr x10, [sp, #8] +; CHECK-NEXT: ldr x8, [x8, :lo12:__security_cookie] +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: b.ne .LBB1_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: fmov d0, #1.00000000 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr x30, [sp, #16] +; CHECK-NEXT: .seh_save_reg x30, 16 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .seh_stackalloc 32 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: br x11 + +entry: + %call = tail call double %g(double noundef 1.000000e+00) + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll b/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll index 4349b18fd394c..b31e87c54b563 100644 --- a/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll +++ b/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll @@ -31,10 +31,10 @@ ; GFX1200-MESA: .long 45100 ; GFX1200-MESA-NEXT: .long 1024 -; GFX1250-PAL: '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x200 +; GFX1250-PAL: '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x100 ; GFX1250-MESA: .long 45100 -; GFX1250-MESA-NEXT: .long 512 +; GFX1250-MESA-NEXT: .long 256 @lds = internal addrspace(3) global [4096 x i8] poison diff --git a/llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx1250.ll b/llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx1250.ll index 3db0fa8f21759..7e8d5e0f30b9e 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx1250.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-size-hsa-gfx1250.ll @@ -41,7 +41,7 @@ define amdgpu_kernel void @test_lds_i32(i32 %val) { ; GCN-LABEL: test_lds_array_i8: ; GCN: .amdhsa_group_segment_fixed_size 327680 ; GCN: ; LDSByteSize: 327680 bytes/workgroup -; MESA: granulated_lds_size = 320 +; MESA: granulated_lds_size = 160 define amdgpu_kernel void @test_lds_array_i8() { %gep = getelementptr inbounds [327679 x i8], ptr addrspace(3) @lds.array.i8, i32 0, i32 5 %val = load i8, ptr addrspace(3) %gep @@ -52,7 +52,7 @@ define amdgpu_kernel void @test_lds_array_i8() { ; GCN-LABEL: test_lds_array_i16: ; GCN: .amdhsa_group_segment_fixed_size 327680 ; GCN: ; LDSByteSize: 327680 bytes/workgroup -; MESA: granulated_lds_size = 320 +; MESA: granulated_lds_size = 160 define amdgpu_kernel void @test_lds_array_i16() { %gep = getelementptr inbounds [163839 x i16], ptr addrspace(3) @lds.array.i16, i32 0, i32 10 %val = load i16, ptr addrspace(3) %gep @@ -63,7 +63,7 @@ define amdgpu_kernel void @test_lds_array_i16() { ; GCN-LABEL: test_lds_array_i32: ; GCN: .amdhsa_group_segment_fixed_size 327680 ; GCN: ; LDSByteSize: 327680 bytes/workgroup -; MESA: granulated_lds_size = 320 +; MESA: granulated_lds_size = 160 define amdgpu_kernel void @test_lds_array_i32() { %gep = getelementptr inbounds [81919 x i32], ptr addrspace(3) @lds.array.i32, i32 0, i32 20 %val = load i32, ptr addrspace(3) %gep diff 
--git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.gfx1250.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.gfx1250.ll index f934c85f68e0f..68694faf833e9 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.gfx1250.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.gfx1250.ll @@ -114,7 +114,7 @@ ; CHECK-NEXT: .entry_point: _amdgpu_gs ; CHECK-NEXT: .entry_point_symbol: gs_shader ; CHECK-NEXT: .forward_progress: true -; CHECK-NEXT: .lds_size: 0x400 +; CHECK-NEXT: .lds_size: 0x800 ; CHECK-NEXT: .mem_ordered: true ; CHECK-NEXT: .scratch_en: false ; CHECK-NEXT: .scratch_memory_size: 0 diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.gfx950.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.gfx950.ll new file mode 100644 index 0000000000000..b3575c68b892f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.gfx950.ll @@ -0,0 +1,215 @@ +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx950 <%s | FileCheck %s --check-prefixes=CHECK + +; CHECK-LABEL: {{^}}_amdgpu_cs_main: +; CHECK: ; TotalNumSgprs: 6 +; CHECK: ; NumVgprs: 1 +; CHECK: .amdgpu_pal_metadata +; CHECK-NEXT: --- +; CHECK-NEXT: amdpal.pipelines: +; CHECK-NEXT: - .api: Vulkan +; CHECK-NEXT: .compute_registers: +; CHECK-NEXT: .tg_size_en: true +; CHECK-NEXT: .tgid_x_en: false +; CHECK-NEXT: .tgid_y_en: false +; CHECK-NEXT: .tgid_z_en: false +; CHECK-NEXT: .tidig_comp_cnt: 0x1 +; CHECK-NEXT: .graphics_registers: +; CHECK-NEXT: .ps_extra_lds_size: 0 +; CHECK-NEXT: .spi_ps_input_addr: +; CHECK-NEXT: .ancillary_ena: false +; CHECK-NEXT: .front_face_ena: true +; CHECK-NEXT: .line_stipple_tex_ena: false +; CHECK-NEXT: .linear_center_ena: true +; CHECK-NEXT: .linear_centroid_ena: true +; CHECK-NEXT: .linear_sample_ena: true +; CHECK-NEXT: .persp_center_ena: true +; CHECK-NEXT: .persp_centroid_ena: true +; CHECK-NEXT: .persp_pull_model_ena: false +; CHECK-NEXT: .persp_sample_ena: true +; CHECK-NEXT: .pos_fixed_pt_ena: true +; CHECK-NEXT: .pos_w_float_ena: false +; CHECK-NEXT: .pos_x_float_ena: false +; CHECK-NEXT: .pos_y_float_ena: false +; CHECK-NEXT: .pos_z_float_ena: false +; CHECK-NEXT: .sample_coverage_ena: false +; CHECK-NEXT: .spi_ps_input_ena: +; CHECK-NEXT: .ancillary_ena: false +; CHECK-NEXT: .front_face_ena: false +; CHECK-NEXT: .line_stipple_tex_ena: false +; CHECK-NEXT: .linear_center_ena: false +; CHECK-NEXT: .linear_centroid_ena: false +; CHECK-NEXT: .linear_sample_ena: false +; CHECK-NEXT: .persp_center_ena: false +; CHECK-NEXT: .persp_centroid_ena: false +; CHECK-NEXT: .persp_pull_model_ena: false +; CHECK-NEXT: .persp_sample_ena: true +; CHECK-NEXT: .pos_fixed_pt_ena: false +; CHECK-NEXT: .pos_w_float_ena: false +; CHECK-NEXT: .pos_x_float_ena: false +; CHECK-NEXT: .pos_y_float_ena: false +; CHECK-NEXT: .pos_z_float_ena: false +; CHECK-NEXT: .sample_coverage_ena: false +; CHECK-NEXT: .hardware_stages: +; CHECK-NEXT: .cs: +; CHECK-NEXT: .agpr_count: 0 +; CHECK-NEXT: .checksum_value: 0x9444d7d0 +; CHECK-NEXT: .debug_mode: false +; CHECK-NEXT: .entry_point: _amdgpu_cs +; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main +; CHECK-NEXT: .excp_en: 0 +; CHECK-NEXT: .float_mode: 0xc0 +; CHECK-NEXT: .forward_progress: false +; CHECK-NEXT: .ieee_mode: false +; CHECK-NEXT: .image_op: false +; CHECK-NEXT: .lds_size: 0 +; CHECK-NEXT: .mem_ordered: false +; CHECK-NEXT: .scratch_en: false +; CHECK-NEXT: .scratch_memory_size: 0 +; CHECK-NEXT: .sgpr_count: 0xa +; CHECK-NEXT: .sgpr_limit: 0x6a +; CHECK-NEXT: .threadgroup_dimensions: +; CHECK-NEXT: - 0x1 +; CHECK-NEXT: - 0x400 +; CHECK-NEXT: - 0x1 +; CHECK-NEXT: .trap_present: false +; 
CHECK-NEXT: .user_data_reg_map: +; CHECK-NEXT: - 0x10000000 +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0 +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: .user_sgprs: 0x3 +; CHECK-NEXT: .vgpr_count: 0x2 +; CHECK-NEXT: .vgpr_limit: 0x100 +; CHECK-NEXT: .wavefront_size: 0x20 +; CHECK-NEXT: .wgp_mode: false +; CHECK-NEXT: .gs: +; CHECK-NEXT: .agpr_count: 0 +; CHECK-NEXT: .debug_mode: false +; CHECK-NEXT: .entry_point: _amdgpu_gs +; CHECK-NEXT: .entry_point_symbol: gs_shader +; CHECK-NEXT: .forward_progress: false +; CHECK-NEXT: .ieee_mode: false +; CHECK-NEXT: .lds_size: 0x500 +; CHECK-NEXT: .mem_ordered: false +; CHECK-NEXT: .scratch_en: false +; CHECK-NEXT: .scratch_memory_size: 0 +; CHECK-NEXT: .sgpr_count: 0x6 +; CHECK-NEXT: .vgpr_count: 0x1 +; CHECK-NEXT: .wgp_mode: false +; CHECK-NEXT: .hs: +; CHECK-NEXT: .agpr_count: 0 +; CHECK-NEXT: .debug_mode: false +; CHECK-NEXT: .entry_point: _amdgpu_hs +; CHECK-NEXT: .entry_point_symbol: hs_shader +; CHECK-NEXT: .forward_progress: false +; CHECK-NEXT: .ieee_mode: false +; CHECK-NEXT: .lds_size: 0xa00 +; CHECK-NEXT: .mem_ordered: false +; CHECK-NEXT: .scratch_en: false +; CHECK-NEXT: .scratch_memory_size: 0 +; CHECK-NEXT: .sgpr_count: 0x6 +; CHECK-NEXT: .vgpr_count: 0x1 +; CHECK-NEXT: .wgp_mode: false +; CHECK-NEXT: .ps: +; CHECK-NEXT: .agpr_count: 0 +; CHECK-NEXT: .debug_mode: false +; CHECK-NEXT: .entry_point: _amdgpu_ps +; CHECK-NEXT: .entry_point_symbol: ps_shader +; CHECK-NEXT: .forward_progress: false +; CHECK-NEXT: .ieee_mode: false +; CHECK-NEXT: .lds_size: 0 +; CHECK-NEXT: .mem_ordered: false +; CHECK-NEXT: .scratch_en: false +; CHECK-NEXT: .scratch_memory_size: 0 +; CHECK-NEXT: .sgpr_count: 0x6 +; CHECK-NEXT: .vgpr_count: 0x1 +; CHECK-NEXT: .wgp_mode: false +; CHECK: .registers: {} +; CHECK:amdpal.version: +; CHECK-NEXT: - 0x3 +; CHECK-NEXT: - 0 +; CHECK-NEXT:... 
+; CHECK-NEXT: .end_amdgpu_pal_metadata + +define amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg1, i32 %arg2) #0 !lgc.shaderstage !1 { +.entry: + %i = call i64 @llvm.amdgcn.s.getpc() + %i1 = and i64 %i, -4294967296 + %i2 = zext i32 %arg1 to i64 + %i3 = or i64 %i1, %i2 + %i4 = inttoptr i64 %i3 to ptr addrspace(4) + %i5 = and i32 %arg2, 1023 + %i6 = lshr i32 %arg2, 10 + %i7 = and i32 %i6, 1023 + %i8 = add nuw nsw i32 %i7, %i5 + %i9 = load <4 x i32>, ptr addrspace(4) %i4, align 16 + %.idx = shl nuw nsw i32 %i8, 2 + call void @llvm.amdgcn.raw.buffer.store.i32(i32 1, <4 x i32> %i9, i32 %.idx, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @ps_shader() #1 { + ret void +} + +@LDS.GS = external addrspace(3) global [1 x i32], align 4 + +define amdgpu_gs void @gs_shader() { + %ptr = getelementptr i32, ptr addrspace(3) @LDS.GS, i32 0 + store i32 0, ptr addrspace(3) %ptr, align 4 + ret void +} + +@LDS.HS = external addrspace(3) global [1024 x i32], align 4 + +define amdgpu_hs void @hs_shader() { + %ptr = getelementptr i32, ptr addrspace(3) @LDS.HS, i32 0 + store i32 0, ptr addrspace(3) %ptr, align 4 + ret void +} + +!amdgpu.pal.metadata.msgpack = !{!0} + +; Function Attrs: nounwind willreturn memory(none) +declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.amdgcn.s.getpc() #2 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg) #3 + +attributes #0 = { nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="4" "denormal-fp-math-f32"="preserve-sign" } + +attributes #1 = { nounwind memory(readwrite) "InitialPSInputAddr"="36983" } + +!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size \B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\00"} +!1 = !{i32 7} diff --git a/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll b/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll new file mode 100644 index 0000000000000..f47c2f2a85156 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx81 | FileCheck %s +; RUN: %if ptxas-sm_80 && ptxas-isa-8.1 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx81 | %ptxas-verify -arch=sm_80 %} + +; CHECK-LABEL: cvt_rna_satfinite_tf32_f32 +define i32 @cvt_rna_satfinite_tf32_f32(float %f1) { +; CHECK-LABEL: cvt_rna_satfinite_tf32_f32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [cvt_rna_satfinite_tf32_f32_param_0]; +; CHECK-NEXT: cvt.rna.satfinite.tf32.f32 %r2, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; + %val = call i32 @llvm.nvvm.f2tf32.rna.satfinite(float %f1) + ret i32 %val +} diff --git a/llvm/test/CodeGen/NVPTX/convert-sm89.ll b/llvm/test/CodeGen/NVPTX/convert-sm89.ll index 616dcfa330e81..170c120162cc3 100644 --- a/llvm/test/CodeGen/NVPTX/convert-sm89.ll +++ b/llvm/test/CodeGen/NVPTX/convert-sm89.ll @@ -84,10 +84,3 @@ define <2 x half> @cvt_rn_relu_f16x2_e5m2x2(i16 %in) { %val = call <2 x half> @llvm.nvvm.e5m2x2.to.f16x2.rn.relu(i16 %in); ret <2 x half> %val } - -; CHECK-LABEL: cvt_rna_satfinite_tf32_f32 -define i32 @cvt_rna_satfinite_tf32_f32(float %f1) { -; CHECK: cvt.rna.satfinite.tf32.f32 - %val = call i32 @llvm.nvvm.f2tf32.rna.satfinite(float %f1) - ret i32 %val -} diff --git a/llvm/test/ExecutionEngine/JITLink/AArch64/Inputs/MachO_Foo.tbd b/llvm/test/ExecutionEngine/JITLink/AArch64/Inputs/MachO_Foo_arm64.tbd similarity index 100% rename from llvm/test/ExecutionEngine/JITLink/AArch64/Inputs/MachO_Foo.tbd rename to llvm/test/ExecutionEngine/JITLink/AArch64/Inputs/MachO_Foo_arm64.tbd diff --git a/llvm/test/ExecutionEngine/JITLink/AArch64/Inputs/MachO_Foo_arm64e.tbd b/llvm/test/ExecutionEngine/JITLink/AArch64/Inputs/MachO_Foo_arm64e.tbd new file mode 100644 index 0000000000000..7b21ab0cff165 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/AArch64/Inputs/MachO_Foo_arm64e.tbd @@ -0,0 +1,23 @@ +--- !tapi-tbd +tbd-version: 4 +targets: [ arm64e-macos ] +uuids: + - target: arm64e-macos + value: 00000000-0000-0000-0000-000000000000 +flags: [ installapi ] +install-name: Foo.framework/Foo +current-version: 1.2.3 +compatibility-version: 1.2 +swift-abi-version: 5 +parent-umbrella: + - targets: [ arm64e-macos ] + umbrella: System +exports: + - targets: [ arm64e-macos ] + symbols: [ _foo ] + objc-classes: [] + objc-eh-types: [] + objc-ivars: [] + weak-symbols: [] + thread-local-symbols: [] +... diff --git a/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_weak_link.test b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_weak_link.test index 4326a604297b6..37847918bfe88 100644 --- a/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_weak_link.test +++ b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_weak_link.test @@ -1,8 +1,14 @@ # RUN: rm -rf %t && mkdir -p %t # RUN: llvm-mc -triple=arm64-apple-darwin19 -filetype=obj -o %t/main.o \ # RUN: %S/Inputs/MachO_main_ret_foo.s -# RUN: llvm-jitlink -noexec %t/main.o -weak_library %S/Inputs/MachO_Foo.tbd - +# RUN: llvm-jitlink -noexec %t/main.o -weak_library \ +# RUN: %S/Inputs/MachO_Foo_arm64.tbd +# RUN: llvm-jitlink -noexec %t/main.o -weak_library \ +# RUN: %S/Inputs/MachO_Foo_arm64e.tbd +# # Check that we can load main.o, which unconditionally uses symbol foo, by # using -weak_library on a TBD file to emulate forced weak linking against # a library that supplies foo, but is missing at runtime. 
+# +# Check that weak linking works for arm64 JIT'd programs even if the TBD +# file contains only an arm64e interface. diff --git a/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s index 80a340c1f6261..566e8554765ca 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s @@ -52,7 +52,7 @@ // OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 00f0 00000cc0 80000000 00040000 00000000 // max_lds_size -// OBJDUMP-NEXT: 0100 00000600 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0100 00000500 00000000 00000000 00000000 // OBJDUMP-NEXT: 0110 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0120 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0130 00000cc0 80000000 00040000 00000000 @@ -231,13 +231,13 @@ max_vgprs: .p2align 6 .amdhsa_kernel max_lds_size - .amdhsa_group_segment_fixed_size 393216 + .amdhsa_group_segment_fixed_size 327680 .amdhsa_next_free_vgpr 1 .amdhsa_next_free_sgpr 1 .end_amdhsa_kernel // ASM: .amdhsa_kernel max_lds_size -// ASM: .amdhsa_group_segment_fixed_size 393216 +// ASM: .amdhsa_group_segment_fixed_size 327680 // ASM: .end_amdhsa_kernel // Test maximum VGPR allocation diff --git a/llvm/test/MC/AMDGPU/hsa-gfx1251-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx1251-v4.s index 642e62df0437a..0d6bc61ac7753 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx1251-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx1251-v4.s @@ -52,7 +52,7 @@ // OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 00f0 00000cc0 80000000 00040000 00000000 // max_lds_size -// OBJDUMP-NEXT: 0100 00000600 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0100 00000500 00000000 00000000 00000000 // OBJDUMP-NEXT: 0110 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0120 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0130 00000cc0 80000000 00040000 00000000 @@ -231,13 +231,13 @@ max_vgprs: .p2align 6 .amdhsa_kernel max_lds_size - .amdhsa_group_segment_fixed_size 393216 + .amdhsa_group_segment_fixed_size 327680 .amdhsa_next_free_vgpr 1 .amdhsa_next_free_sgpr 1 .end_amdhsa_kernel // ASM: .amdhsa_kernel max_lds_size -// ASM: .amdhsa_group_segment_fixed_size 393216 +// ASM: .amdhsa_group_segment_fixed_size 327680 // ASM: .end_amdhsa_kernel // Test maximum VGPR allocation diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll index 8aceba04e0aeb..532e8b014b328 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll @@ -579,7 +579,7 @@ define i1 @ashr_ugt_0(i4 %x) { define i1 @ashr_ugt_0_multiuse(i4 %x, ptr %p) { ; CHECK-LABEL: @ashr_ugt_0_multiuse( ; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[X]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ne i4 [[S]], 0 ; CHECK-NEXT: store i4 [[S]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret i1 [[R]] ; @@ -934,7 +934,7 @@ define i1 @lshr_eq_0_multiuse(i8 %x) { ; CHECK-LABEL: @lshr_eq_0_multiuse( ; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2 ; CHECK-NEXT: call void @use(i8 [[S]]) -; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 4 +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[S]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i8 %x, 2 @@ -947,7 +947,7 @@ define i1 @lshr_ne_0_multiuse(i8 %x) { ; CHECK-LABEL: @lshr_ne_0_multiuse( ; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2 ; CHECK-NEXT: call void @use(i8 [[S]]) -; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X]], 3 +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[S]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i8 %x, 2 @@ -960,7 +960,7 @@ 
define i1 @ashr_eq_0_multiuse(i8 %x) { ; CHECK-LABEL: @ashr_eq_0_multiuse( ; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 2 ; CHECK-NEXT: call void @use(i8 [[S]]) -; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 4 +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[S]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i8 %x, 2 @@ -973,7 +973,7 @@ define i1 @ashr_ne_0_multiuse(i8 %x) { ; CHECK-LABEL: @ashr_ne_0_multiuse( ; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 2 ; CHECK-NEXT: call void @use(i8 [[S]]) -; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X]], 3 +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[S]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i8 %x, 2 @@ -982,6 +982,46 @@ define i1 @ashr_ne_0_multiuse(i8 %x) { ret i1 %c } +define i1 @lshr_eq_0(i8 %x) { +; CHECK-LABEL: @lshr_eq_0( +; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], 4 +; CHECK-NEXT: ret i1 [[C]] +; + %s = lshr i8 %x, 2 + %c = icmp eq i8 %s, 0 + ret i1 %c +} + +define i1 @lshr_ne_0(i8 %x) { +; CHECK-LABEL: @lshr_ne_0( +; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 3 +; CHECK-NEXT: ret i1 [[C]] +; + %s = lshr i8 %x, 2 + %c = icmp ne i8 %s, 0 + ret i1 %c +} + +define i1 @ashr_eq_0(i8 %x) { +; CHECK-LABEL: @ashr_eq_0( +; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], 4 +; CHECK-NEXT: ret i1 [[C]] +; + %s = ashr i8 %x, 2 + %c = icmp eq i8 %s, 0 + ret i1 %c +} + +define i1 @ashr_ne_0(i8 %x) { +; CHECK-LABEL: @ashr_ne_0( +; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 3 +; CHECK-NEXT: ret i1 [[C]] +; + %s = ashr i8 %x, 2 + %c = icmp ne i8 %s, 0 + ret i1 %c +} + define i1 @lshr_exact_eq_0_multiuse(i8 %x) { ; CHECK-LABEL: @lshr_exact_eq_0_multiuse( ; CHECK-NEXT: [[S:%.*]] = lshr exact i8 [[X:%.*]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 76fa6bdb543a7..f37f62ffa8aa0 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -914,7 +914,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; IND-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; IND-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 ; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 9 +; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0 ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: ; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387902 diff --git a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll index a598f154ef54b..010890e30f4eb 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll @@ -65,7 +65,7 @@ define void @scalar_store(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806 @@ -125,7 +125,7 @@ define void @expansion(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 
[[N]], 3 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806 diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll index 4274719f2efd3..9ff9f92c4edca 100644 --- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll +++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll @@ -9,13 +9,11 @@ target triple = "thumbv6m-none-none-eabi" define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef %pResult) #0 { ; CHECK-LABEL: @arm_mean_q7( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP_NOT10:%.*]] = icmp ult i32 [[BLOCKSIZE:%.*]], 16 -; CHECK-NEXT: br i1 [[CMP_NOT10]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] -; CHECK: while.body.preheader: -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[BLOCKSIZE]], 4 -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[BLOCKSIZE:%.*]], 4 +; CHECK-NEXT: [[CMP_NOT10:%.*]] = icmp eq i32 [[SHR]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT10]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]] ; CHECK: while.body: -; CHECK-NEXT: [[SUM_013:%.*]] = phi i32 [ [[TMP2:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SUM_013:%.*]] = phi i32 [ [[TMP2:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER:%.*]] ] ; CHECK-NEXT: [[PSRC_ADDR_012:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[WHILE_BODY]] ], [ [[PSRC:%.*]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BLKCNT_011:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[SHR]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[PSRC_ADDR_012]], align 1 @@ -30,8 +28,8 @@ define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]] ; CHECK-NEXT: br label [[WHILE_END]] ; CHECK: while.end: -; CHECK-NEXT: [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], [[ENTRY:%.*]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ] -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP2]], [[WHILE_END_LOOPEXIT]] ] +; CHECK-NEXT: [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[WHILE_BODY_PREHEADER]] ], [ [[TMP2]], [[WHILE_END_LOOPEXIT]] ] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[BLOCKSIZE]], 15 ; CHECK-NEXT: [[CMP2_NOT15:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: br i1 [[CMP2_NOT15]], label [[WHILE_END5:%.*]], label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll b/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll index eb5e279947ecb..3585fe9f757d3 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll @@ -32,7 +32,7 @@ define i32 @ctlz_loop_with_abs(i32 %n) { ; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[TMP1]] = lshr i32 [[N_ADDR_03]], 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_02]], 1 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp samesign ult i32 [[N_ADDR_03]], 2 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END]], label [[WHILE_BODY]] ; CHECK: while.end: ; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC]], 
[[WHILE_BODY]] ] diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 50b4ac372b4e4..217e521b2e43e 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -27,7 +27,6 @@ #include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" #include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" -#include "llvm/ExecutionEngine/Orc/GetDylibInterface.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h" #include "llvm/ExecutionEngine/Orc/JITLinkReentryTrampolines.h" diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn index 84384217897c4..0034cd9993b88 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn @@ -35,7 +35,6 @@ static_library("Orc") { "ExecutionUtils.cpp", "ExecutorProcessControl.cpp", "ExecutorResolutionGenerator.cpp", - "GetDylibInterface.cpp", "IRCompileLayer.cpp", "IRPartitionLayer.cpp", "IRTransformLayer.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn index e47ca1e07670a..e5599cb80c0ea 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn @@ -69,6 +69,12 @@ tablegen("AMDGPUGenRegisterBank") { td_file = "AMDGPU.td" } +tablegen("AMDGPUGenSDNodeInfo") { + visibility = [ ":LLVMAMDGPUCodeGen" ] + args = [ "-gen-sd-node-info" ] + td_file = "AMDGPU.td" +} + tablegen("InstCombineTables") { visibility = [ ":LLVMAMDGPUCodeGen" ] args = [ "-gen-searchable-tables" ] @@ -103,6 +109,7 @@ static_library("LLVMAMDGPUCodeGen") { ":AMDGPUGenPreLegalizeGICombiner", ":AMDGPUGenRegBankGICombiner", ":AMDGPUGenRegisterBank", + ":AMDGPUGenSDNodeInfo", ":InstCombineTables", ":R600GenCallingConv", ":R600GenDAGISel", @@ -161,6 +168,7 @@ static_library("LLVMAMDGPUCodeGen") { "AMDGPULibCalls.cpp", "AMDGPULibFunc.cpp", "AMDGPULowerBufferFatPointers.cpp", + "AMDGPULowerExecSync.cpp", "AMDGPULowerIntrinsics.cpp", "AMDGPULowerKernelArguments.cpp", "AMDGPULowerKernelAttributes.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn index 2bce96859f8bc..c368cfe46405e 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn @@ -30,6 +30,12 @@ tablegen("PPCGenRegisterBank") { td_file = "PPC.td" } +tablegen("PPCGenSDNodeInfo") { + visibility = [ ":LLVMPowerPCCodeGen" ] + args = [ "-gen-sd-node-info" ] + td_file = "PPC.td" +} + static_library("LLVMPowerPCCodeGen") { deps = [ ":PPCGenCallingConv", @@ -37,6 +43,7 @@ static_library("LLVMPowerPCCodeGen") { ":PPCGenFastISel", ":PPCGenGlobalISel", ":PPCGenRegisterBank", + ":PPCGenSDNodeInfo", "AsmParser:PPCGenAsmMatcher", "MCTargetDesc", "TargetInfo", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/SystemZ/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/SystemZ/BUILD.gn index a5718e0c48feb..f360b01274705 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/SystemZ/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/SystemZ/BUILD.gn @@ -12,10 +12,17 @@ tablegen("SystemZGenDAGISel") { td_file = "SystemZ.td" } +tablegen("SystemZGenSDNodeInfo") { + 
visibility = [ ":LLVMSystemZCodeGen" ] + args = [ "-gen-sd-node-info" ] + td_file = "SystemZ.td" +} + static_library("LLVMSystemZCodeGen") { deps = [ ":SystemZGenCallingConv", ":SystemZGenDAGISel", + ":SystemZGenSDNodeInfo", "MCTargetDesc", "TargetInfo", "//llvm/include/llvm/Config:llvm-config", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn index f22ee4f31741b..2a1348ffb5295 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn @@ -90,7 +90,6 @@ static_library("LLVMX86CodeGen") { "X86CmovConversion.cpp", "X86CodeGenPassBuilder.cpp", "X86CompressEVEX.cpp", - "X86DiscriminateMemOps.cpp", "X86DomainReassignment.cpp", "X86DynAllocaExpander.cpp", "X86ExpandPseudo.cpp", @@ -110,7 +109,6 @@ static_library("LLVMX86CodeGen") { "X86ISelLoweringCall.cpp", "X86IndirectBranchTracking.cpp", "X86IndirectThunks.cpp", - "X86InsertPrefetch.cpp", "X86InsertWait.cpp", "X86InstCombineIntrinsic.cpp", "X86InstrFMA3Info.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn index e40a8ee04dd38..61dedc1ab0f69 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn @@ -32,7 +32,6 @@ unittest("CodeGenTests") { "LexicalScopesTest.cpp", "LowLevelTypeTest.cpp", "MIR2VecTest.cpp", - "MLRegAllocDevelopmentFeatures.cpp", "MachineBasicBlockTest.cpp", "MachineDomTreeUpdaterTest.cpp", "MachineInstrBundleIteratorTest.cpp", diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 79bc380dbcb7a..0164a2fb9fa81 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -628,6 +628,8 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> { /*default=*/"false", "Replace memref arguments in GPU functions with bare pointers. " "All memrefs must have static shape.">, + Option<"allowPatternRollback", "allow-pattern-rollback", "bool", "true", + "Experimental performance flag to disallow pattern rollback">, ListOption<"allowedDialects", "allowed-dialects", "std::string", "Run conversion patterns of only the specified dialects">, ]; diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h index fccb49d49da70..34c85de3418ec 100644 --- a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h @@ -58,6 +58,10 @@ struct GPUToNVVMPipelineOptions "Whether to use the bareptr calling convention on the host (warning " "this should be false until the GPU layering is fixed)"), llvm::cl::init(false)}; + PassOptions::Option allowPatternRollback{ + *this, "allow-pattern-rollback", + llvm::cl::desc("Allow pattern rollback during dialect conversion"), + llvm::cl::init(true)}; }; // Options for the gpu to xevm pipeline. 
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index d64c4d64cad84..5848489274c13 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -419,7 +419,10 @@ struct LowerGpuOpsToNVVMOpsPass final if (this->hasRedux) populateGpuSubgroupReduceOpLoweringPattern(converter, llvmPatterns); configureGpuToNVVMConversionLegality(target); - if (failed(applyPartialConversion(m, target, std::move(llvmPatterns)))) + ConversionConfig config; + config.allowPatternRollback = allowPatternRollback; + if (failed( + applyPartialConversion(m, target, std::move(llvmPatterns), config))) signalPassFailure(); } }; diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp index 2c3e4661d266a..5462cddd44718 100644 --- a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp +++ b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp @@ -72,6 +72,7 @@ void buildGpuPassPipeline(OpPassManager &pm, ConvertGpuOpsToNVVMOpsOptions opt; opt.useBarePtrCallConv = options.kernelUseBarePtrCallConv; opt.indexBitwidth = options.indexBitWidth; + opt.allowPatternRollback = options.allowPatternRollback; pm.addNestedPass(createConvertGpuOpsToNVVMOps(opt)); pm.addNestedPass(createCanonicalizerPass()); pm.addNestedPass(createCSEPass()); diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index a4b5dde8a2187..f1cc1eb983267 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 allow-pattern-rollback=0' -split-input-file | FileCheck %s // RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 allowed-dialects=func,arith,cf' -split-input-file | FileCheck %s // RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 use-bare-ptr-memref-call-conv=1' -split-input-file | FileCheck %s --check-prefix=CHECK-BARE // RUN: mlir-opt %s -transform-interpreter | FileCheck %s diff --git a/mlir/test/Conversion/GPUToNVVM/memref.mlir b/mlir/test/Conversion/GPUToNVVM/memref.mlir index e164ca9103dee..a4e8ead344114 100644 --- a/mlir/test/Conversion/GPUToNVVM/memref.mlir +++ b/mlir/test/Conversion/GPUToNVVM/memref.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt %s -convert-gpu-to-nvvm | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-nvvm="allow-pattern-rollback=0" | FileCheck %s // RUN: mlir-opt %s -convert-gpu-to-nvvm='use-bare-ptr-memref-call-conv=1' \ // RUN: | FileCheck %s --check-prefix=BARE diff --git a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir index b479467efc208..82c02c1d6ee63 100644 --- a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt --convert-gpu-to-nvvm --split-input-file %s | FileCheck %s +// RUN: mlir-opt --convert-gpu-to-nvvm="allow-pattern-rollback=0" --split-input-file %s | FileCheck %s // RUN: mlir-opt --convert-gpu-to-nvvm="index-bitwidth=32" --split-input-file %s | FileCheck --check-prefix=CHECK32 %s gpu.module @test_module { diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir index 5585d98c25b82..d0001f6ffc376 100644 --- 
a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-maxsi.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-maxsi.mlir index cd90ce3ba2f1a..fcff5f40a6cc7 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-maxsi.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-maxsi.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-minsi.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-minsi.mlir index fec2567f47f15..4718ac94fa0f2 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-minsi.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-minsi.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir index d5633b00313b3..5e3a7e7e7d729 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir index db297b0fc27b7..f1a48ae0c19c5 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir index 65cbc79752177..f0a46cea7ceb9 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git 
a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir index a0c955e4b570c..ddbabd4ddf960 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/alloc-host-shared.mlir b/mlir/test/Integration/GPU/CUDA/alloc-host-shared.mlir index f041df82b4325..5c56e2ddfbd51 100644 --- a/mlir/test/Integration/GPU/CUDA/alloc-host-shared.mlir +++ b/mlir/test/Integration/GPU/CUDA/alloc-host-shared.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/assert.mlir b/mlir/test/Integration/GPU/CUDA/assert.mlir index 71a21cf4bd620..83cf70cd17078 100644 --- a/mlir/test/Integration/GPU/CUDA/assert.mlir +++ b/mlir/test/Integration/GPU/CUDA/assert.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: mlir-opt %s -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/command-line-arg.mlir b/mlir/test/Integration/GPU/CUDA/command-line-arg.mlir index 34dde6e03c80e..77a4fa089b62d 100644 --- a/mlir/test/Integration/GPU/CUDA/command-line-arg.mlir +++ b/mlir/test/Integration/GPU/CUDA/command-line-arg.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_80 ptxas-cmd-options='-v --register-usage-level=8'" -debug-only=serialize-to-binary \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_80 ptxas-cmd-options='-v --register-usage-level=8' allow-pattern-rollback=0" -debug-only=serialize-to-binary \ // RUN: 2>&1 | FileCheck %s func.func @host_function(%arg0 : f32, %arg1 : memref) { diff --git a/mlir/test/Integration/GPU/CUDA/concurrent-kernels.mlir b/mlir/test/Integration/GPU/CUDA/concurrent-kernels.mlir index ed01416d9523a..51f6e36aaa977 100644 --- a/mlir/test/Integration/GPU/CUDA/concurrent-kernels.mlir +++ b/mlir/test/Integration/GPU/CUDA/concurrent-kernels.mlir @@ -2,7 +2,7 @@ // increment a global atomic counter and wait for the counter to reach 2. 
// // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | env CUDA_MODULE_LOADING=EAGER mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir index 27ec1ec435fef..efffcaaf23b2e 100644 --- a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir +++ b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=serialize-to-isa \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="allow-pattern-rollback=0" -debug-only=serialize-to-isa \ // RUN: 2>&1 | FileCheck %s // CHECK-LABEL: Generated by LLVM NVPTX Back-End diff --git a/mlir/test/Integration/GPU/CUDA/dump-sass.mlir b/mlir/test/Integration/GPU/CUDA/dump-sass.mlir index d32f5efc29d58..f810678569615 100644 --- a/mlir/test/Integration/GPU/CUDA/dump-sass.mlir +++ b/mlir/test/Integration/GPU/CUDA/dump-sass.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=dump-sass \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="allow-pattern-rollback=0" -debug-only=dump-sass \ // RUN: 2>&1 | FileCheck %s // CHECK: MOV diff --git a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir index 07f3218ae89b2..fe3c2b1d93a1b 100644 --- a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir +++ b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir index b2ac90acde94f..f8f1aa8aaa42e 100644 --- a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir +++ b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/printf.mlir b/mlir/test/Integration/GPU/CUDA/printf.mlir index fd664f2331488..ef116760b69e5 100644 --- a/mlir/test/Integration/GPU/CUDA/printf.mlir +++ b/mlir/test/Integration/GPU/CUDA/printf.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/shuffle.mlir b/mlir/test/Integration/GPU/CUDA/shuffle.mlir index a6207d64c038b..a4be5223cd792 100644 --- a/mlir/test/Integration/GPU/CUDA/shuffle.mlir +++ b/mlir/test/Integration/GPU/CUDA/shuffle.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s 
\ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/GPU/CUDA/two-modules.mlir b/mlir/test/Integration/GPU/CUDA/two-modules.mlir index c3cee2fda46f3..3490003d6ba19 100644 --- a/mlir/test/Integration/GPU/CUDA/two-modules.mlir +++ b/mlir/test/Integration/GPU/CUDA/two-modules.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \ +// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \ // RUN: | mlir-runner \ // RUN: --shared-libs=%mlir_cuda_runtime \ // RUN: --shared-libs=%mlir_runner_utils \ diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 85c64ffd58ca6..635f77215b38f 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -2934,6 +2934,10 @@ llvm_target_lib_list = [lib for lib in [ ["-gen-exegesis"], "lib/Target/PowerPC/PPCGenExegesis.inc", ), + ( + ["-gen-sd-node-info"], + "lib/Target/PowerPC/PPCGenSDNodeInfo.inc", + ), ], }, {