Skip to content

Commit fb04b56

Browse files
committed
Merge from 'main' to 'sycl-web' (15 commits)
CONFLICT (content): Merge conflict in llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
2 parents ac4966d + 36dbe51 commit fb04b56

File tree

112 files changed

+2335
-1181
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+2335
-1181
lines changed

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2441,6 +2441,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
24412441

24422442
assert(FKI.TargetOffset == 0 && "0-bit relocation offset expected");
24432443
const uint64_t RelOffset = Fixup.getOffset();
2444+
auto [RelSymbol, RelAddend] = extractFixupExpr(Fixup);
24442445

24452446
uint32_t RelType;
24462447
if (Fixup.isPCRel()) {
@@ -2452,6 +2453,9 @@ class X86MCPlusBuilder : public MCPlusBuilder {
24522453
case 32: RelType = ELF::R_X86_64_PC32; break;
24532454
case 64: RelType = ELF::R_X86_64_PC64; break;
24542455
}
2456+
// Adjust PC-relative fixup offsets, which are calculated from the start
2457+
// of the next instruction.
2458+
RelAddend -= FKI.TargetSize / 8;
24552459
} else {
24562460
switch (FKI.TargetSize) {
24572461
default:
@@ -2463,8 +2467,6 @@ class X86MCPlusBuilder : public MCPlusBuilder {
24632467
}
24642468
}
24652469

2466-
auto [RelSymbol, RelAddend] = extractFixupExpr(Fixup);
2467-
24682470
return Relocation({RelOffset, RelSymbol, RelType, RelAddend, 0});
24692471
}
24702472

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,10 +1253,12 @@ def offload_host_device : Flag<["--"], "offload-host-device">,
12531253
def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
12541254
InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "
12551255
"'-aux-target-cpu' and '-aux-target-feature'.">;
1256+
def offload_arch_tool_EQ : Joined<["--"], "offload-arch-tool=">,
1257+
HelpText<"Tool used for detecting offloading architectures in the system.">;
12561258
def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">,
1257-
HelpText<"Tool used for detecting AMD GPU arch in the system.">;
1259+
Alias<offload_arch_tool_EQ>;
12581260
def nvptx_arch_tool_EQ : Joined<["--"], "nvptx-arch-tool=">,
1259-
HelpText<"Tool used for detecting NVIDIA GPU arch in the system.">;
1261+
Alias<offload_arch_tool_EQ>;
12601262

12611263
defm gpu_rdc : BoolFOption<"gpu-rdc",
12621264
LangOpts<"GPURelocatableDeviceCode">, DefaultFalse,

clang/lib/Basic/Targets/AMDGPU.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
441441
// pre-defined macros.
442442
bool handleTargetFeatures(std::vector<std::string> &Features,
443443
DiagnosticsEngine &Diags) override {
444+
HasFullBFloat16 = true;
444445
auto TargetIDFeatures =
445446
getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind));
446447
for (const auto &F : Features) {

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -836,7 +836,7 @@ Expected<SmallVector<std::string>>
836836
AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const {
837837
// Detect AMD GPUs available on the system.
838838
std::string Program;
839-
if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ))
839+
if (Arg *A = Args.getLastArg(options::OPT_offload_arch_tool_EQ))
840840
Program = A->getValue();
841841
else
842842
Program = GetProgramPath("amdgpu-arch");

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -898,7 +898,7 @@ Expected<SmallVector<std::string>>
898898
NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
899899
// Detect NVIDIA GPUs available on the system.
900900
std::string Program;
901-
if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ))
901+
if (Arg *A = Args.getLastArg(options::OPT_offload_arch_tool_EQ))
902902
Program = A->getValue();
903903
else
904904
Program = GetProgramPath("nvptx-arch");

clang/test/CodeGen/AArch64/fmv-dependencies.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,6 @@ int caller() {
189189
// CHECK: attributes #[[sve2_aes]] = { {{.*}} "target-features"="+aes,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve-aes,+sve2,+sve2-aes,+v8a"
190190
// CHECK: attributes #[[sve2_bitperm]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve-bitperm,+sve2,+sve2-bitperm,+v8a"
191191
// CHECK: attributes #[[sve2_sha3]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sha2,+sha3,+sve,+sve-sha3,+sve2,+sve2-sha3,+v8a"
192-
// CHECK: attributes #[[sve2_sm4]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sm4,+sve,+sve2,+sve2-sm4,+v8a"
192+
// CHECK: attributes #[[sve2_sm4]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sm4,+sve,+sve-sm4,+sve2,+sve2-sm4,+v8a"
193193
// CHECK: attributes #[[wfxt]] = { {{.*}} "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,+wfxt"
194194
// CHECK: attributes #[[cssc]] = { {{.*}} "target-features"="+cssc,+fp-armv8,+neon,+outline-atomics,+v8a"

clang/test/CodeGen/AMDGPU/full-bf16.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu tahiti -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu fiji -emit-llvm -o - %s | FileCheck %s
4+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
5+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s
6+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
7+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s
8+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s
9+
10+
// CHECK-LABEL: define dso_local bfloat @div(
11+
// CHECK-SAME: bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
12+
// CHECK-NEXT: [[ENTRY:.*:]]
13+
// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5)
14+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
15+
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
16+
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
17+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
18+
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
19+
// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR_ASCAST]], align 2
20+
// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR_ASCAST]], align 2
21+
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
22+
// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR_ASCAST]], align 2
23+
// CHECK-NEXT: [[DIV:%.*]] = fdiv bfloat [[TMP0]], [[TMP1]]
24+
// CHECK-NEXT: ret bfloat [[DIV]]
25+
//
26+
__bf16 div(__bf16 a, __bf16 b) {
27+
return a / b;
28+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/sh
2+
echo "sm_70"
3+
echo "gfx906"
4+
exit 0

clang/test/Driver/aarch64-implied-sve-features.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
// SVE2-SHA3-REVERT: "-target-feature" "+sve" "-target-feature" "-sve-sha3" "-target-feature" "+sve2" "-target-feature" "-sve2-sha3"
5050

5151
// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve2-sm4+nosve2-sm4 %s -### 2>&1 | FileCheck %s --check-prefix=SVE2-SM4-REVERT
52-
// SVE2-SM4-REVERT: "-target-feature" "+sve" "-target-feature" "+sve2" "-target-feature" "-sve2-sm4"
52+
// SVE2-SM4-REVERT: "-target-feature" "+sve" "-target-feature" "-sve-sm4" "-target-feature" "+sve2" "-target-feature" "-sve2-sm4"
5353

5454
// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve2-sha3 %s -### 2>&1 | FileCheck %s --check-prefix=SVE2-SHA3
5555
// SVE2-SHA3: "-target-feature" "+sve" "-target-feature" "+sve-sha3" "-target-feature" "+sve2" "-target-feature" "+sve2-sha3"
@@ -61,14 +61,14 @@
6161
// SVE2-AES: "-target-feature" "+sve" "-target-feature" "+sve-aes" "-target-feature" "+sve2" "-target-feature" "+sve2-aes"
6262

6363
// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve2-sm4 %s -### 2>&1 | FileCheck %s --check-prefix=SVE2-SM4
64-
// SVE2-SM4: "-target-feature" "+sve" "-target-feature" "+sve2" "-target-feature" "+sve2-sm4"
64+
// SVE2-SM4: "-target-feature" "+sve" "-target-feature" "+sve-sm4" "-target-feature" "+sve2" "-target-feature" "+sve2-sm4"
6565

6666
// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve2-bitperm+nosve2-aes %s -### 2>&1 | FileCheck %s --check-prefix=SVE2-SUBFEATURE-MIX
6767
// SVE2-SUBFEATURE-MIX: "-target-feature" "+sve" "-target-feature" "+sve-bitperm" "-target-feature" "+sve2" "-target-feature" "+sve2-bitperm"
6868
// SVE2-SUBFEATURE-NOT: sve2-aes
6969

7070
// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve2-sm4+nosve2 %s -### 2>&1 | FileCheck %s --check-prefix=SVE2-SUBFEATURE-CONFLICT
71-
// SVE2-SUBFEATURE-CONFLICT: "-target-feature" "+sve" "-target-feature" "-sve2" "-target-feature" "-sve2-sm4"
71+
// SVE2-SUBFEATURE-CONFLICT: "-target-feature" "+sve" "-target-feature" "-sve-sm4" "-target-feature" "-sve2" "-target-feature" "-sve2-sm4"
7272

7373
// RUN: %clang --target=aarch64-linux-gnu -march=armv8-a+sve2-aes+nosve %s -### 2>&1 | FileCheck %s --check-prefix=SVE-SUBFEATURE-CONFLICT
7474
// SVE-SUBFEATURE-CONFLICT-NOT: "-target-feature" "+sve2-aes"

clang/test/Driver/openmp-system-arch.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
77
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/
88
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/
9+
// RUN: cp %S/Inputs/offload-arch/offload_arch_sm_70_gfx906 %t/
910
// RUN: echo '#!/bin/sh' > %t/amdgpu_arch_empty
1011
// RUN: chmod +x %t/amdgpu_arch_fail
1112
// RUN: chmod +x %t/amdgpu_arch_gfx906
@@ -14,6 +15,7 @@
1415
// RUN: chmod +x %t/nvptx_arch_fail
1516
// RUN: chmod +x %t/nvptx_arch_sm_70
1617
// RUN: chmod +x %t/nvptx_arch_empty
18+
// RUN: chmod +x %t/offload_arch_sm_70_gfx906
1719

1820
// case when nvptx-arch and amdgpu-arch return nothing or fail
1921
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \
@@ -41,23 +43,23 @@
4143

4244
// case when nvptx-arch succeeds.
4345
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \
44-
// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
46+
// RUN: --amdgpu-arch-tool=%t/amdgpu_arch_fail --nvptx-arch-tool=%t/nvptx_arch_sm_70 %s 2>&1 \
4547
// RUN: | FileCheck %s --check-prefix=ARCH-SM_70
4648
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
47-
// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
49+
// RUN: --amdgpu-arch-tool=%t/amdgpu_arch_fail --nvptx-arch-tool=%t/nvptx_arch_sm_70 %s 2>&1 \
4850
// RUN: | FileCheck %s --check-prefix=ARCH-SM_70
4951
// ARCH-SM_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
5052

5153
// case when both nvptx-arch and amdgpu-arch succeed.
5254
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \
53-
// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
55+
// RUN: --offload-arch-tool=%t/offload_arch_sm_70_gfx906 %s 2>&1 \
5456
// RUN: | FileCheck %s --check-prefix=ARCH-SM_70-GFX906
5557
// ARCH-SM_70-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
5658
// ARCH-SM_70-GFX906: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
5759

5860
// case when both nvptx-arch and amdgpu-arch succeed with other archs.
5961
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native,sm_75,gfx1030 \
60-
// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
62+
// RUN: --offload-arch-tool=%t/offload_arch_sm_70_gfx906 %s 2>&1 \
6163
// RUN: | FileCheck %s --check-prefix=ARCH-MULTIPLE
6264
// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx1030"
6365
// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"

0 commit comments

Comments
 (0)