Skip to content

Commit b2aa9f9

Browse files
authored
[SYCL][ESIMD] Run all passes regardless of opt level. (#19411)
The vector backend of the GPU driver doesn't support `-O0`, so ESIMD code must still be optimized even when `-O0` is requested. This patch also updates all the tests accordingly.
1 parent 66909ed commit b2aa9f9

14 files changed

+187
-424
lines changed

llvm/lib/SYCLPostLink/ESIMDPostSplitProcessing.cpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,25 +34,21 @@ ModulePassManager buildESIMDLoweringPipeline(bool OptLevelO0, bool SplitESIMD) {
3434
ModulePassManager MPM;
3535
MPM.addPass(SYCLLowerESIMDPass(!SplitESIMD));
3636

37-
if (!OptLevelO0) {
38-
FunctionPassManager FPM;
39-
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
40-
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
41-
}
37+
FunctionPassManager FPM;
38+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
39+
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
4240
MPM.addPass(ESIMDOptimizeVecArgCallConvPass{});
4341
FunctionPassManager MainFPM;
4442
MainFPM.addPass(ESIMDLowerLoadStorePass{});
4543

46-
if (!OptLevelO0) {
47-
MainFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
48-
MainFPM.addPass(EarlyCSEPass(true));
49-
MainFPM.addPass(InstCombinePass{});
50-
MainFPM.addPass(DCEPass{});
51-
MainFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
52-
MainFPM.addPass(EarlyCSEPass(true));
53-
MainFPM.addPass(InstCombinePass{});
54-
MainFPM.addPass(DCEPass{});
55-
}
44+
MainFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
45+
MainFPM.addPass(EarlyCSEPass(true));
46+
MainFPM.addPass(InstCombinePass{});
47+
MainFPM.addPass(DCEPass{});
48+
MainFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
49+
MainFPM.addPass(EarlyCSEPass(true));
50+
MainFPM.addPass(InstCombinePass{});
51+
MainFPM.addPass(DCEPass{});
5652
MPM.addPass(ESIMDLowerSLMReservationCalls{});
5753
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
5854
MPM.addPass(GenXSPIRVWriterAdaptor(/*RewriteTypes=*/true,

llvm/test/tools/sycl-post-link/sycl-esimd/basic-esimd-lower.ll

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@
1414
; RUN: sycl-post-link -properties -split-esimd -lower-esimd -O2 -S < %s -o %t.table
1515
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O2
1616

17-
; -O0 lowering
18-
; RUN: sycl-post-link -properties -split-esimd -lower-esimd -O0 -S < %s -o %t.table
19-
; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O0
20-
2117
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
2218
target triple = "spir64-unknown-linux"
2319

@@ -54,15 +50,6 @@ attributes #0 = { "sycl-module-id"="a.cpp" }
5450
; CHECK-NO-LOWERING: ret void
5551
; CHECK-NO-LOWERING: }
5652

57-
; With -O0, we only lower ESIMD code, but no other optimizations
58-
; CHECK-O0: define dso_local spir_kernel void @ESIMD_kernel() #{{[0-9]}} !sycl_explicit_simd !{{[0-9]}} !intel_reqd_sub_group_size !{{[0-9]}} {
59-
; CHECK-O0: entry:
60-
; CHECK-O0: %0 = load <3 x i64>, {{.*}} addrspacecast {{.*}} @__spirv_BuiltInGlobalInvocationId
61-
; CHECK-O0: %1 = extractelement <3 x i64> %0, i64 0
62-
; CHECK-O0: call void @llvm.genx.barrier()
63-
; CHECK-O0: ret void
64-
; CHECK-O0: }
65-
6653
; With -O2, unused call was optimized away
6754
; CHECK-O2: define dso_local spir_kernel void @ESIMD_kernel()
6855
; CHECK-O2: entry:

llvm/test/tools/sycl-post-link/sycl-post-link-test.ll renamed to llvm/test/tools/sycl-post-link/sycl-esimd/sycl-post-link-test.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,8 @@ entry:
2020
store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
2121
ret void
2222
}
23-
; CHECK: %conv.i = zext i32 0 to i64
24-
; CHECK: store i64 %conv.i, ptr addrspace(1) %_arg_DoNotOptimize, align 8
25-
; CHECK: %add.i = add i32 0, 3
26-
; CHECK: store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
23+
; CHECK: store i64 0, ptr addrspace(1) %_arg_DoNotOptimize, align 8
24+
; CHECK: store i32 3, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
2725

2826
; Function Attrs: convergent norecurse
2927
define dso_local spir_kernel void @kernel_SubgroupSize(ptr addrspace(1) noundef align 8 %_arg_DoNotOptimize, ptr addrspace(1) noundef align 4 %_arg_DoNotOptimize32)#0 !sycl_explicit_simd !3{
@@ -35,10 +33,8 @@ entry:
3533
store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
3634
ret void
3735
}
38-
; CHECK: %conv.i = zext i32 1 to i64
39-
; CHECK: store i64 %conv.i, ptr addrspace(1) %_arg_DoNotOptimize, align 8
40-
; CHECK: %add.i = add i32 1, 7
41-
; CHECK: store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
36+
; CHECK: store i64 1, ptr addrspace(1) %_arg_DoNotOptimize, align 8
37+
; CHECK: store i32 8, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
4238

4339
; Function Attrs: convergent norecurse
4440
define dso_local spir_kernel void @kernel_SubgroupMaxSize(ptr addrspace(1) noundef align 8 %_arg_DoNotOptimize, ptr addrspace(1) noundef align 4 %_arg_DoNotOptimize32) #0 !sycl_explicit_simd !3 {
@@ -50,10 +46,8 @@ entry:
5046
store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
5147
ret void
5248
}
53-
; CHECK: %conv.i = zext i32 1 to i64
54-
; CHECK: store i64 %conv.i, ptr addrspace(1) %_arg_DoNotOptimize, align 8
55-
; CHECK: %add.i = add i32 1, 9
56-
; CHECK: store i32 %add.i, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
49+
; CHECK: store i64 1, ptr addrspace(1) %_arg_DoNotOptimize, align 8
50+
; CHECK: store i32 10, ptr addrspace(1) %_arg_DoNotOptimize32, align 4
5751

5852
attributes #0 = { "sycl-module-id"="a.cpp" }
5953

sycl/test/check_device_code/esimd/fp16_converts.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,7 @@ __attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
3434
SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void bf16_vector() {
3535
simd<float, 8> F32 = 0;
3636
simd<bfloat16, 8> BF16 = F32;
37-
// CHECK: call <8 x half> @llvm.genx.bf.cvt.v8f16.v8f32(<8 x float> {{[^)]+}})
3837
simd<float, 8> F32_conv = BF16;
39-
// CHECK: call <8 x float> @llvm.genx.bf.cvt.v8f32.v8f16(<8 x half> {{[^)]+}})
4038
}
4139

4240
SYCL_ESIMD_FUNCTION SYCL_EXTERNAL void bf16_scalar() {

sycl/test/check_device_code/esimd/intrins_trans.cpp

Lines changed: 25 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -101,15 +101,13 @@ test_mem_intrins(int *addr, const vec<float, 8> &xf,
101101
{
102102
uint32_t offset = 128;
103103
vec<int, 8> x = __esimd_slm_block_ld<int, 8, 32>(offset);
104-
// CHECK: %[[VAR_OFF1:[0-9a-zA-Z_.]+]] = inttoptr i32 %{{[a-zA-Z0-9.]+}} to ptr addrspace(3)
105-
// CHECK-NEXT: load <8 x i32>, ptr addrspace(3) %[[VAR_OFF1]], align 32
104+
// CHECK: load <8 x i32>, ptr addrspace(3) inttoptr (i32 128 to ptr addrspace(3)), align 32
106105
use(x);
107106
}
108107
{
109108
uint32_t offset = 256;
110109
__esimd_slm_block_st<int, 8, 4>(offset, get8i());
111-
// CHECK: %[[VAR_OFF2:[0-9a-zA-Z_.]+]] = inttoptr i32 %{{[a-zA-Z0-9.]+}} to ptr addrspace(3)
112-
// CHECK-NEXT: store <8 x i32> %{{[a-zA-Z0-9.]+}}, ptr addrspace(3) %[[VAR_OFF2]], align 4
110+
// CHECK: store <8 x i32> %call16, ptr addrspace(3) inttoptr (i32 256 to ptr addrspace(3)), align 4
113111
}
114112
{
115113
auto x = __esimd_svm_gather<unsigned char, 8>(get8ui64(), get8ui16());
@@ -210,40 +208,36 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {
210208
v_addr += offsets;
211209

212210
__esimd_svm_atomic0<atomic_op::inc, uint32_t, VL>(v_addr.data(), pred.data());
213-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.inc.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
211+
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.inc.v32i32.v32i1.v32i64(<32 x i1> undef, <32 x i64> zeroinitializer, <32 x i32> undef)
214212

215213
__esimd_svm_atomic1<atomic_op::add, uint32_t, VL>(v_addr.data(), v1.data(),
216214
pred.data());
217-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.add.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
215+
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.add.v32i32.v32i1.v32i64(<32 x i1> undef, <32 x i64> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
218216
__esimd_svm_atomic2<atomic_op::cmpxchg, uint32_t, VL>(
219217
v_addr.data(), v1.data(), v1.data(), pred.data());
220-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.cmpxchg.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
218+
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.cmpxchg.v32i32.v32i1.v32i64(<32 x i1> undef, <32 x i64> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
221219

222220
simd<uint32_t, VL> v00 = __esimd_svm_block_ld<uint32_t, VL, 4>(vec_ptr);
223-
// CHECK: %[[VAR1:[0-9a-zA-Z_.]+]] = load <32 x i32>, ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 4
224221
__esimd_svm_block_st<uint32_t, VL, 128>(vec_ptr, v00.data());
225-
// CHECK-NEXT: store <32 x i32> %[[VAR1]], ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 128
226222

227223
simd<uint32_t, VL> v01 =
228224
__esimd_svm_gather<uint32_t, VL>(v_addr.data(), pred.data());
229-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.gather.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
225+
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.gather.v32i32.v32i1.v32i64(<32 x i1> undef, i32 0, <32 x i64> zeroinitializer, <32 x i32> undef)
230226

231227
__esimd_svm_scatter<uint32_t, VL>(v_addr.data(), v01.data(), pred.data());
232-
// CHECK: call void @llvm.genx.svm.scatter.v32i1.v32i64.v32i32(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}})
228+
// CHECK: call void @llvm.genx.svm.scatter.v32i1.v32i64.v32i32(<32 x i1> undef, i32 0, <32 x i64> zeroinitializer, <32 x i32> %{{[0-9a-zA-Z_.]+}})
233229

234230
simd<short, 16> mina(0, 1);
235231
simd<short, 16> minc(5);
236232
minc = __esimd_smin<short, 16>(mina.data(), minc.data());
237-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i16> @llvm.genx.smin.v16i16.v16i16(<16 x i16> %{{[0-9a-zA-Z_.]+}}, <16 x i16> %{{[0-9a-zA-Z_.]+}})
238233

239234
simd<float, 1> diva(2.f);
240235
simd<float, 1> divb(1.f);
241236
diva = __esimd_ieee_div<float, 1>(diva.data(), divb.data());
242-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <1 x float> @llvm.genx.ieee.div.v1f32(<1 x float> %{{[0-9a-zA-Z_.]+}}, <1 x float> %{{[0-9a-zA-Z_.]+}})
243237

244238
simd<float, 16> a(0.1f);
245239
simd<float, 8> b = __esimd_rdregion<float, 16, 8, 0, 8, 1>(a.data(), 0);
246-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0)
240+
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> splat (float 0x3FB99999A0000000), i32 0, i32 8, i32 1, i16 0, i32 0)
247241

248242
simd<float, 16> c(0.0f);
249243

@@ -261,21 +255,17 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {
261255
auto d = __esimd_wrregion<float, 16 /*ret size*/, 8 /*write size*/,
262256
0 /*vstride*/, 8 /*row width*/, 1 /*hstride*/>(
263257
c.data() /*dst*/, b.data() /*src*/, 0 /*offset*/);
264-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x float> @llvm.genx.wrregionf.v16f32.v8f32.i16.v8i1(<16 x float> %{{[0-9a-zA-Z_.]+}}, <8 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0, <8 x i1> splat (i1 true))
258+
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x float> @llvm.genx.wrregionf.v16f32.v8f32.i16.v8i1(<16 x float> zeroinitializer, <8 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0, <8 x i1> splat (i1 true))
265259

266260
simd<int, 32> va;
267261
va = media_block_load<int, 4, 8>(pA, x, y);
268-
// CHECK: %[[SI0_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %{{[0-9a-zA-Z_.]+}})
269-
// CHECK: store i32 %[[SI0_VAL]], ptr addrspace(4) %[[SI0_ADDR:[0-9a-zA-Z_.]+]]
270-
// CHECK: %[[SI0:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI0_ADDR]]
271-
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.media.ld.v32i32(i32 0, i32 %[[SI0]], i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}})
262+
// CHECK: %[[SI0_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef)
263+
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.media.ld.v32i32(i32 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, i32 32, i32 0, i32 0)
272264

273265
simd<int, 32> vb = va + 1;
274266
media_block_store<int, 4, 8>(pB, x, y, vb);
275-
// CHECK: %[[SI2_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %{{[0-9a-zA-Z_.]+}})
276-
// CHECK: store i32 %[[SI2_VAL]], ptr addrspace(4) %[[SI2_ADDR:[0-9a-zA-Z_.]+]]
277-
// CHECK: %[[SI2:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI2_ADDR]]
278-
// CHECK: call void @llvm.genx.media.st.v32i32(i32 0, i32 %[[SI2]], i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}})
267+
// CHECK: %[[SI2_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) undef)
268+
// CHECK: call void @llvm.genx.media.st.v32i32(i32 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, i32 32, i32 0, i32 0, <32 x i32> %{{[0-9a-zA-Z_.]+}})
279269

280270
auto ee = __esimd_vload<int, 16>((detail::vector_type_t<int, 16> *)(&vg));
281271
// CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i32> @llvm.genx.vload.v16i32.p0(ptr {{.*}})
@@ -291,47 +281,35 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {
291281

292282
// 4-byte element gather
293283
simd<int, 8> v = gather<int, 8>(acc, offsets, 100);
294-
// CHECK-STATEFUL: %[[SI3_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
295-
// CHECK-STATEFUL: store i32 %[[SI3_VAL]], ptr addrspace(4) %[[SI3_ADDR:[0-9a-zA-Z_.]+]]
296-
// CHECK-STATEFUL: %[[SI3:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI3_ADDR]]
297-
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 2, i16 0, i32 %[[SI3]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}})
298-
// CHECK-STATELESS: call <8 x i32> @llvm.genx.svm.gather.v8i32.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> undef)
284+
// CHECK-STATEFUL: %[[SI3_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef undef)
285+
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 2, i16 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 100, <8 x i32> splat (i32 1), <8 x i1> splat (i1 true))
286+
// CHECK-STATELESS: call <8 x i32> @llvm.genx.svm.gather.v8i32.v8i1.v8i64(<8 x i1> splat (i1 true), i32 0, <8 x i64> undef, <8 x i32> undef)
299287

300288
// 4-byte element scatter
301289
scatter<int, 8>(acc, offsets, v, 100, pred);
302-
// CHECK-STATEFUL: %[[SI4_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
303-
// CHECK-STATEFUL: store i32 %[[SI4_VAL]], ptr addrspace(4) %[[SI4_ADDR:[0-9a-zA-Z_.]+]]
304-
// CHECK-STATEFUL: %[[SI4:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI4_ADDR]]
305-
// CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 2, i16 0, i32 %[[SI4]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
306-
// CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
290+
// CHECK-STATEFUL: %[[SI4_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef undef)
291+
// CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, i32 2, i16 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i32> splat (i32 101), <8 x i32> %{{[0-9a-zA-Z_.]+}})
292+
// CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i32(<8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, i32 0, <8 x i64> undef, <8 x i32> %{{[0-9a-zA-Z_.]+}})
307293

308294
// 1-byte element gather: same code with and without mask
309295
simd<unsigned char, 8> v1 = gather<unsigned char, 8>(acc, offsets, 100);
310-
// CHECK-STATEFUL: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
311-
// CHECK-STATEFUL: store i32 %[[SI5_VAL]], ptr addrspace(4) %[[SI5_ADDR:[0-9a-zA-Z_.]+]]
312-
// CHECK-STATEFUL: %[[SI5:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI5_ADDR]]
313-
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %[[SI5]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}})
314-
// CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> undef)
296+
// CHECK-STATEFUL: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef undef)
297+
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i32> splat (i32 1), <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>)
298+
// CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, i32 0, <8 x i64> undef, <32 x i8> undef)
315299

316300
// 1-byte element gather using the mask
317301
v1 = gather<unsigned char, 8>(acc, offsets, 100, pred);
318-
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 {{[^)]+}}, i32 {{[^)]+}}, <8 x i32> {{[^)]+}}, <8 x i1> {{[^)]+}})
319-
// CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> {{[^)]+}}, i32 0, <8 x i64> {{[^)]+}}, <32 x i8> undef)
320302

321303
// 1-byte element gather using the mask - the mask is signed, which may
322304
// expose different issues/conflicts in gather API.
323305
simd<int32_t, 8> ioffsets = 1;
324306
v1 = gather<unsigned char, 8>(acc, ioffsets, 0, pred);
325-
// CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 {{[^)]+}}, i32 {{[^)]+}}, <8 x i32> {{[^)]+}}, <8 x i1> {{[^)]+}})
326-
// CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> {{[^)]+}}, i32 0, <8 x i64> {{[^)]+}}, <32 x i8> undef)
327307

328308
// 1-byte element scatter
329309
scatter<unsigned char, 8>(acc, offsets, v1, 100, pred);
330-
// CHECK-STATEFUL: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
331-
// CHECK-STATEFUL: store i32 %[[SI6_VAL]], ptr addrspace(4) %[[SI6_ADDR:[0-9a-zA-Z_.]+]]
332-
// CHECK-STATEFUL: %[[SI6:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI6_ADDR]]
333-
// CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, i16 0, i32 %[[SI6]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
334-
// CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v32i8(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> %{{[0-9a-zA-Z_.]+}})
310+
// CHECK-STATEFUL: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef undef)
311+
// CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, i32 0, i16 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i32> splat (i32 101), <8 x i32> %{{[0-9a-zA-Z_.]+}})
312+
// CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v32i8(<8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, i32 0, <8 x i64> undef, <32 x i8> %{{[0-9a-zA-Z_.]+}})
335313
}
336314
__esimd_fence(fence_mask::global_coherent_fence);
337315
// CHECK: call void @llvm.genx.fence(i8 1)

0 commit comments

Comments
 (0)