@@ -101,15 +101,13 @@ test_mem_intrins(int *addr, const vec<float, 8> &xf,
   {
     uint32_t offset = 128;
     vec<int, 8> x = __esimd_slm_block_ld<int, 8, 32>(offset);
-    // CHECK: %[[VAR_OFF1:[0-9a-zA-Z_.]+]] = inttoptr i32 %{{[a-zA-Z0-9.]+}} to ptr addrspace(3)
-    // CHECK-NEXT: load <8 x i32>, ptr addrspace(3) %[[VAR_OFF1]], align 32
+    // CHECK: load <8 x i32>, ptr addrspace(3) inttoptr (i32 128 to ptr addrspace(3)), align 32
     use(x);
   }
   {
     uint32_t offset = 256;
     __esimd_slm_block_st<int, 8, 4>(offset, get8i());
-    // CHECK: %[[VAR_OFF2:[0-9a-zA-Z_.]+]] = inttoptr i32 %{{[a-zA-Z0-9.]+}} to ptr addrspace(3)
-    // CHECK-NEXT: store <8 x i32> %{{[a-zA-Z0-9.]+}}, ptr addrspace(3) %[[VAR_OFF2]], align 4
+    // CHECK: store <8 x i32> %call16, ptr addrspace(3) inttoptr (i32 256 to ptr addrspace(3)), align 4
   }
   {
     auto x = __esimd_svm_gather<unsigned char, 8>(get8ui64(), get8ui16());
@@ -210,40 +208,36 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {
   v_addr += offsets;

   __esimd_svm_atomic0<atomic_op::inc, uint32_t, VL>(v_addr.data(), pred.data());
-  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.inc.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
+  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.inc.v32i32.v32i1.v32i64(<32 x i1> undef, <32 x i64> zeroinitializer, <32 x i32> undef)

   __esimd_svm_atomic1<atomic_op::add, uint32_t, VL>(v_addr.data(), v1.data(),
                                                     pred.data());
-  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.add.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
+  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.add.v32i32.v32i1.v32i64(<32 x i1> undef, <32 x i64> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
   __esimd_svm_atomic2<atomic_op::cmpxchg, uint32_t, VL>(
       v_addr.data(), v1.data(), v1.data(), pred.data());
-  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.cmpxchg.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
+  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.cmpxchg.v32i32.v32i1.v32i64(<32 x i1> undef, <32 x i64> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)

   simd<uint32_t, VL> v00 = __esimd_svm_block_ld<uint32_t, VL, 4>(vec_ptr);
-  // CHECK: %[[VAR1:[0-9a-zA-Z_.]+]] = load <32 x i32>, ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 4
   __esimd_svm_block_st<uint32_t, VL, 128>(vec_ptr, v00.data());
-  // CHECK-NEXT: store <32 x i32> %[[VAR1]], ptr addrspace(4) %{{[a-zA-Z0-9.]+}}, align 128

   simd<uint32_t, VL> v01 =
       __esimd_svm_gather<uint32_t, VL>(v_addr.data(), pred.data());
-  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.gather.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef)
+  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.gather.v32i32.v32i1.v32i64(<32 x i1> undef, i32 0, <32 x i64> zeroinitializer, <32 x i32> undef)

   __esimd_svm_scatter<uint32_t, VL>(v_addr.data(), v01.data(), pred.data());
-  // CHECK: call void @llvm.genx.svm.scatter.v32i1.v32i64.v32i32(<32 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}})
+  // CHECK: call void @llvm.genx.svm.scatter.v32i1.v32i64.v32i32(<32 x i1> undef, i32 0, <32 x i64> zeroinitializer, <32 x i32> %{{[0-9a-zA-Z_.]+}})

   simd<short, 16> mina(0, 1);
   simd<short, 16> minc(5);
   minc = __esimd_smin<short, 16>(mina.data(), minc.data());
-  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i16> @llvm.genx.smin.v16i16.v16i16(<16 x i16> %{{[0-9a-zA-Z_.]+}}, <16 x i16> %{{[0-9a-zA-Z_.]+}})

   simd<float, 1> diva(2.f);
   simd<float, 1> divb(1.f);
   diva = __esimd_ieee_div<float, 1>(diva.data(), divb.data());
-  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <1 x float> @llvm.genx.ieee.div.v1f32(<1 x float> %{{[0-9a-zA-Z_.]+}}, <1 x float> %{{[0-9a-zA-Z_.]+}})

   simd<float, 16> a(0.1f);
   simd<float, 8> b = __esimd_rdregion<float, 16, 8, 0, 8, 1>(a.data(), 0);
-  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0)
+  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> splat (float 0x3FB99999A0000000), i32 0, i32 8, i32 1, i16 0, i32 0)

   simd<float, 16> c(0.0f);

@@ -261,21 +255,17 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {
   auto d = __esimd_wrregion<float, 16 /* ret size*/, 8 /* write size*/,
                             0 /* vstride*/, 8 /* row width*/, 1 /* hstride*/>(
       c.data() /* dst*/, b.data() /* src*/, 0 /* offset*/);
-  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x float> @llvm.genx.wrregionf.v16f32.v8f32.i16.v8i1(<16 x float> %{{[0-9a-zA-Z_.]+}}, <8 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0, <8 x i1> splat (i1 true))
+  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x float> @llvm.genx.wrregionf.v16f32.v8f32.i16.v8i1(<16 x float> zeroinitializer, <8 x float> %{{[0-9a-zA-Z_.]+}}, i32 0, i32 8, i32 1, i16 0, i32 0, <8 x i1> splat (i1 true))

   simd<int, 32> va;
   va = media_block_load<int, 4, 8>(pA, x, y);
-  // CHECK: %[[SI0_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %{{[0-9a-zA-Z_.]+}})
-  // CHECK: store i32 %[[SI0_VAL]], ptr addrspace(4) %[[SI0_ADDR:[0-9a-zA-Z_.]+]]
-  // CHECK: %[[SI0:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI0_ADDR]]
-  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.media.ld.v32i32(i32 0, i32 %[[SI0]], i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}})
+  // CHECK: %[[SI0_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) undef)
+  // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.media.ld.v32i32(i32 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, i32 32, i32 0, i32 0)

   simd<int, 32> vb = va + 1;
   media_block_store<int, 4, 8>(pB, x, y, vb);
-  // CHECK: %[[SI2_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %{{[0-9a-zA-Z_.]+}})
-  // CHECK: store i32 %[[SI2_VAL]], ptr addrspace(4) %[[SI2_ADDR:[0-9a-zA-Z_.]+]]
-  // CHECK: %[[SI2:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI2_ADDR]]
-  // CHECK: call void @llvm.genx.media.st.v32i32(i32 0, i32 %[[SI2]], i32 0, i32 32, i32 %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}})
+  // CHECK: %[[SI2_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) undef)
+  // CHECK: call void @llvm.genx.media.st.v32i32(i32 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, i32 32, i32 0, i32 0, <32 x i32> %{{[0-9a-zA-Z_.]+}})

   auto ee = __esimd_vload<int, 16>((detail::vector_type_t<int, 16> *)(&vg));
   // CHECK: %{{[0-9a-zA-Z_.]+}} = call <16 x i32> @llvm.genx.vload.v16i32.p0(ptr {{.*}})
@@ -291,47 +281,35 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd<float, 16> foo() {

     // 4-byte element gather
     simd<int, 8> v = gather<int, 8>(acc, offsets, 100);
-    // CHECK-STATEFUL: %[[SI3_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
-    // CHECK-STATEFUL: store i32 %[[SI3_VAL]], ptr addrspace(4) %[[SI3_ADDR:[0-9a-zA-Z_.]+]]
-    // CHECK-STATEFUL: %[[SI3:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI3_ADDR]]
-    // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 2, i16 0, i32 %[[SI3]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}})
-    // CHECK-STATELESS: call <8 x i32> @llvm.genx.svm.gather.v8i32.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> undef)
+    // CHECK-STATEFUL: %[[SI3_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef undef)
+    // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 2, i16 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 100, <8 x i32> splat (i32 1), <8 x i1> splat (i1 true))
+    // CHECK-STATELESS: call <8 x i32> @llvm.genx.svm.gather.v8i32.v8i1.v8i64(<8 x i1> splat (i1 true), i32 0, <8 x i64> undef, <8 x i32> undef)

     // 4-byte element scatter
     scatter<int, 8>(acc, offsets, v, 100, pred);
-    // CHECK-STATEFUL: %[[SI4_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
-    // CHECK-STATEFUL: store i32 %[[SI4_VAL]], ptr addrspace(4) %[[SI4_ADDR:[0-9a-zA-Z_.]+]]
-    // CHECK-STATEFUL: %[[SI4:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI4_ADDR]]
-    // CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 2, i16 0, i32 %[[SI4]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
-    // CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
+    // CHECK-STATEFUL: %[[SI4_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef undef)
+    // CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, i32 2, i16 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i32> splat (i32 101), <8 x i32> %{{[0-9a-zA-Z_.]+}})
+    // CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i32(<8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, i32 0, <8 x i64> undef, <8 x i32> %{{[0-9a-zA-Z_.]+}})

     // 1-byte element gather: same code with and without mask
     simd<unsigned char, 8> v1 = gather<unsigned char, 8>(acc, offsets, 100);
-    // CHECK-STATEFUL: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
-    // CHECK-STATEFUL: store i32 %[[SI5_VAL]], ptr addrspace(4) %[[SI5_ADDR:[0-9a-zA-Z_.]+]]
-    // CHECK-STATEFUL: %[[SI5:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI5_ADDR]]
-    // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %[[SI5]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i1> %{{[0-9a-zA-Z_.]+}})
-    // CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> undef)
+    // CHECK-STATEFUL: %[[SI5_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef undef)
+    // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i32> splat (i32 1), <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>)
+    // CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, i32 0, <8 x i64> undef, <32 x i8> undef)

     // 1-byte element gather using the mask
     v1 = gather<unsigned char, 8>(acc, offsets, 100, pred);
-    // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 {{[^)]+}}, i32 {{[^)]+}}, <8 x i32> {{[^)]+}}, <8 x i1> {{[^)]+}})
-    // CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> {{[^)]+}}, i32 0, <8 x i64> {{[^)]+}}, <32 x i8> undef)

     // 1-byte element gather using the mask - the mask is signed, which may
     // expose different issues/conflicts in gather API.
     simd<int32_t, 8> ioffsets = 1;
     v1 = gather<unsigned char, 8>(acc, ioffsets, 0, pred);
-    // CHECK-STATEFUL: call <8 x i32> @llvm.genx.gather.masked.scaled2.v8i32.v8i32.v8i1(i32 0, i16 0, i32 {{[^)]+}}, i32 {{[^)]+}}, <8 x i32> {{[^)]+}}, <8 x i1> {{[^)]+}})
-    // CHECK-STATELESS: call <32 x i8> @llvm.genx.svm.gather.v32i8.v8i1.v8i64(<8 x i1> {{[^)]+}}, i32 0, <8 x i64> {{[^)]+}}, <32 x i8> undef)

     // 1-byte element scatter
     scatter<unsigned char, 8>(acc, offsets, v1, 100, pred);
-    // CHECK-STATEFUL: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef %{{[0-9a-zA-Z_.]+}})
-    // CHECK-STATEFUL: store i32 %[[SI6_VAL]], ptr addrspace(4) %[[SI6_ADDR:[0-9a-zA-Z_.]+]]
-    // CHECK-STATEFUL: %[[SI6:[0-9a-zA-Z_.]+]] = load i32, ptr addrspace(4) %[[SI6_ADDR]]
-    // CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, i16 0, i32 %[[SI6]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}})
-    // CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v32i8(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i8> %{{[0-9a-zA-Z_.]+}})
+    // CHECK-STATEFUL: %[[SI6_VAL:[0-9a-zA-Z_.]+]] = call spir_func noundef i32 @_Z21__spirv_ConvertPtrToU{{.*}}(ptr addrspace(1) noundef undef)
+    // CHECK-STATEFUL: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, i32 0, i16 0, i32 %{{[0-9a-zA-Z_.]+}}, i32 0, <8 x i32> splat (i32 101), <8 x i32> %{{[0-9a-zA-Z_.]+}})
+    // CHECK-STATELESS: call void @llvm.genx.svm.scatter.v8i1.v8i64.v32i8(<8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, i32 0, <8 x i64> undef, <32 x i8> %{{[0-9a-zA-Z_.]+}})
   }
   __esimd_fence(fence_mask::global_coherent_fence);
   // CHECK: call void @llvm.genx.fence(i8 1)