Skip to content

Commit bb2424f

Browse files
bokrzesi and igcbot
authored and committed
[IGC Core] Adjustments of ScalarArgAsPointer pass after GEP leading zeros started being stripped
After the upstream patch intel/llvm@055bfc0 was released, leading zero indices in GEPs, e.g. ``` %arrayidx.i = getelementptr (...) %2, i64 0, i64 %idxprom.i ``` are stripped, so the instruction is transformed into: ``` %arrayidx.i = getelementptr (...) %2, i64 %idxprom.i ``` This change broke assumptions made in the ScalarArgAsPointer pass, so its "ArrayType finding" logic had to be adjusted in order to work properly.
1 parent e7561db commit bb2424f

File tree

3 files changed

+130
-4
lines changed

3 files changed

+130
-4
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/ScalarArgAsPointer/ScalarArgAsPointer.cpp

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -304,13 +304,35 @@ bool ScalarArgAsPointerAnalysis::findStoredArgs(llvm::LoadInst &LI, ArgSet &args
304304
for (auto it = offsets.begin(); it != offsets.end(); ++it)
305305
*it += offset;
306306
} else {
307-
if (prevGTI == gep_type_end(GEPI))
308-
return false; // variable index at first operand, should not happen
309-
310307
// gep_type_iterator is used to query indexed type. For arrays this is type
311308
// of single element. To get array size, we need to do query for it at
312309
// previous iterator step (before stepping into type indexed by array).
313-
ArrayType *ATy = dyn_cast<ArrayType>(prevGTI.getIndexedType());
310+
ArrayType *ATy = nullptr;
311+
if (prevGTI != gep_type_end(GEPI))
312+
ATy = dyn_cast<ArrayType>(prevGTI.getIndexedType());
313+
314+
if (!ATy) {
315+
// If the array type can't be deduced,
316+
// then the type was modified due to stripping of leading zero indices from GEP,
317+
// so we have to extract it from the alloca
318+
auto allocaType = AI->getAllocatedType();
319+
if (!allocaType)
320+
return false;
321+
322+
if (auto *structType = dyn_cast<StructType>(allocaType)) {
323+
for (auto offset : offsets) {
324+
auto structLayout = DL->getStructLayout(structType);
325+
auto indexOfArrayTyInStruct = structLayout->getElementContainingOffset(offset);
326+
auto candidateType = structType->getElementType(indexOfArrayTyInStruct);
327+
328+
if (candidateType && candidateType->isArrayTy()) {
329+
ATy = dyn_cast<ArrayType>(candidateType);
330+
break;
331+
}
332+
}
333+
}
334+
}
335+
314336
if (!ATy)
315337
return false;
316338

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm-14-plus
10+
; RUN: igc_opt --opaque-pointers --igc-scalar-arg-as-pointer-analysis -igc-serialize-metadata -S %s | FileCheck %s
11+
;
12+
13+
; CHECK: !{!"m_OpenCLArgScalarAsPointersSet{{[[][0-9][]]}}", i32 5}
14+
; CHECK: !{!"m_OpenCLArgScalarAsPointersSet{{[[][0-9][]]}}", i32 6}
15+
16+
%struct.__generated_KernelArgWithPtr = type { [2 x ptr addrspace(1)], i32, i32 }
17+
%struct.KernelArgWithPtr = type { [2 x ptr addrspace(4)], i32, i32 }
18+
19+
; Function Attrs: convergent nounwind
20+
define spir_kernel void @test(
21+
ptr nocapture readonly byval(%struct.__generated_KernelArgWithPtr) align 8 %__arg_kArg, i64 %_arg_randIndex, <8 x i32> %r0, <3 x i32> %globalOffset, ptr %privateBase, i64 %const_reg_qword, i64 %const_reg_qword1, i32 %const_reg_dword, i32 %const_reg_dword2) #0 {
22+
entry:
23+
%agg.tmp1 = alloca %struct.KernelArgWithPtr, align 8
24+
25+
store i64 %const_reg_qword, ptr %agg.tmp1, align 8
26+
27+
%__arg_kArg_alloca.sroa.2.0..sroa_idx3 = getelementptr inbounds %struct.KernelArgWithPtr, ptr %agg.tmp1, i64 0, i32 0, i64 1
28+
store i64 %const_reg_qword1, ptr %__arg_kArg_alloca.sroa.2.0..sroa_idx3, align 8
29+
30+
%__arg_kArg_alloca.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.KernelArgWithPtr, ptr %agg.tmp1, i64 0, i32 1
31+
store i32 %const_reg_dword, ptr %__arg_kArg_alloca.sroa.3.0..sroa_idx4, align 8
32+
33+
%__arg_kArg_alloca.sroa.4.0..sroa_idx5 = getelementptr inbounds %struct.KernelArgWithPtr, ptr %agg.tmp1, i64 0, i32 2
34+
store i32 %const_reg_dword2, ptr %__arg_kArg_alloca.sroa.4.0..sroa_idx5, align 4
35+
36+
%add.i = add nsw i32 %const_reg_dword, %const_reg_dword2
37+
38+
%arrayidx.i = getelementptr inbounds ptr addrspace(4), ptr %agg.tmp1, i64 %_arg_randIndex
39+
40+
%load = load ptr addrspace(4), ptr %arrayidx.i, align 8
41+
%arrayidx8.i = getelementptr inbounds i32, ptr addrspace(4) %load, i64 5
42+
%spaceCast = addrspacecast ptr addrspace(4) %arrayidx8.i to ptr addrspace(1)
43+
store i32 15, ptr addrspace(1) %spaceCast, align 4
44+
45+
ret void
46+
}
47+
48+
!igc.functions = !{!0}
49+
50+
!0 = !{ptr @test, !1}
51+
!1 = !{!2}
52+
!2 = !{!"function_type", i32 0}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm-14-plus
10+
; RUN: igc_opt --typed-pointers --igc-scalar-arg-as-pointer-analysis -igc-serialize-metadata -S %s | FileCheck %s
11+
;
12+
13+
; CHECK: !{!"m_OpenCLArgScalarAsPointersSet{{[[][0-9][]]}}", i32 5}
14+
; CHECK: !{!"m_OpenCLArgScalarAsPointersSet{{[[][0-9][]]}}", i32 6}
15+
16+
%struct.__generated_KernelArgWithPtr = type { [2 x i8 addrspace(1)*], i32, i32 }
17+
%struct.KernelArgWithPtr = type { [2 x i8 addrspace(4)*], i32, i32 }
18+
19+
; Function Attrs: convergent nounwind
20+
define spir_kernel void @test(
21+
%struct.__generated_KernelArgWithPtr* nocapture readonly byval(%struct.__generated_KernelArgWithPtr) align 8 %__arg_kArg, i64 %_arg_randIndex, <8 x i32> %r0, <3 x i32> %globalOffset, i8* %privateBase, i64 %const_reg_qword, i64 %const_reg_qword1, i32 %const_reg_dword, i32 %const_reg_dword2) #0 {
22+
entry:
23+
%agg.tmp1 = alloca %struct.KernelArgWithPtr, align 8
24+
25+
%0 = bitcast %struct.KernelArgWithPtr* %agg.tmp1 to i8*
26+
%__arg_kArg_alloca.sroa.0.0..sroa_cast = bitcast %struct.KernelArgWithPtr* %agg.tmp1 to i64*
27+
store i64 %const_reg_qword, i64* %__arg_kArg_alloca.sroa.0.0..sroa_cast, align 8
28+
%__arg_kArg_alloca.sroa.2.0..sroa_idx3 = getelementptr inbounds %struct.KernelArgWithPtr, %struct.KernelArgWithPtr* %agg.tmp1, i64 0, i32 0, i64 1
29+
%__arg_kArg_alloca.sroa.2.0..sroa_cast = bitcast i8 addrspace(4)** %__arg_kArg_alloca.sroa.2.0..sroa_idx3 to i64*
30+
store i64 %const_reg_qword1, i64* %__arg_kArg_alloca.sroa.2.0..sroa_cast, align 8
31+
%__arg_kArg_alloca.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.KernelArgWithPtr, %struct.KernelArgWithPtr* %agg.tmp1, i64 0, i32 1
32+
store i32 %const_reg_dword, i32* %__arg_kArg_alloca.sroa.3.0..sroa_idx4, align 8
33+
%__arg_kArg_alloca.sroa.4.0..sroa_idx5 = getelementptr inbounds %struct.KernelArgWithPtr, %struct.KernelArgWithPtr* %agg.tmp1, i64 0, i32 2
34+
store i32 %const_reg_dword2, i32* %__arg_kArg_alloca.sroa.4.0..sroa_idx5, align 4
35+
%add.i = add nsw i32 %const_reg_dword, %const_reg_dword2
36+
37+
%bitCast = bitcast %struct.KernelArgWithPtr* %agg.tmp1 to i32 addrspace(4)**
38+
%arrayidx.i = getelementptr inbounds i32 addrspace(4)*, i32 addrspace(4)** %bitCast, i64 %_arg_randIndex
39+
40+
%load = load i32 addrspace(4)*, i32 addrspace(4)** %arrayidx.i, align 8
41+
%arrayidx8.i = getelementptr inbounds i32, i32 addrspace(4)* %load, i64 5
42+
%spaceCast = addrspacecast i32 addrspace(4)* %arrayidx8.i to i32 addrspace(1)*
43+
store i32 15, i32 addrspace(1)* %spaceCast, align 4
44+
45+
ret void
46+
}
47+
48+
!igc.functions = !{!0}
49+
50+
!0 = !{void (%struct.__generated_KernelArgWithPtr*, i64, <8 x i32>, <3 x i32>, i8*, i64, i64, i32, i32)* @test, !1}
51+
!1 = !{!2}
52+
!2 = !{!"function_type", i32 0}

0 commit comments

Comments
 (0)