Skip to content

Commit e8906d0

Browse files
michalpaszkowski authored and igcbot committed
Fix i8/opaque pointer byte offset GEP scalarization in PrivateMemoryResolution
When LLVM IR uses opaque pointers or inserts a bitcast to i8*, a subsequent GEP is expressed in bytes. The legacy handleGEPInst always scalarized indices by starting from pGEP->getSourceElementType(). After the i8* cast, that type is i8, so the algorithm mistakenly treated the byte index as a count of elements, producing a mis-scaled (too large) scalarized index.

Example:

  %a = alloca [16 x [16 x float]], align 4
  %b = bitcast [16 x [16 x float]]* %a to i8*
  %c = getelementptr inbounds i8, i8* %b, i64 64

Here, 64 is a byte offset into the original aggregate. The old implementation, seeing i8, scaled the index as if it were 64 elements rather than 64 bytes. However, the meaningful base of the GEP is the alloca's aggregate type [16 x [16 x float]], and the element calculations should be based on that type.

This change:

1. Introduces getFirstNonScalarSourceElementType(GEP), which walks back from the GEP base through pointer casts to find a root aggregate element type.
2. Adds additional handling in handleGEPInst, so that an i8 GEP byte offset is converted to an element index of the underlying base type.

This way the algorithm avoids basing element index scalarization on an incidental i8* and keeps the index calculation aligned with the underlying allocation layout.

For reference, in typed pointer mode (or without the bitcast), the GEP would look like this:

  %a = alloca [16 x [16 x float]], align 4
  %c = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* %a, i64 0, i64 1

Here, %c is a pointer to the 2nd inner array, i.e. it has type [16 x float]*.
1 parent bdd9b15 commit e8906d0

File tree

3 files changed

+76
-4
lines changed

3 files changed

+76
-4
lines changed

IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.cpp

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2024 Intel Corporation
3+
Copyright (C) 2017-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -824,6 +824,25 @@ std::pair<unsigned int, Type *> TransposeHelper::getArrSizeAndEltType(Type *T) {
824824
return std::make_pair(arr_sz, retTy);
825825
}
826826

827+
// Recovers the element type that a scalar-typed (e.g. i8 / byte-offset) GEP really addresses.
//
// If the GEP's own source element type already describes more than one element (as reported by
// getArrSizeAndEltType), that type is returned unchanged. Otherwise the pointer operand is followed through pointer
// casts and chained GEPs towards its root:
//   - root is an alloca          -> the allocated type is returned,
//   - root is a global variable  -> the global's value type is returned,
//   - anything else              -> the GEP's own source element type is returned.
//
// Pointers produced by loads are deliberately not inspected: the type of a load result is the pointer type itself,
// not the type of the memory it points to, so it carries no information about the element layout. Store instructions
// produce no value and therefore can never appear as a GEP base.
Type *TransposeHelper::getFirstNonScalarSourceElementType(const GetElementPtrInst &GEP) {
  Type *currTy = GEP.getSourceElementType();
  if (getArrSizeAndEltType(currTy).first > 1)
    return currTy;

  // stripPointerCasts() only looks through casts and all-zero GEPs. Byte-offset GEPs are frequently chained
  // (gep i8 on top of another gep), so keep walking through intermediate GEPs to reach the underlying allocation.
  // The walk is bounded so that malformed, self-referential IR (possible in unreachable code) cannot loop forever.
  const unsigned maxChainDepth = 16;
  const Value *base = GEP.getPointerOperand()->stripPointerCasts();
  for (unsigned depth = 0; depth < maxChainDepth; ++depth) {
    const auto *innerGEP = dyn_cast<GetElementPtrInst>(base);
    if (!innerGEP)
      break;
    base = innerGEP->getPointerOperand()->stripPointerCasts();
  }

  if (const auto *AI = dyn_cast<AllocaInst>(base))
    return AI->getAllocatedType();
  if (const auto *GV = dyn_cast<GlobalVariable>(base))
    return GV->getValueType();

  // Unknown root (argument, PHI, load result, ...): no better information than the GEP's own type.
  return currTy;
}
845+
827846
void TransposeHelper::handleGEPInst(llvm::GetElementPtrInst *pGEP, llvm::Value *idx) {
828847
// TODO: Add support for GEP attributes: nsw, nuw, inbounds. Currently, neither the old nor the new algorithm handles
829848
// them.
@@ -841,13 +860,38 @@ void TransposeHelper::handleGEPInst(llvm::GetElementPtrInst *pGEP, llvm::Value *
841860
return;
842861
}
843862

863+
IRBuilder<> IRB(pGEP);
864+
Value *pScalarizedIdx = IRB.getInt32(0);
865+
866+
// If the GEP is on i8, its index is a byte offset and must be converted to an element index of the underlying base
867+
// type.
868+
if (pGEP->getSourceElementType()->isIntegerTy(8)) {
869+
// Get the non-scalar/aggregate GEP source element type.
870+
Type *baseAggregateTy = getFirstNonScalarSourceElementType(*pGEP);
871+
// Find the scalar element type at the bottom of the aggregate.
872+
Type *elementTy = baseAggregateTy;
873+
while (elementTy->isStructTy() || elementTy->isArrayTy() || elementTy->isVectorTy()) {
874+
elementTy = getArrSizeAndEltType(elementTy).second;
875+
}
876+
elementTy = elementTy->getScalarType();
877+
uint32_t elementBytes = (uint32_t)m_DL.getTypeAllocSize(elementTy);
878+
879+
// The 1st operand is the byte offset, convert bytes to element count.
880+
Value *byteIndex = IRB.CreateZExtOrTrunc(pGEP->getOperand(1), IRB.getInt32Ty());
881+
if (elementBytes > 1)
882+
byteIndex = IRB.CreateUDiv(byteIndex, IRB.getInt32(elementBytes));
883+
884+
pScalarizedIdx = IRB.CreateAdd(pScalarizedIdx, byteIndex);
885+
pScalarizedIdx = IRB.CreateAdd(pScalarizedIdx, idx);
886+
HandleAllocaSources(pGEP, pScalarizedIdx);
887+
return;
888+
}
889+
844890
// Given %p = getelementptr [4 x [3 x <2 x float>]]* %v, i64 0, i64 %1, i64 %2
845891
// compute the scalarized index with an auxiliary array [4, 3, 2]:
846892
//
847893
// Formula: index = (%1 x 3 + %2) x 2
848894
//
849-
IRBuilder<> IRB(pGEP);
850-
Value *pScalarizedIdx = IRB.getInt32(0);
851895
Type *T = pGEP->getSourceElementType();
852896
for (unsigned i = 0, e = pGEP->getNumIndices(); i < e; ++i) {
853897
// If T is VectorType we should be at the last loop iteration. This will break things only if m_vectorIndex == true.

IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2024 Intel Corporation
3+
Copyright (C) 2017-2025 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -155,5 +155,6 @@ class TransposeHelper {
155155
private:
156156
bool m_vectorIndex;
157157
std::pair<unsigned int, llvm::Type *> getArrSizeAndEltType(llvm::Type *T);
158+
llvm::Type *getFirstNonScalarSourceElementType(const llvm::GetElementPtrInst &GEP);
158159
};
159160
} // namespace IGC
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: igc_opt --opaque-pointers --igc-private-mem-resolution --platformlnl -S %s | FileCheck %s
10+
11+
; This test ensures GEP scalarization on i8*/opaque ptr offsets treats the index as bytes and converts to element index via recovered base type size.
12+
13+
; CHECK-NOT: mul i32 64
14+
; CHECK: mul i32 16
15+
16+
; Kernel that addresses a private 16x16 float array through an i8 (byte-offset)
; GEP and then reads an <8 x i32> vector from the resulting pointer.
define spir_kernel void @test() {
  ; Private array whose scalar element (float) is 4 bytes wide.
  %priv = alloca [16 x [16 x float]], align 4
  ; Byte offset 64 from the array base, i.e. 16 floats into the allocation.
  %byte_ptr = getelementptr inbounds i8, ptr %priv, i64 64
  ; Zero-index GEP that re-types the address as a pointer to <8 x i32>.
  %vec_ptr = getelementptr <8 x i32>, ptr %byte_ptr, i32 0
  %vec = load <8 x i32>, ptr %vec_ptr, align 4
  ret void
}
23+
24+
!igc.functions = !{!1}
25+
!1 = !{ptr @test, !2}
26+
!2 = !{!3}
27+
!3 = !{!"function_type", i32 0}

0 commit comments

Comments
 (0)