Skip to content

Commit cedf0f9

Browse files
lioujheyu igcbot
authored and committed
Parameterize UnrollMaxCountForAlloca in GenTTI
Parameterize UnrollMaxCountForAlloca in GenTTI
1 parent 4c2e31a commit cedf0f9

File tree

3 files changed

+19
-11
lines changed

3 files changed

+19
-11
lines changed

IGC/Compiler/GenTTI.cpp

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,7 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
308308
// It can potentially do some global cost estimations.
309309
// TODO: Having compilation retry enables loop unrolling for this case and determines if unrolling actually helps
310310
// reduce register pressure.
311-
const unsigned UnrollMaxCountForAlloca = 64; // May need to be higher for OpenCL
311+
const unsigned UnrollMaxCountForAlloca = IGC_GET_FLAG_VALUE(PromoteLoopUnrollwithAllocaCountThreshold);
312312
bool AllocaFound = false;
313313
if (MaxTripCount && MaxTripCount <= UnrollMaxCountForAlloca &&
314314
IGC_IS_FLAG_ENABLED(EnablePromoteLoopUnrollwithAlloca)) {
@@ -332,12 +332,16 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
332332

333333
if (!AI)
334334
continue;
335-
336-
Type *Ty = AI->getAllocatedType();
337-
unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
338-
if (AllocaSize > 1024 || AllocaSize == 0)
335+
// Not fixed size or not in entry block
336+
// TODO: Can an alloca with a fixed size not reside in the entry block?
337+
if (!AI->isStaticAlloca())
338+
continue;
339+
// Assume every iteration consumes 1 alloca element.
340+
if (cast<ConstantInt>(AI->getArraySize())->getZExtValue() > UnrollMaxCountForAlloca)
339341
continue;
340342

343+
// Using alloca size in bytes as the threshold boost seems a bit tricky.
344+
unsigned AllocaSize = *(AI->getAllocationSizeInBits(DL)) / 8;
341345
ThresholdBoost += AllocaSize;
342346
if (GEP)
343347
isGEPLoopInduction[GEP] = true;
@@ -348,7 +352,6 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
348352
// LLVM default only to 10, boost to UnrollMaxCountForAlloca
349353
UP.MaxIterationsCountToAnalyze = UnrollMaxCountForAlloca;
350354
UP.Threshold += ThresholdBoost;
351-
UP.Runtime = true;
352355
UP.UpperBound = true;
353356
UP.Force = true;
354357

IGC/common/igc_flags.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -449,6 +449,10 @@ DECLARE_IGC_REGKEY(
449449
"Disable this flag makes them always cost something as well as disables dynamic threshold increase based on the "
450450
"size of alloca and number of GEP to the alloca in the loop, leading to the loop less likely to be unrolled.",
451451
false)
452+
DECLARE_IGC_REGKEY(DWORD, PromoteLoopUnrollwithAllocaCountThreshold, 256,
453+
"The loop trip count OR number of alloca elements cutoff to stop regkey "
454+
"EnablePromoteLoopUnrollwithAlloca (Check regkey description).",
455+
false)
452456
DECLARE_IGC_REGKEY(DWORD, SetRegisterPressureThresholdForLoopUnroll, 96,
453457
"Set the register pressure threshold for limiting the loop unroll to smaller loops", false)
454458
DECLARE_IGC_REGKEY(DWORD, SetBranchSwapThreshold, 400, "Set the branch swaping threshold.", false)

IGC/ocloc_tests/features/metadata_travel_check/user_private_var.cl

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,14 +9,15 @@ SPDX-License-Identifier: MIT
99

1010
// windows unsupported due to issues on 32bit build, to be debugged.
1111
// UNSUPPORTED: system-windows
12+
// Disable loop unroll so that the private memory is not optimized out.
1213

1314
// checking the asm dump file
14-
// RUN: ocloc compile -file %s -options " -g -igc_opts 'VISAOptions=-asmToConsole'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-ASM
15+
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 VISAOptions=-asmToConsole'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-ASM
1516
// checking the llvm-IR after EmitVISAPass
16-
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM
17-
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM2
18-
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM3
19-
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM4
17+
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM
18+
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM2
19+
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM3
20+
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM4
2021

2122
// Looking for the comment which informs about the amount of spill size
2223
// CHECK-ASM: //.private memory size

0 commit comments

Comments
 (0)