Parameterize UnrollMaxCountForAlloca in GenTTI

lioujheyu · igcbot · commit cedf0f970b43 · 2025-08-14T20:17:15.000+02:00
diff --git a/IGC/Compiler/GenTTI.cpp b/IGC/Compiler/GenTTI.cpp
@@ -308,7 +308,7 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   //       It can potentially do some global cost estimations.
   // TODO: Having compilation retry enables loop unrolling for this case and determines if unrolling actually helps
   //       reduce register pressure.
-  const unsigned UnrollMaxCountForAlloca = 64; // May need to be higher for OpenCL
+  const unsigned UnrollMaxCountForAlloca = IGC_GET_FLAG_VALUE(PromoteLoopUnrollwithAllocaCountThreshold);
   bool AllocaFound = false;
   if (MaxTripCount && MaxTripCount <= UnrollMaxCountForAlloca &&
       IGC_IS_FLAG_ENABLED(EnablePromoteLoopUnrollwithAlloca)) {
@@ -332,12 +332,16 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 
         if (!AI)
           continue;
-
-        Type *Ty = AI->getAllocatedType();
-        unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
-        if (AllocaSize > 1024 || AllocaSize == 0)
+        // Skip allocas that are not fixed-size or not in the entry block.
+        // TODO: Can an alloca with a fixed size not reside in the entry block?
+        if (!AI->isStaticAlloca())
+          continue;
+        // Assume every iteration consumes 1 alloca element.
+        if (cast<ConstantInt>(AI->getArraySize())->getZExtValue() > UnrollMaxCountForAlloca)
           continue;
 
+        // Using alloca size in bytes as the threshold boost seems a bit tricky.
+        unsigned AllocaSize = *(AI->getAllocationSizeInBits(DL)) / 8;
         ThresholdBoost += AllocaSize;
         if (GEP)
           isGEPLoopInduction[GEP] = true;
@@ -348,7 +352,6 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
       // LLVM default only to 10, boost to UnrollMaxCountForAlloca
       UP.MaxIterationsCountToAnalyze = UnrollMaxCountForAlloca;
       UP.Threshold += ThresholdBoost;
-      UP.Runtime = true;
       UP.UpperBound = true;
       UP.Force = true;
 
diff --git a/IGC/common/igc_flags.h b/IGC/common/igc_flags.h
@@ -449,6 +449,10 @@ DECLARE_IGC_REGKEY(
     "Disable this flag makes them always cost something as well as disables dynamic threshold increase based on the "
     "size of alloca and number of GEP to the alloca in the loop, leading to the loop less likely to be unrolled.",
     false)
+DECLARE_IGC_REGKEY(DWORD, PromoteLoopUnrollwithAllocaCountThreshold, 256,
+                   "The loop trip count OR number of alloca elements cutoff to stop regkey "
+                   "EnablePromoteLoopUnrollwithAlloca (Check regkey description).",
+                   false)
 DECLARE_IGC_REGKEY(DWORD, SetRegisterPressureThresholdForLoopUnroll, 96,
                    "Set the register pressure threshold for limiting the loop unroll to smaller loops", false)
 DECLARE_IGC_REGKEY(DWORD, SetBranchSwapThreshold, 400, "Set the branch swaping threshold.", false)
diff --git a/IGC/ocloc_tests/features/metadata_travel_check/user_private_var.cl b/IGC/ocloc_tests/features/metadata_travel_check/user_private_var.cl
@@ -9,14 +9,15 @@ SPDX-License-Identifier: MIT
 
 // windows unsupported due to issues on 32bit build, to be debugged.
 // UNSUPPORTED: system-windows
+// Disable loop unroll so that the private memory is not optimized out.
 
 // checking the asm dump file
-// RUN: ocloc compile -file %s -options " -g -igc_opts 'VISAOptions=-asmToConsole'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-ASM
+// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 VISAOptions=-asmToConsole'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-ASM
 // checking the llvm-IR after EmitVISAPass
-// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM
-// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM2
-// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM3
-// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM4
+// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM
+// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM2
+// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM3
+// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM4
 
 // Looking for the comment which informs about the amount of spill size
 // CHECK-ASM: //.private memory size