[LoopInterchange] Add an 'ignore' profitability rule that forces interchange

Passing -loop-interchange-profitabilities=ignore makes
LoopInterchangeProfitability::isProfitable() return true without consulting
any of the profitability rules (cache analysis, instruction order,
vectorization).

'ignore' must be the only entry in the list. Combining it with other rules
trips the assertion in isProfitable() in asserts builds; in NDEBUG builds,
where that assertion is compiled out, reaching the 'ignore' case in the rule
loop is reported as a fatal error instead of executing llvm_unreachable().

The new test forces interchange of a loop nest that the cost model has no
reason to interchange and checks the emitted optimization remark.

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a5008907b9014..2b893032d232c 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -78,6 +78,7 @@ enum class RuleTy {
   PerLoopCacheAnalysis,
   PerInstrOrderCost,
   ForVectorization,
+  Ignore
 };
 
 } // end anonymous namespace
@@ -106,14 +107,22 @@ static cl::list<RuleTy> Profitabilities(
                clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
                           "Prioritize the IVs order of each instruction"),
                clEnumValN(RuleTy::ForVectorization, "vectorize",
-                          "Prioritize vectorization")));
+                          "Prioritize vectorization"),
+               clEnumValN(RuleTy::Ignore, "ignore",
+                          "Ignore profitability, force interchange (does not "
+                          "work with other options)")));
 
 #ifndef NDEBUG
-static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
+/// Returns true iff \p Rules contains no duplicated rule and no
+/// RuleTy::Ignore entry. Guarded by NDEBUG; used from an assertion.
+static bool noDuplicateRulesAndIgnore(ArrayRef<RuleTy> Rules) {
   SmallSet<RuleTy, 4> Set;
-  for (RuleTy Rule : Rules)
+  for (RuleTy Rule : Rules) {
     if (!Set.insert(Rule).second)
       return false;
+    if (Rule == RuleTy::Ignore)
+      return false;
+  }
   return true;
 }
 
@@ -1286,6 +1295,13 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
 bool LoopInterchangeProfitability::isProfitable(
     const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
     unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
+
+  // Return true if interchange is forced and the cost-model is ignored.
+  if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
+    return true;
+  assert(noDuplicateRulesAndIgnore(Profitabilities) &&
+         "Duplicate rules and option 'ignore' are not allowed");
+
   // isProfitable() is structured to avoid endless loop interchange. If the
   // highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
   // decide the profitability then, profitability check will stop and return the
@@ -1294,7 +1310,6 @@ bool LoopInterchangeProfitability::isProfitable(
   // second highest priority rule (isProfitablePerInstrOrderCost by default).
   // Likewise, if it failed to analysis the profitability then only, the last
   // rule (isProfitableForVectorization by default) will decide.
-  assert(noDuplicateRules(Profitabilities) && "Detect duplicate rules");
   std::optional<bool> shouldInterchange;
   for (RuleTy RT : Profitabilities) {
     switch (RT) {
@@ -1311,6 +1326,12 @@ bool LoopInterchangeProfitability::isProfitable(
       shouldInterchange =
           isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
       break;
+    case RuleTy::Ignore:
+      // 'ignore' mixed with other rules is a user error on the command line.
+      // The assert above is compiled out under NDEBUG, so diagnose it here
+      // rather than executing llvm_unreachable().
+      report_fatal_error("Option 'ignore' is not supported with other options",
+                         /*gen_crash_diag=*/false);
     }
 
     // If this rule could determine the profitability, don't call subsequent
diff --git a/llvm/test/Transforms/LoopInterchange/force-interchange.ll b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
new file mode 100644
index 0000000000000..c33ecdf7d9905
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -passes=loop-interchange -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=ignore
+; RUN: FileCheck --input-file=%t %s
+
+; There should be no reason to interchange this, unless it is forced.
+;
+;     for (int i = 0; i<1024; i++)
+;       for (int j = 0; j<1024; j++)
+;         A[i][j] = 42;
+;
+; CHECK: --- !Passed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: f
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - String: Loop interchanged with enclosing loop.
+; CHECK-NEXT: ...
+
+@A = dso_local local_unnamed_addr global [1024 x [1024 x i32]] zeroinitializer, align 4
+
+define dso_local void @f() local_unnamed_addr #0 {
+entry:
+  br label %outer.header
+
+outer.header:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %inner.header ]
+  br label %inner.body
+
+inner.header:
+  %i.next = add nuw nsw i64 %i, 1
+  %exitcond20.not = icmp eq i64 %i.next, 1024
+  br i1 %exitcond20.not, label %exit, label %outer.header
+
+inner.body:
+  %j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
+  %arrayidx6 = getelementptr inbounds nuw [1024 x [1024 x i32]], ptr @A, i64 0, i64 %i, i64 %j
+  store i32 42, ptr %arrayidx6, align 4
+  %j.next = add nuw nsw i64 %j, 1
+  %exitcond.not = icmp eq i64 %j.next, 1024
+  br i1 %exitcond.not, label %inner.header, label %inner.body
+
+exit:
+  ret void
+}