Skip to content

Commit a9f8143

Browse files
authored
[LoopInterchange] Ignore the cost-model, force interchange if legal (#148858)
This has proven useful for testing purposes, as it provides more test coverage.
1 parent 4c70195 commit a9f8143

File tree

2 files changed

+63
-4
lines changed

2 files changed

+63
-4
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ enum class RuleTy {
7878
PerLoopCacheAnalysis,
7979
PerInstrOrderCost,
8080
ForVectorization,
81+
Ignore
8182
};
8283

8384
} // end anonymous namespace
@@ -106,14 +107,20 @@ static cl::list<RuleTy> Profitabilities(
106107
clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
107108
"Prioritize the IVs order of each instruction"),
108109
clEnumValN(RuleTy::ForVectorization, "vectorize",
109-
"Prioritize vectorization")));
110+
"Prioritize vectorization"),
111+
clEnumValN(RuleTy::Ignore, "ignore",
112+
"Ignore profitability, force interchange (does not "
113+
"work with other options)")));
110114

111115
#ifndef NDEBUG
112-
static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
116+
static bool noDuplicateRulesAndIgnore(ArrayRef<RuleTy> Rules) {
113117
SmallSet<RuleTy, 4> Set;
114-
for (RuleTy Rule : Rules)
118+
for (RuleTy Rule : Rules) {
115119
if (!Set.insert(Rule).second)
116120
return false;
121+
if (Rule == RuleTy::Ignore)
122+
return false;
123+
}
117124
return true;
118125
}
119126

@@ -1357,6 +1364,13 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
13571364
bool LoopInterchangeProfitability::isProfitable(
13581365
const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
13591366
unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1367+
1368+
// Return true if interchange is forced, i.e. the cost model is ignored.
1369+
if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
1370+
return true;
1371+
assert(noDuplicateRulesAndIgnore(Profitabilities) &&
1372+
"Duplicate rules and option 'ignore' are not allowed");
1373+
13601374
// isProfitable() is structured to avoid endless loop interchange. If the
13611375
// highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
13621376
// decide the profitability, the profitability check will stop and return the
@@ -1365,7 +1379,6 @@ bool LoopInterchangeProfitability::isProfitable(
13651379
// second highest priority rule (isProfitablePerInstrOrderCost by default).
13661380
// Likewise, only if it fails to analyze the profitability too, the last
13671381
// rule (isProfitableForVectorization by default) will decide.
1368-
assert(noDuplicateRules(Profitabilities) && "Detect duplicate rules");
13691382
std::optional<bool> shouldInterchange;
13701383
for (RuleTy RT : Profitabilities) {
13711384
switch (RT) {
@@ -1382,6 +1395,9 @@ bool LoopInterchangeProfitability::isProfitable(
13821395
shouldInterchange =
13831396
isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
13841397
break;
1398+
case RuleTy::Ignore:
1399+
llvm_unreachable("Option 'ignore' is not supported with other options");
1400+
break;
13851401
}
13861402

13871403
// If this rule could determine the profitability, don't call subsequent
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; RUN: opt < %s -passes=loop-interchange -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=ignore -S
2+
; RUN: FileCheck --input-file=%t %s
3+
4+
; There should be no reason to interchange this, unless it is forced.
5+
;
6+
; for (int i = 0; i<1024; i++)
7+
; for (int j = 0; j<1024; j++)
8+
; A[i][j] = 42;
9+
;
10+
; CHECK: --- !Passed
11+
; CHECK-NEXT: Pass: loop-interchange
12+
; CHECK-NEXT: Name: Interchanged
13+
; CHECK-NEXT: Function: f
14+
; CHECK-NEXT: Args:
15+
; CHECK-NEXT: - String: Loop interchanged with enclosing loop.
16+
; CHECK-NEXT: ...
17+
18+
@A = dso_local local_unnamed_addr global [1024 x [1024 x i32]] zeroinitializer, align 4
19+
20+
define dso_local void @f() local_unnamed_addr #0 {
21+
entry:
22+
br label %outer.header
23+
24+
outer.header:
25+
%i = phi i64 [ 0, %entry ], [ %i.next, %inner.header ]
26+
br label %inner.body
27+
28+
inner.header:
29+
%i.next = add nuw nsw i64 %i, 1
30+
%exitcond20.not = icmp eq i64 %i.next, 1024
31+
br i1 %exitcond20.not, label %exit, label %outer.header
32+
33+
inner.body:
34+
%j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
35+
%arrayidx6 = getelementptr inbounds nuw [1024 x [1024 x i32]], ptr @A, i64 0, i64 %i, i64 %j
36+
store i32 42, ptr %arrayidx6, align 4
37+
%j.next = add nuw nsw i64 %j, 1
38+
%exitcond.not = icmp eq i64 %j.next, 1024
39+
br i1 %exitcond.not, label %inner.header, label %inner.body
40+
41+
exit:
42+
ret void
43+
}

0 commit comments

Comments
 (0)