Skip to content

Commit fdae5ad

Browse files
committed
[LLVM] DemandedBits: Propagate demanded bits through div/rem ops
1 parent 977cfea commit fdae5ad

File tree

2 files changed

+319
-0
lines changed

2 files changed

+319
-0
lines changed

llvm/lib/Analysis/DemandedBits.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "llvm/Support/Casting.h"
3737
#include "llvm/Support/Debug.h"
3838
#include "llvm/Support/KnownBits.h"
39+
#include "llvm/Support/MathExtras.h"
3940
#include "llvm/Support/raw_ostream.h"
4041
#include <algorithm>
4142
#include <cstdint>
@@ -246,6 +247,63 @@ void DemandedBits::determineLiveOperandBits(
246247
else
247248
AB &= ~(Known.One & ~Known2.One);
248249
break;
250+
case Instruction::UDiv:
251+
case Instruction::URem:
252+
case Instruction::SDiv:
253+
case Instruction::SRem: {
254+
auto Opc = UserI->getOpcode();
255+
auto IsDiv = Opc == Instruction::UDiv || Opc == Instruction::SDiv;
256+
bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;
257+
if (OperandNo == 0) {
258+
const APInt *DivAmnt;
259+
if (match(UserI->getOperand(1), m_APInt(DivAmnt))) {
260+
uint64_t D = DivAmnt->getZExtValue();
261+
if (isPowerOf2_64(D)) {
262+
unsigned Sh = Log2_64(D);
263+
if (IsDiv) {
264+
AB = AOut.shl(Sh);
265+
} else {
266+
AB = AOut & APInt::getLowBitsSet(BitWidth, Sh);
267+
}
268+
} else { // Non power of 2 constant div
269+
/*
270+
* x = q * C + r;
271+
* q = x / C;
272+
* We think of it like grade school division in base 2.
273+
*
274+
x = [ unused | window m-bits | ... | needed bits ]
275+
^ each step emits 1 quotient bit
276+
|
277+
|
278+
C fits in m = ⌈log₂ C⌉ bits |
279+
Each new quotient bit consumes the window of m low bits and
280+
shifts one position left.
281+
282+
To produce the first LowQ quotient/rem bits we slide the window
283+
LowQ times --> need at most LowQ + m low bits of the dividend.
284+
Need = LowQ + Ceil(log2(C)) (+1 sign bit for sdiv/srem).
285+
For example :
286+
Assume x = b7 b6 b5 b4 b3 b2 b1 b0.
287+
LowQ = 4, C = 5 and ceil(log_2(C)) = 3.
288+
step 0: b2 b1 b0, produces quotient q[0].
289+
step 1: b3 b2 b1, produces quotient q[1].
290+
step 2: b4 b3 b2, produces quotient q[2].
291+
step 3: b5 b4 b3, produces quotient q[3].
292+
k = LowQ - 1;
293+
TopIndex = k + m-1 = 3 + 2 = 5;
294+
The dividend bits b5...b0 are enough; we don't care about b6 and b7.
295+
The same applies to URem/SRem.
296+
* */
297+
unsigned LowQ = AOut.getActiveBits();
298+
unsigned Need = LowQ + Log2_64_Ceil(D);
299+
if (IsSigned)
300+
Need++;
301+
AB = APInt::getLowBitsSet(BitWidth, std::min(BitWidth, Need));
302+
}
303+
}
304+
}
305+
break;
306+
}
249307
case Instruction::Xor:
250308
case Instruction::PHI:
251309
AB = AOut;
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s
2+
3+
define i8 @test_sdiv_const_amount_4(i32 %a) {
4+
; CHECK-LABEL: 'test_sdiv_const_amount_4'
5+
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
6+
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
7+
; CHECK-DAG: DemandedBits: 0xff for %div = sdiv i32 %a, 4
8+
; CHECK-DAG: DemandedBits: 0x3fc for %a in %div = sdiv i32 %a, 4
9+
; CHECK-DAG: DemandedBits: 0xffffffff for 4 in %div = sdiv i32 %a, 4
10+
;
11+
%div = sdiv i32 %a, 4
12+
%div.t = trunc i32 %div to i8
13+
ret i8 %div.t
14+
}
15+
16+
define i8 @test_sdiv_const_amount_5(i32 %a) {
17+
; CHECK-LABEL: 'test_sdiv_const_amount_5'
18+
; CHECK-DAG: DemandedBits: 0xff for %div = sdiv i32 %a, 5
19+
; CHECK-DAG: DemandedBits: 0xfff for %a in %div = sdiv i32 %a, 5
20+
; CHECK-DAG: DemandedBits: 0xffffffff for 5 in %div = sdiv i32 %a, 5
21+
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
22+
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
23+
;
24+
%div = sdiv i32 %a, 5
25+
%div.t = trunc i32 %div to i8
26+
ret i8 %div.t
27+
}
28+
29+
define i8 @test_sdiv_const_amount_8(i32 %a) {
30+
; CHECK-LABEL: 'test_sdiv_const_amount_8'
31+
; CHECK-DAG: DemandedBits: 0xff for %div = sdiv i32 %a, 8
32+
; CHECK-DAG: DemandedBits: 0x7f8 for %a in %div = sdiv i32 %a, 8
33+
; CHECK-DAG: DemandedBits: 0xffffffff for 8 in %div = sdiv i32 %a, 8
34+
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
35+
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
36+
;
37+
%div = sdiv i32 %a, 8
38+
%div.t = trunc i32 %div to i8
39+
ret i8 %div.t
40+
}
41+
42+
define i8 @test_sdiv_const_amount_9(i32 %a) {
43+
; CHECK-LABEL: 'test_sdiv_const_amount_9'
44+
; CHECK-DAG: DemandedBits: 0xff for %div = sdiv i32 %a, 9
45+
; CHECK-DAG: DemandedBits: 0x1fff for %a in %div = sdiv i32 %a, 9
46+
; CHECK-DAG: DemandedBits: 0xffffffff for 9 in %div = sdiv i32 %a, 9
47+
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
48+
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
49+
; Signed division by the non-power-of-2 constant 9: 8 demanded result bits + ceil(log2(9)) = 4 + 1 sign bit = 13 low dividend bits (0x1fff).
50+
%div = sdiv i32 %a, 9
51+
%div.t = trunc i32 %div to i8
52+
ret i8 %div.t
53+
}
54+
55+
define i8 @test_sdiv(i32 %a, i32 %b) {
56+
; CHECK-LABEL: 'test_sdiv'
57+
; CHECK-DAG: DemandedBits: 0xff for %div = sdiv i32 %a, %b
58+
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %div = sdiv i32 %a, %b
59+
; CHECK-DAG: DemandedBits: 0xffffffff for %b in %div = sdiv i32 %a, %b
60+
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
61+
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
62+
;
63+
%div = sdiv i32 %a, %b
64+
%div.t = trunc i32 %div to i8
65+
ret i8 %div.t
66+
}
67+
68+
define i8 @test_udiv_const_amount_4(i32 %a) {
69+
; CHECK-LABEL: 'test_udiv_const_amount_4'
70+
; CHECK-DAG: DemandedBits: 0xff for %div = udiv i32 %a, 4
71+
; CHECK-DAG: DemandedBits: 0x3fc for %a in %div = udiv i32 %a, 4
72+
; CHECK-DAG: DemandedBits: 0xffffffff for 4 in %div = udiv i32 %a, 4
73+
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
74+
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
75+
;
76+
%div = udiv i32 %a, 4
77+
%div.t = trunc i32 %div to i8
78+
ret i8 %div.t
79+
}
80+
81+
define i8 @test_udiv_const_amount_5(i32 %a) {
82+
; CHECK-LABEL: 'test_udiv_const_amount_5'
83+
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
84+
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
85+
; CHECK-DAG: DemandedBits: 0xff for %div = udiv i32 %a, 5
86+
; CHECK-DAG: DemandedBits: 0x7ff for %a in %div = udiv i32 %a, 5
87+
; CHECK-DAG: DemandedBits: 0xffffffff for 5 in %div = udiv i32 %a, 5
88+
;
89+
%div = udiv i32 %a, 5
90+
%div.t = trunc i32 %div to i8
91+
ret i8 %div.t
92+
}
93+
94+
define i8 @test_udiv_const_amount_8(i32 %a) {
95+
; CHECK-LABEL: 'test_udiv_const_amount_8'
96+
; CHECK-DAG: DemandedBits: 0xff for %div = udiv i32 %a, 8
97+
; CHECK-DAG: DemandedBits: 0x7f8 for %a in %div = udiv i32 %a, 8
98+
; CHECK-DAG: DemandedBits: 0xffffffff for 8 in %div = udiv i32 %a, 8
99+
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
100+
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
101+
;
102+
%div = udiv i32 %a, 8
103+
%div.t = trunc i32 %div to i8
104+
ret i8 %div.t
105+
}
106+
107+
define i8 @test_udiv_const_amount_9(i32 %a) {
108+
; CHECK-LABEL: 'test_udiv_const_amount_9'
109+
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
110+
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
111+
; CHECK-DAG: DemandedBits: 0xff for %div = udiv i32 %a, 9
112+
; CHECK-DAG: DemandedBits: 0xfff for %a in %div = udiv i32 %a, 9
113+
; CHECK-DAG: DemandedBits: 0xffffffff for 9 in %div = udiv i32 %a, 9
114+
;
115+
%div = udiv i32 %a, 9
116+
%div.t = trunc i32 %div to i8
117+
ret i8 %div.t
118+
}
119+
120+
define i8 @test_udiv(i32 %a, i32 %b) {
121+
; CHECK-LABEL: 'test_udiv'
122+
; CHECK-DAG: DemandedBits: 0xff for %div = udiv i32 %a, %b
123+
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %div = udiv i32 %a, %b
124+
; CHECK-DAG: DemandedBits: 0xffffffff for %b in %div = udiv i32 %a, %b
125+
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
126+
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
127+
;
128+
%div = udiv i32 %a, %b
129+
%div.t = trunc i32 %div to i8
130+
ret i8 %div.t
131+
}
132+
133+
define i8 @test_srem_const_amount_4(i32 %a) {
134+
; CHECK-LABEL: 'test_srem_const_amount_4'
135+
; CHECK-DAG: DemandedBits: 0xff for %rem = srem i32 %a, 4
136+
; CHECK-DAG: DemandedBits: 0x3 for %a in %rem = srem i32 %a, 4
137+
; CHECK-DAG: DemandedBits: 0xffffffff for 4 in %rem = srem i32 %a, 4
138+
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
139+
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
140+
;
141+
%rem = srem i32 %a, 4
142+
%rem.t = trunc i32 %rem to i8
143+
ret i8 %rem.t
144+
}
145+
146+
define i8 @test_srem_const_amount_5(i32 %a) {
147+
; CHECK-LABEL: 'test_srem_const_amount_5'
148+
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
149+
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
150+
; CHECK-DAG: DemandedBits: 0xff for %rem = srem i32 %a, 5
151+
; CHECK-DAG: DemandedBits: 0xfff for %a in %rem = srem i32 %a, 5
152+
; CHECK-DAG: DemandedBits: 0xffffffff for 5 in %rem = srem i32 %a, 5
153+
;
154+
%rem = srem i32 %a, 5
155+
%rem.t = trunc i32 %rem to i8
156+
ret i8 %rem.t
157+
}
158+
159+
define i8 @test_srem_const_amount_8(i32 %a) {
160+
; CHECK-LABEL: 'test_srem_const_amount_8'
161+
; CHECK-DAG: DemandedBits: 0xff for %rem = srem i32 %a, 8
162+
; CHECK-DAG: DemandedBits: 0x7 for %a in %rem = srem i32 %a, 8
163+
; CHECK-DAG: DemandedBits: 0xffffffff for 8 in %rem = srem i32 %a, 8
164+
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
165+
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
166+
;
167+
%rem = srem i32 %a, 8
168+
%rem.t = trunc i32 %rem to i8
169+
ret i8 %rem.t
170+
}
171+
172+
define i8 @test_srem_const_amount_9(i32 %a) {
173+
; CHECK-LABEL: 'test_srem_const_amount_9'
174+
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
175+
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
176+
; CHECK-DAG: DemandedBits: 0xff for %rem = srem i32 %a, 9
177+
; CHECK-DAG: DemandedBits: 0x1fff for %a in %rem = srem i32 %a, 9
178+
; CHECK-DAG: DemandedBits: 0xffffffff for 9 in %rem = srem i32 %a, 9
179+
;
180+
%rem = srem i32 %a, 9
181+
%rem.t = trunc i32 %rem to i8
182+
ret i8 %rem.t
183+
}
184+
185+
define i8 @test_srem(i32 %a, i32 %b) {
186+
; CHECK-LABEL: 'test_srem'
187+
; CHECK-DAG: DemandedBits: 0xff for %rem = srem i32 %a, %b
188+
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %rem = srem i32 %a, %b
189+
; CHECK-DAG: DemandedBits: 0xffffffff for %b in %rem = srem i32 %a, %b
190+
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
191+
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
192+
;
193+
%rem = srem i32 %a, %b
194+
%rem.t = trunc i32 %rem to i8
195+
ret i8 %rem.t
196+
}
197+
198+
define i8 @test_urem_const_amount_4(i32 %a) {
199+
; CHECK-LABEL: 'test_urem_const_amount_4'
200+
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
201+
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
202+
; CHECK-DAG: DemandedBits: 0xff for %rem = urem i32 %a, 4
203+
; CHECK-DAG: DemandedBits: 0x3 for %a in %rem = urem i32 %a, 4
204+
; CHECK-DAG: DemandedBits: 0xffffffff for 4 in %rem = urem i32 %a, 4
205+
;
206+
%rem = urem i32 %a, 4
207+
%rem.t = trunc i32 %rem to i8
208+
ret i8 %rem.t
209+
}
210+
211+
define i8 @test_urem_const_amount_5(i32 %a) {
212+
; CHECK-LABEL: 'test_urem_const_amount_5'
213+
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
214+
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
215+
; CHECK-DAG: DemandedBits: 0xff for %rem = urem i32 %a, 5
216+
; CHECK-DAG: DemandedBits: 0x7ff for %a in %rem = urem i32 %a, 5
217+
; CHECK-DAG: DemandedBits: 0xffffffff for 5 in %rem = urem i32 %a, 5
218+
;
219+
%rem = urem i32 %a, 5
220+
%rem.t = trunc i32 %rem to i8
221+
ret i8 %rem.t
222+
}
223+
224+
define i8 @test_urem_const_amount_8(i32 %a) {
225+
; CHECK-LABEL: 'test_urem_const_amount_8'
226+
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
227+
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
228+
; CHECK-DAG: DemandedBits: 0xff for %rem = urem i32 %a, 8
229+
; CHECK-DAG: DemandedBits: 0x7 for %a in %rem = urem i32 %a, 8
230+
; CHECK-DAG: DemandedBits: 0xffffffff for 8 in %rem = urem i32 %a, 8
231+
;
232+
%rem = urem i32 %a, 8
233+
%rem.t = trunc i32 %rem to i8
234+
ret i8 %rem.t
235+
}
236+
237+
define i8 @test_urem_const_amount_9(i32 %a) {
238+
; CHECK-LABEL: 'test_urem_const_amount_9'
239+
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
240+
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
241+
; CHECK-DAG: DemandedBits: 0xff for %rem = urem i32 %a, 9
242+
; CHECK-DAG: DemandedBits: 0xfff for %a in %rem = urem i32 %a, 9
243+
; CHECK-DAG: DemandedBits: 0xffffffff for 9 in %rem = urem i32 %a, 9
244+
;
245+
%rem = urem i32 %a, 9
246+
%rem.t = trunc i32 %rem to i8
247+
ret i8 %rem.t
248+
}
249+
250+
define i8 @test_urem(i32 %a, i32 %b) {
251+
; CHECK-LABEL: 'test_urem'
252+
; CHECK-DAG: DemandedBits: 0xff for %rem = urem i32 %a, %b
253+
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %rem = urem i32 %a, %b
254+
; CHECK-DAG: DemandedBits: 0xffffffff for %b in %rem = urem i32 %a, %b
255+
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
256+
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
257+
;
258+
%rem = urem i32 %a, %b
259+
%rem.t = trunc i32 %rem to i8
260+
ret i8 %rem.t
261+
}

0 commit comments

Comments
 (0)