Skip to content
54 changes: 54 additions & 0 deletions llvm/lib/Analysis/DemandedBits.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdint>
Expand Down Expand Up @@ -246,6 +247,59 @@ void DemandedBits::determineLiveOperandBits(
else
AB &= ~(Known.One & ~Known2.One);
break;
case Instruction::SRem:
case Instruction::URem:
case Instruction::UDiv:
case Instruction::SDiv: {
auto Opc = UserI->getOpcode();
auto IsDiv = Opc == Instruction::UDiv || Opc == Instruction::SDiv;
bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;
if (OperandNo == 0) {
const APInt *DivAmnt;
if (match(UserI->getOperand(1), m_APInt(DivAmnt))) {
uint64_t D = DivAmnt->getZExtValue();
if (DivAmnt->isPowerOf2()) {
unsigned Sh = DivAmnt->countr_zero();
if (IsDiv) {
AB = AOut.shl(Sh);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not correct for signed division. Simple example:

(-1 s/ 2) & 1 is 0.
((-1 & 2) s/ 2) & 1 is 1.

} else {
AB = AOut & APInt::getLowBitsSet(BitWidth, Sh);
}
} else if (IsDiv) { // Non power of 2 constant div
// x = q * C + r;
// q = x / C;
// We think of it like grade school division in base 2.
//
// x = [ unused | window m-bits | ... | needed bits ]
// ^ each step emits 1 quotient bit
// |
// |
// C fits in m = ⌈log2 C⌉ bits
// Each new quotient bit consumes the window of m low bits and
// shifts one position left.

// To produce the low LowQ quotient bits we slide the window LowQ
// times, so we need at most LowQ + m low bits of the dividend:
//   Need = LowQ + ceil(log2(C))  (+1 for the sign bit of sdiv).
// For example, assume x = b7 b6 b5 b4 b3 b2 b1 b0,
// LowQ = 4, C = 5 and ceil(log2(C)) = 3.
// step 0: b2 b1 b0, produces quotient q[0].
// step 1: b3 b2 b1, produces quotient q[1].
// step 2: b4 b3 b2, produces quotient q[2].
// step 3: b5 b4 b3, produces quotient q[3].
// k = LowQ - 1 = 3;
// TopIndex = k + (m - 1) = 3 + 2 = 5;
// The dividend bits b5...b0 are enough; we don't care about b6 and b7.
unsigned LowQ = AOut.getActiveBits();
unsigned Need = LowQ + DivAmnt->ceilLogBase2();
if (IsSigned)
Need++;
AB = APInt::getLowBitsSet(BitWidth, std::min(BitWidth, Need));
}
}
}
break;
}
case Instruction::Xor:
case Instruction::PHI:
AB = AOut;
Expand Down
266 changes: 266 additions & 0 deletions llvm/test/Analysis/DemandedBits/div_rem.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s

; Demanded bits for `sdiv i32 %a, 4` when only the low 8 bits of the quotient
; are used (the result is truncated to i8). The expected dividend mask 0x3fc
; is the result mask 0xff shifted left by log2(4) = 2; the constant divisor is
; reported as fully demanded.
; NOTE(review): this mask looks unsound for signed division. With %a = -1 the
; quotient is 0, but (-1 & 0x3fc) sdiv 4 = 0xff, so the demanded result bits
; differ -- confirm before relying on this expectation.
define i8 @test_sdiv_const_amount_4(i32 %a) {
; CHECK-LABEL: 'test_sdiv_const_amount_4'
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div = sdiv i32 %a, 4
; CHECK-DAG: DemandedBits: 0x3fc for %a in %div = sdiv i32 %a, 4
; CHECK-DAG: DemandedBits: 0xffffffff for 4 in %div = sdiv i32 %a, 4
;
%div = sdiv i32 %a, 4
%div.t = trunc i32 %div to i8
ret i8 %div.t
}

; Demanded bits for `sdiv i32 %a, 5` (a non-power-of-two constant) when only
; the low 8 bits of the quotient are used. The expected dividend mask 0xfff
; covers the low 8 + ceil(log2(5)) + 1 = 12 bits.
; NOTE(review): this mask looks unsound. With %a = -1 the quotient is 0, but
; (-1 & 0xfff) sdiv 5 = 0x333, whose low byte is 0x33 -- confirm before
; relying on this expectation.
define i8 @test_sdiv_const_amount_5(i32 %a) {
; CHECK-LABEL: 'test_sdiv_const_amount_5'
; CHECK-DAG: DemandedBits: 0xff for %div = sdiv i32 %a, 5
; CHECK-DAG: DemandedBits: 0xfff for %a in %div = sdiv i32 %a, 5
; CHECK-DAG: DemandedBits: 0xffffffff for 5 in %div = sdiv i32 %a, 5
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
;
%div = sdiv i32 %a, 5
%div.t = trunc i32 %div to i8
ret i8 %div.t
}

; Demanded bits for `sdiv i32 %a, 8` when only the low 8 bits of the quotient
; are used. The expected dividend mask 0x7f8 is the result mask 0xff shifted
; left by log2(8) = 3.
; NOTE(review): this mask looks unsound for signed division. With %a = -1 the
; quotient is 0, but (-1 & 0x7f8) sdiv 8 = 0xff -- confirm before relying on
; this expectation.
define i8 @test_sdiv_const_amount_8(i32 %a) {
; CHECK-LABEL: 'test_sdiv_const_amount_8'
; CHECK-DAG: DemandedBits: 0xff for %div = sdiv i32 %a, 8
; CHECK-DAG: DemandedBits: 0x7f8 for %a in %div = sdiv i32 %a, 8
; CHECK-DAG: DemandedBits: 0xffffffff for 8 in %div = sdiv i32 %a, 8
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
;
%div = sdiv i32 %a, 8
%div.t = trunc i32 %div to i8
ret i8 %div.t
}

; Demanded bits for `sdiv i32 %a, 9` (a non-power-of-two constant) when only
; the low 8 bits of the quotient are used. This function previously used
; `udiv`, which duplicated test_udiv_const_amount_9 and left signed division
; by 9 untested. The expected dividend mask 0x1fff covers the low
; 8 + ceil(log2(9)) + 1 = 13 bits (the extra bit accounts for the sign).
; NOTE(review): this mask looks unsound. With %a = -1 the quotient is 0, but
; (-1 & 0x1fff) sdiv 9 = 0x38e, whose low byte is 0x8e -- confirm before
; relying on this expectation.
define i8 @test_sdiv_const_amount_9(i32 %a) {
; CHECK-LABEL: 'test_sdiv_const_amount_9'
; CHECK-DAG: DemandedBits: 0xff for %div = sdiv i32 %a, 9
; CHECK-DAG: DemandedBits: 0x1fff for %a in %div = sdiv i32 %a, 9
; CHECK-DAG: DemandedBits: 0xffffffff for 9 in %div = sdiv i32 %a, 9
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
;
%div = sdiv i32 %a, 9
%div.t = trunc i32 %div to i8
ret i8 %div.t
}

; Demanded bits for `sdiv i32 %a, %b` with a non-constant divisor when only
; the low 8 bits of the quotient are used. Both operands are expected to be
; reported as fully demanded (0xffffffff).
define i8 @test_sdiv(i32 %a, i32 %b) {
; CHECK-LABEL: 'test_sdiv'
; CHECK-DAG: DemandedBits: 0xff for %div = sdiv i32 %a, %b
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %div = sdiv i32 %a, %b
; CHECK-DAG: DemandedBits: 0xffffffff for %b in %div = sdiv i32 %a, %b
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
;
%div = sdiv i32 %a, %b
%div.t = trunc i32 %div to i8
ret i8 %div.t
}

; Demanded bits for `udiv i32 %a, 4` when only the low 8 bits of the quotient
; are used (the result is truncated to i8). The expected dividend mask 0x3fc
; is the result mask 0xff shifted left by log2(4) = 2; the constant divisor is
; reported as fully demanded.
define i8 @test_udiv_const_amount_4(i32 %a) {
; CHECK-LABEL: 'test_udiv_const_amount_4'
; CHECK-DAG: DemandedBits: 0xff for %div = udiv i32 %a, 4
; CHECK-DAG: DemandedBits: 0x3fc for %a in %div = udiv i32 %a, 4
; CHECK-DAG: DemandedBits: 0xffffffff for 4 in %div = udiv i32 %a, 4
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
;
%div = udiv i32 %a, 4
%div.t = trunc i32 %div to i8
ret i8 %div.t
}

define i8 @test_udiv_const_amount_5(i32 %a) {
; CHECK-LABEL: 'test_udiv_const_amount_5'
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div = udiv i32 %a, 5
; CHECK-DAG: DemandedBits: 0x7ff for %a in %div = udiv i32 %a, 5
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's imagine %a is 0xffffffff. 0xffffffff/0x5 is 0x33333333.

This is saying that we're allowed to treat %a as 0x7ff because the upper bits don't matter. 0x7ff/0x5 is 0x199. That's a different value in bits 7:0 than the 0x33333333.

What am I missing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct. When developing the algorithm, I assumed a Knuth-like division with a bit-by-bit recurrence, which isn't accurate: real division doesn't behave exactly like this.

; CHECK-DAG: DemandedBits: 0xffffffff for 5 in %div = udiv i32 %a, 5
;
%div = udiv i32 %a, 5
%div.t = trunc i32 %div to i8
ret i8 %div.t
}

; Demanded bits for `udiv i32 %a, 8` when only the low 8 bits of the quotient
; are used. The expected dividend mask 0x7f8 is the result mask 0xff shifted
; left by log2(8) = 3.
define i8 @test_udiv_const_amount_8(i32 %a) {
; CHECK-LABEL: 'test_udiv_const_amount_8'
; CHECK-DAG: DemandedBits: 0xff for %div = udiv i32 %a, 8
; CHECK-DAG: DemandedBits: 0x7f8 for %a in %div = udiv i32 %a, 8
; CHECK-DAG: DemandedBits: 0xffffffff for 8 in %div = udiv i32 %a, 8
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
;
%div = udiv i32 %a, 8
%div.t = trunc i32 %div to i8
ret i8 %div.t
}

; Demanded bits for `udiv i32 %a, 9` (a non-power-of-two constant) when only
; the low 8 bits of the quotient are used. The expected dividend mask 0xfff
; covers the low 8 + ceil(log2(9)) = 12 bits.
; NOTE(review): this mask looks unsound. With %a = 0xffffffff the quotient is
; 0x1c71c71c (low byte 0x1c), but (0xffffffff & 0xfff) udiv 9 = 0x1c7 (low
; byte 0xc7) -- confirm before relying on this expectation.
define i8 @test_udiv_const_amount_9(i32 %a) {
; CHECK-LABEL: 'test_udiv_const_amount_9'
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div = udiv i32 %a, 9
; CHECK-DAG: DemandedBits: 0xfff for %a in %div = udiv i32 %a, 9
; CHECK-DAG: DemandedBits: 0xffffffff for 9 in %div = udiv i32 %a, 9
;
%div = udiv i32 %a, 9
%div.t = trunc i32 %div to i8
ret i8 %div.t
}

; Demanded bits for `udiv i32 %a, %b` with a non-constant divisor when only
; the low 8 bits of the quotient are used. Both operands are expected to be
; reported as fully demanded (0xffffffff).
define i8 @test_udiv(i32 %a, i32 %b) {
; CHECK-LABEL: 'test_udiv'
; CHECK-DAG: DemandedBits: 0xff for %div = udiv i32 %a, %b
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %div = udiv i32 %a, %b
; CHECK-DAG: DemandedBits: 0xffffffff for %b in %div = udiv i32 %a, %b
; CHECK-DAG: DemandedBits: 0xff for %div.t = trunc i32 %div to i8
; CHECK-DAG: DemandedBits: 0xff for %div in %div.t = trunc i32 %div to i8
;
%div = udiv i32 %a, %b
%div.t = trunc i32 %div to i8
ret i8 %div.t
}

; Demanded bits for `srem i32 %a, 4` when only the low 8 bits of the remainder
; are used (the result is truncated to i8). The expected dividend mask 0x3 is
; the low log2(4) = 2 bits; the constant divisor is reported as fully
; demanded.
; NOTE(review): this mask looks unsound for signed remainder. With %a = -1 the
; remainder is -1 (low byte 0xff), but (-1 & 0x3) srem 4 = 3 (low byte 0x03)
; -- confirm before relying on this expectation.
define i8 @test_srem_const_amount_4(i32 %a) {
; CHECK-LABEL: 'test_srem_const_amount_4'
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem = srem i32 %a, 4
; CHECK-DAG: DemandedBits: 0x3 for %a in %rem = srem i32 %a, 4
; CHECK-DAG: DemandedBits: 0xffffffff for 4 in %rem = srem i32 %a, 4
;
%rem = srem i32 %a, 4
%rem.t = trunc i32 %rem to i8
ret i8 %rem.t
}

; Demanded bits for `srem i32 %a, 5` (a non-power-of-two constant) when only
; the low 8 bits of the remainder are used. The dividend is expected to be
; reported as fully demanded (0xffffffff).
define i8 @test_srem_const_amount_5(i32 %a) {
; CHECK-LABEL: 'test_srem_const_amount_5'
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem = srem i32 %a, 5
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %rem = srem i32 %a, 5
; CHECK-DAG: DemandedBits: 0xffffffff for 5 in %rem = srem i32 %a, 5
;
%rem = srem i32 %a, 5
%rem.t = trunc i32 %rem to i8
ret i8 %rem.t
}

; Demanded bits for `srem i32 %a, 8` when only the low 8 bits of the remainder
; are used. The expected dividend mask 0x7 is the low log2(8) = 3 bits.
; NOTE(review): this mask looks unsound for signed remainder. With %a = -1 the
; remainder is -1 (low byte 0xff), but (-1 & 0x7) srem 8 = 7 (low byte 0x07)
; -- confirm before relying on this expectation.
define i8 @test_srem_const_amount_8(i32 %a) {
; CHECK-LABEL: 'test_srem_const_amount_8'
; CHECK-DAG: DemandedBits: 0xff for %rem = srem i32 %a, 8
; CHECK-DAG: DemandedBits: 0x7 for %a in %rem = srem i32 %a, 8
; CHECK-DAG: DemandedBits: 0xffffffff for 8 in %rem = srem i32 %a, 8
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
;
%rem = srem i32 %a, 8
%rem.t = trunc i32 %rem to i8
ret i8 %rem.t
}

; Demanded bits for `srem i32 %a, 9` (a non-power-of-two constant) when only
; the low 8 bits of the remainder are used. The dividend is expected to be
; reported as fully demanded (0xffffffff).
define i8 @test_srem_const_amount_9(i32 %a) {
; CHECK-LABEL: 'test_srem_const_amount_9'
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem = srem i32 %a, 9
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %rem = srem i32 %a, 9
; CHECK-DAG: DemandedBits: 0xffffffff for 9 in %rem = srem i32 %a, 9
;
%rem = srem i32 %a, 9
%rem.t = trunc i32 %rem to i8
ret i8 %rem.t
}

; Demanded bits for `srem i32 %a, %b` with a non-constant divisor when only
; the low 8 bits of the remainder are used. Both operands are expected to be
; reported as fully demanded (0xffffffff).
define i8 @test_srem(i32 %a, i32 %b) {
; CHECK-LABEL: 'test_srem'
; CHECK-DAG: DemandedBits: 0xff for %rem = srem i32 %a, %b
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %rem = srem i32 %a, %b
; CHECK-DAG: DemandedBits: 0xffffffff for %b in %rem = srem i32 %a, %b
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
;
%rem = srem i32 %a, %b
%rem.t = trunc i32 %rem to i8
ret i8 %rem.t
}

; Demanded bits for `urem i32 %a, 4` when only the low 8 bits of the remainder
; are used (the result is truncated to i8). The expected dividend mask 0x3 is
; the low log2(4) = 2 bits; the constant divisor is reported as fully
; demanded.
define i8 @test_urem_const_amount_4(i32 %a) {
; CHECK-LABEL: 'test_urem_const_amount_4'
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem = urem i32 %a, 4
; CHECK-DAG: DemandedBits: 0x3 for %a in %rem = urem i32 %a, 4
; CHECK-DAG: DemandedBits: 0xffffffff for 4 in %rem = urem i32 %a, 4
;
%rem = urem i32 %a, 4
%rem.t = trunc i32 %rem to i8
ret i8 %rem.t
}






; Demanded bits for `urem i32 %a, 5` (a non-power-of-two constant) when only
; the low 8 bits of the remainder are used. The dividend is expected to be
; reported as fully demanded (0xffffffff).
define i8 @test_urem_const_amount_5(i32 %a) {
; CHECK-LABEL: 'test_urem_const_amount_5'
; CHECK-DAG: DemandedBits: 0xff for %rem = urem i32 %a, 5
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %rem = urem i32 %a, 5
; CHECK-DAG: DemandedBits: 0xffffffff for 5 in %rem = urem i32 %a, 5
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
;
%rem = urem i32 %a, 5
%rem.t = trunc i32 %rem to i8
ret i8 %rem.t
}

; Demanded bits for `urem i32 %a, 8` when only the low 8 bits of the remainder
; are used. The expected dividend mask 0x7 is the low log2(8) = 3 bits.
define i8 @test_urem_const_amount_8(i32 %a) {
; CHECK-LABEL: 'test_urem_const_amount_8'
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem = urem i32 %a, 8
; CHECK-DAG: DemandedBits: 0x7 for %a in %rem = urem i32 %a, 8
; CHECK-DAG: DemandedBits: 0xffffffff for 8 in %rem = urem i32 %a, 8
;
%rem = urem i32 %a, 8
%rem.t = trunc i32 %rem to i8
ret i8 %rem.t
}

; Demanded bits for `urem i32 %a, 9` (a non-power-of-two constant) when only
; the low 8 bits of the remainder are used. The dividend is expected to be
; reported as fully demanded (0xffffffff).
define i8 @test_urem_const_amount_9(i32 %a) {
; CHECK-LABEL: 'test_urem_const_amount_9'
; CHECK-DAG: DemandedBits: 0xff for %rem = urem i32 %a, 9
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %rem = urem i32 %a, 9
; CHECK-DAG: DemandedBits: 0xffffffff for 9 in %rem = urem i32 %a, 9
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
;
%rem = urem i32 %a, 9
%rem.t = trunc i32 %rem to i8
ret i8 %rem.t
}

; Demanded bits for `urem i32 %a, %b` with a non-constant divisor when only
; the low 8 bits of the remainder are used. Both operands are expected to be
; reported as fully demanded (0xffffffff).
define i8 @test_urem(i32 %a, i32 %b) {
; CHECK-LABEL: 'test_urem'
; CHECK-DAG: DemandedBits: 0xff for %rem = urem i32 %a, %b
; CHECK-DAG: DemandedBits: 0xffffffff for %a in %rem = urem i32 %a, %b
; CHECK-DAG: DemandedBits: 0xffffffff for %b in %rem = urem i32 %a, %b
; CHECK-DAG: DemandedBits: 0xff for %rem.t = trunc i32 %rem to i8
; CHECK-DAG: DemandedBits: 0xff for %rem in %rem.t = trunc i32 %rem to i8
;
%rem = urem i32 %a, %b
%rem.t = trunc i32 %rem to i8
ret i8 %rem.t
}