author     Nick Lewycky <nicholas@mxc.ca>    2012-03-18 23:28:48 +0000
committer  Nick Lewycky <nicholas@mxc.ca>    2012-03-18 23:28:48 +0000
commit     f201a066625e32884c9d2b766ff48fe0b70e179a (patch)
tree       bbfe58f4c1e432c9981e11f321492912af388a60
parent     97327dc6ef5183bbad308e19ed86488c7e94d973 (diff)
Factor out the multiply analysis code in ComputeMaskedBits and apply it to the
overflow checking multiply intrinsic as well. Add a test for this, updating the
test from grep to FileCheck.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153028 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--   lib/Analysis/ValueTracking.cpp        138
-rw-r--r--   test/Transforms/InstCombine/mul.ll     80
2 files changed, 150 insertions(+), 68 deletions(-)
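The heart of the patch is the new ComputeMaskedBitsMul helper in the diff below: the product of two values has at least as many trailing zero bits as the two operands have combined, and any surplus of combined leading zeros beyond the bit width carries over to the product. A minimal standalone C++ sketch of that arithmetic, using illustrative constants rather than the LLVM APInt API, looks like this:

#include <algorithm>
#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned BitWidth = 32;
  uint32_t A = 0x50, B = 0x0C;  // illustrative constants: 80 * 12 = 960

  // Low bits: each factor contributes its own power-of-two divisor, so the
  // product has at least countr_zero(A) + countr_zero(B) trailing zeros.
  unsigned TrailZ = std::countr_zero(A) + std::countr_zero(B);

  // High bits: a (32 - countl_zero(A))-bit value times a
  // (32 - countl_zero(B))-bit value needs at most the sum of those widths,
  // so the product keeps at least countl_zero(A) + countl_zero(B) - 32
  // leading zeros (clamped at zero), mirroring the LeadZ computation below.
  unsigned LZSum = std::countl_zero(A) + std::countl_zero(B);
  unsigned LeadZ = std::max(LZSum, BitWidth) - BitWidth;

  TrailZ = std::min(TrailZ, BitWidth);
  LeadZ = std::min(LeadZ, BitWidth);
  std::printf("product %u: >= %u trailing, >= %u leading zero bits\n",
              A * B, TrailZ, LeadZ);
  return 0;
}

For 80 * 12 = 960 this reports at least 6 trailing and 21 leading zero bits, both of which hold for the actual product.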
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 904c27e89d..01e00caa3b 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -130,6 +130,71 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
}
}
+static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
+ const APInt &Mask,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const TargetData *TD, unsigned Depth) {
+ unsigned BitWidth = Mask.getBitWidth();
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op1, Mask2, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(Op0, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ bool isKnownNegative = false;
+ bool isKnownNonNegative = false;
+ // If the multiplication is known not to overflow, compute the sign bit.
+ if (Mask.isNegative() && NSW) {
+ if (Op0 == Op1) {
+ // The product of a number with itself is non-negative.
+ isKnownNonNegative = true;
+ } else {
+ bool isKnownNonNegativeOp1 = KnownZero.isNegative();
+ bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
+ bool isKnownNegativeOp1 = KnownOne.isNegative();
+ bool isKnownNegativeOp0 = KnownOne2.isNegative();
+ // The product of two numbers with the same sign is non-negative.
+ isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
+ (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
+ // The product of a negative number and a non-negative number is either
+ // negative or zero.
+ if (!isKnownNonNegative)
+ isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
+ isKnownNonZero(Op0, TD, Depth)) ||
+ (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
+ isKnownNonZero(Op1, TD, Depth));
+ }
+ }
+
+ // If low bits are zero in either operand, output low known-0 bits.
+  // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+
+ // Only make use of no-wrap flags if we failed to compute the sign bit
+ // directly. This matters if the multiplication always overflows, in
+ // which case we prefer to follow the result of the direct computation,
+ // though as the program is invoking undefined behaviour we can choose
+ // whatever we like here.
+ if (isKnownNonNegative && !KnownOne.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (isKnownNegative && !KnownZero.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+}
+
/// ComputeMaskedBits - Determine which of the bits specified in Mask are
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
@@ -294,68 +359,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
return;
}
case Instruction::Mul: {
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- bool isKnownNegative = false;
- bool isKnownNonNegative = false;
- // If the multiplication is known not to overflow, compute the sign bit.
- if (Mask.isNegative() &&
- cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap()) {
- Value *Op1 = I->getOperand(1), *Op2 = I->getOperand(0);
- if (Op1 == Op2) {
- // The product of a number with itself is non-negative.
- isKnownNonNegative = true;
- } else {
- bool isKnownNonNegative1 = KnownZero.isNegative();
- bool isKnownNonNegative2 = KnownZero2.isNegative();
- bool isKnownNegative1 = KnownOne.isNegative();
- bool isKnownNegative2 = KnownOne2.isNegative();
- // The product of two numbers with the same sign is non-negative.
- isKnownNonNegative = (isKnownNegative1 && isKnownNegative2) ||
- (isKnownNonNegative1 && isKnownNonNegative2);
- // The product of a negative number and a non-negative number is either
- // negative or zero.
- if (!isKnownNonNegative)
- isKnownNegative = (isKnownNegative1 && isKnownNonNegative2 &&
- isKnownNonZero(Op2, TD, Depth)) ||
- (isKnownNegative2 && isKnownNonNegative1 &&
- isKnownNonZero(Op1, TD, Depth));
- }
- }
-
- // If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conserative estimate for high known-0 bits.
- // More trickiness is possible, but this is sufficient for the
- // interesting case of alignment computation.
- KnownOne.clearAllBits();
- unsigned TrailZ = KnownZero.countTrailingOnes() +
- KnownZero2.countTrailingOnes();
- unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
- KnownZero2.countLeadingOnes(),
- BitWidth) - BitWidth;
-
- TrailZ = std::min(TrailZ, BitWidth);
- LeadZ = std::min(LeadZ, BitWidth);
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
- APInt::getHighBitsSet(BitWidth, LeadZ);
- KnownZero &= Mask;
-
- // Only make use of no-wrap flags if we failed to compute the sign bit
- // directly. This matters if the multiplication always overflows, in
- // which case we prefer to follow the result of the direct computation,
- // though as the program is invoking undefined behaviour we can choose
- // whatever we like here.
- if (isKnownNonNegative && !KnownOne.isNegative())
- KnownZero.setBit(BitWidth - 1);
- else if (isKnownNegative && !KnownZero.isNegative())
- KnownOne.setBit(BitWidth - 1);
-
- return;
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW,
+ Mask, KnownZero, KnownOne, KnownZero2, KnownOne2,
+ TD, Depth);
+ break;
}
case Instruction::UDiv: {
// For the purposes of computing leading zeros we can conservatively
@@ -777,6 +785,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
KnownZero, KnownOne, KnownZero2, KnownOne2,
TD, Depth);
break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1),
+ false, Mask, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, TD, Depth);
+ break;
}
}
}
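With the helper factored out, the intrinsic cases added above reuse it for llvm.umul.with.overflow and llvm.smul.with.overflow, passing NSW as false since the intrinsics are defined to return the wrapped product. The updated test below exercises this through operands masked down to a single bit; a standalone sketch (assumed values only, not LLVM code) of why bit 4 of such a product is known zero:

#include <algorithm>
#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned BitWidth = 32;
  // Known-zero masks for the two multiply operands in test18/test19:
  // after "and i32 %x, 1", bits 1..31 are known to be zero.
  uint32_t KnownZero0 = ~1u;
  uint32_t KnownZero1 = ~1u;

  // No trailing zeros are guaranteed (bit 0 of each operand is unknown),
  // but 31 + 31 leading known-zero bits leave at least 30 for the product.
  unsigned TrailZ = std::countr_one(KnownZero0) + std::countr_one(KnownZero1);
  unsigned LZSum = std::countl_one(KnownZero0) + std::countl_one(KnownZero1);
  unsigned LeadZ = std::max(LZSum, BitWidth) - BitWidth;

  uint32_t KnownZeroMul = (TrailZ ? (1u << TrailZ) - 1 : 0) |
                          (LeadZ ? ~0u << (BitWidth - LeadZ) : 0);
  // Bit 4 of the product is known zero, so "and i32 %F, 16" folds to 0.
  std::printf("bit 4 known zero: %s\n",
              (KnownZeroMul >> 4) & 1 ? "yes" : "no");
  return 0;
}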
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 53a56434ae..edb530585c 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -1,116 +1,184 @@
; This test makes sure that mul instructions are properly eliminated.
-; RUN: opt < %s -instcombine -S | not grep mul
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @test1(i32 %A) {
+; CHECK: @test1
%B = mul i32 %A, 1 ; <i32> [#uses=1]
ret i32 %B
+; CHECK: ret i32 %A
}
define i32 @test2(i32 %A) {
+; CHECK: @test2
; Should convert to an add instruction
%B = mul i32 %A, 2 ; <i32> [#uses=1]
ret i32 %B
+; CHECK: shl i32 %A, 1
}
define i32 @test3(i32 %A) {
+; CHECK: @test3
; This should disappear entirely
%B = mul i32 %A, 0 ; <i32> [#uses=1]
ret i32 %B
+; CHECK: ret i32 0
}
define double @test4(double %A) {
+; CHECK: @test4
; This is safe for FP
%B = fmul double 1.000000e+00, %A ; <double> [#uses=1]
ret double %B
+; CHECK: ret double %A
}
define i32 @test5(i32 %A) {
+; CHECK: @test5
%B = mul i32 %A, 8 ; <i32> [#uses=1]
ret i32 %B
+; CHECK: shl i32 %A, 3
}
define i8 @test6(i8 %A) {
+; CHECK: @test6
%B = mul i8 %A, 8 ; <i8> [#uses=1]
%C = mul i8 %B, 8 ; <i8> [#uses=1]
ret i8 %C
+; CHECK: shl i8 %A, 6
}
define i32 @test7(i32 %i) {
+; CHECK: @test7
%tmp = mul i32 %i, -1 ; <i32> [#uses=1]
ret i32 %tmp
+; CHECK: sub i32 0, %i
}
define i64 @test8(i64 %i) {
- ; tmp = sub 0, %i
+; CHECK: @test8
%j = mul i64 %i, -1 ; <i64> [#uses=1]
ret i64 %j
+; CHECK: sub i64 0, %i
}
define i32 @test9(i32 %i) {
- ; %j = sub 0, %i
+; CHECK: @test9
%j = mul i32 %i, -1 ; <i32> [#uses=1]
ret i32 %j
+; CHECK: sub i32 0, %i
}
define i32 @test10(i32 %a, i32 %b) {
+; CHECK: @test10
%c = icmp slt i32 %a, 0 ; <i1> [#uses=1]
%d = zext i1 %c to i32 ; <i32> [#uses=1]
; e = b & (a >> 31)
%e = mul i32 %d, %b ; <i32> [#uses=1]
ret i32 %e
+; CHECK: [[TEST10:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST10]], %b
+; CHECK-NEXT: ret i32 %e
}
define i32 @test11(i32 %a, i32 %b) {
+; CHECK: @test11
%c = icmp sle i32 %a, -1 ; <i1> [#uses=1]
%d = zext i1 %c to i32 ; <i32> [#uses=1]
; e = b & (a >> 31)
%e = mul i32 %d, %b ; <i32> [#uses=1]
ret i32 %e
+; CHECK: [[TEST11:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST11]], %b
+; CHECK-NEXT: ret i32 %e
}
-define i32 @test12(i8 %a, i32 %b) {
- %c = icmp ugt i8 %a, 127 ; <i1> [#uses=1]
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK: @test12
+ %c = icmp ugt i32 %a, 2147483647 ; <i1> [#uses=1]
%d = zext i1 %c to i32 ; <i32> [#uses=1]
- ; e = b & (a >> 31)
%e = mul i32 %d, %b ; <i32> [#uses=1]
ret i32 %e
+; CHECK: [[TEST12:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST12]], %b
+; CHECK-NEXT: ret i32 %e
+
}
; PR2642
define internal void @test13(<4 x float>*) {
+; CHECK: @test13
load <4 x float>* %0, align 1
fmul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
store <4 x float> %3, <4 x float>* %0, align 1
ret void
+; CHECK-NEXT: ret void
}
define <16 x i8> @test14(<16 x i8> %a) {
+; CHECK: @test14
%b = mul <16 x i8> %a, zeroinitializer
ret <16 x i8> %b
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
}
; rdar://7293527
define i32 @test15(i32 %A, i32 %B) {
+; CHECK: @test15
entry:
%shl = shl i32 1, %B
%m = mul i32 %shl, %A
ret i32 %m
+; CHECK: shl i32 %A, %B
}
; X * Y (when Y is 0 or 1) --> x & (0-Y)
define i32 @test16(i32 %b, i1 %c) {
+; CHECK: @test16
%d = zext i1 %c to i32 ; <i32> [#uses=1]
; e = b & (a >> 31)
%e = mul i32 %d, %b ; <i32> [#uses=1]
ret i32 %e
+; CHECK: [[TEST16:%.*]] = sext i1 %c to i32
+; CHECK-NEXT: %e = and i32 [[TEST16]], %b
+; CHECK-NEXT: ret i32 %e
}
; X * Y (when Y is 0 or 1) --> x & (0-Y)
define i32 @test17(i32 %a, i32 %b) {
+; CHECK: @test17
%a.lobit = lshr i32 %a, 31
%e = mul i32 %a.lobit, %b
ret i32 %e
+; CHECK: [[TEST17:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST17]], %b
+; CHECK-NEXT: ret i32 %e
+}
+
+define i32 @test18(i32 %A, i32 %B) {
+; CHECK: @test18
+ %C = and i32 %A, 1
+ %D = and i32 %B, 1
+
+ %E = mul i32 %C, %D
+ %F = and i32 %E, 16
+ ret i32 %F
+; CHECK-NEXT: ret i32 0
}
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+declare void @use(i1)
+define i32 @test19(i32 %A, i32 %B) {
+; CHECK: @test19
+ %C = and i32 %A, 1
+ %D = and i32 %B, 1
+; It would be nice if we also started proving that this doesn't overflow.
+ %E = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %C, i32 %D)
+ %F = extractvalue {i32, i1} %E, 0
+ %G = extractvalue {i32, i1} %E, 1
+ call void @use(i1 %G)
+ %H = and i32 %F, 16
+ ret i32 %H
+; CHECK: ret i32 0
+}
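For reference, the RUN line at the top of the updated test is expanded by the lit test driver; assuming the file is saved as mul.ll, the equivalent manual invocation is roughly:

opt < mul.ll -instcombine -S | FileCheck mul.ll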