diff options
author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-04-20 00:38:45 +0000 |
---|---|---|
committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-04-20 00:38:45 +0000 |
commit | eece9dc81cedf0cda5c356c180f60d91c99920fc (patch) | |
tree | 9e8581cf7f5da30f274c65bbcb4f0f94c3f9dd44 | |
parent | 181b14797518e714e1b6112db849ca53192b8f23 (diff) | |
download | llvm-eece9dc81cedf0cda5c356c180f60d91c99920fc.tar.gz llvm-eece9dc81cedf0cda5c356c180f60d91c99920fc.tar.bz2 llvm-eece9dc81cedf0cda5c356c180f60d91c99920fc.tar.xz |
Revert r155136 "Defer some shl transforms to DAGCombine."
While the patch was perfect and defect free, it exposed a really nasty
bug in X86 SelectionDAG that caused an llc crash when compiling lencod.
I'll put the patch back in after fixing the SelectionDAG problem.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@155181 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineShifts.cpp | 74 | ||||
-rw-r--r-- | test/Transforms/InstCombine/2010-11-01-lshr-mask.ll | 8 | ||||
-rw-r--r-- | test/Transforms/InstCombine/apint-shift.ll | 16 | ||||
-rw-r--r-- | test/Transforms/InstCombine/cast.ll | 8 | ||||
-rw-r--r-- | test/Transforms/InstCombine/shift.ll | 59 | ||||
-rw-r--r-- | test/Transforms/PhaseOrdering/basic.ll | 27 | ||||
-rw-r--r-- | test/Transforms/PhaseOrdering/scev.ll | 64 |
7 files changed, 56 insertions, 200 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 4c14509cb6..b31049e59f 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -529,19 +529,6 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, ShiftOp = 0; if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { - - // This is a constant shift of a constant shift. Be careful about hiding - // shl instructions behind bit masks. They are used to represent multiplies - // by a constant, and it is important that simple arithmetic expressions - // are still recognizable by scalar evolution. - // - // The transforms applied to shl are very similar to the transforms applied - // to mul by constant. We can be more aggressive about optimizing right - // shifts. - // - // Combinations of right and left shifts will still be optimized in - // DAGCombine where scalar evolution no longer applies. - ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1)); uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); @@ -567,6 +554,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } if (ShiftAmt1 == ShiftAmt2) { + // If we have ((X >>? C) << C), turn this into X & (-1 << C). + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); + return BinaryOperator::CreateAnd(X, + ConstantInt::get(I.getContext(),Mask)); + } // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { @@ -576,23 +570,28 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } } else if (ShiftAmt1 < ShiftAmt2) { uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; - - // (X >>?,exact C1) << C2 --> X << (C2-C1) - // The inexact version is deferred to DAGCombine so we don't hide shl - // behind a bit mask. + + // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) if (I.getOpcode() == Instruction::Shl && - ShiftOp->getOpcode() != Instruction::Shl && - ShiftOp->isExact()) { + ShiftOp->getOpcode() != Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); - BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, - X, ShiftDiffCst); - NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); - NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); - return NewShl; + if (ShiftOp->isExact()) { + // (X >>?,exact C1) << C2 --> X << (C2-C1) + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); + return NewShl; + } + Value *Shift = Builder->CreateShl(X, ShiftDiffCst); + + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); } - + // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { @@ -628,19 +627,24 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, assert(ShiftAmt2 < ShiftAmt1); uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; - // (X >>?exact C1) << C2 --> X >>?exact (C1-C2) - // The inexact version is deferred to DAGCombine so we don't hide shl - // behind a bit mask. + // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2) if (I.getOpcode() == Instruction::Shl && - ShiftOp->getOpcode() != Instruction::Shl && - ShiftOp->isExact()) { + ShiftOp->getOpcode() != Instruction::Shl) { ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); - BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(), - X, ShiftDiffCst); - NewShr->setIsExact(true); - return NewShr; + if (ShiftOp->isExact()) { + // (X >>?exact C1) << C2 --> X >>?exact (C1-C2) + BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(), + X, ShiftDiffCst); + NewShr->setIsExact(true); + return NewShr; + } + Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), + X, ShiftDiffCst); + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); } - + // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { diff --git a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll index eb28994756..441d5f9b0b 100644 --- a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll +++ b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll @@ -5,8 +5,8 @@ define i32 @main(i32 %argc) nounwind ssp { entry: %tmp3151 = trunc i32 %argc to i8 -; CHECK: %tmp3163 = shl i8 %tmp3162, 6 -; CHECK: and i8 %tmp3163, 64 +; CHECK: %tmp3162 = shl i8 %tmp3151, 5 +; CHECK: and i8 %tmp3162, 64 ; CHECK-NOT: shl ; CHECK-NOT: shr %tmp3161 = or i8 %tmp3151, -17 @@ -38,8 +38,8 @@ bb: %tmp10 = lshr i8 %tmp8, 7 %tmp11 = shl i8 %tmp10, 5 -; CHECK: %tmp10 = lshr i8 %tmp8, 7 -; CHECK: %tmp11 = shl nuw nsw i8 %tmp10, 5 +; CHECK: %0 = lshr i8 %tmp8, 2 +; CHECK: %tmp11 = and i8 %0, 32 %tmp12 = xor i8 %tmp11, %tmp9 ret i8 %tmp12 diff --git a/test/Transforms/InstCombine/apint-shift.ll b/test/Transforms/InstCombine/apint-shift.ll index 73f630ebfe..0ea73a058c 100644 --- a/test/Transforms/InstCombine/apint-shift.ll +++ b/test/Transforms/InstCombine/apint-shift.ll @@ -47,21 +47,13 @@ define i32 @test5a(i32 %A) { } ; CHECK: @test6 -; CHECK: mul i55 %A, 6 +; CHECK-NOT: sh define i55 @test6(i55 %A) { %B = shl i55 %A, 1 ; <i55> [#uses=1] %C = mul i55 %B, 3 ; <i55> [#uses=1] ret i55 %C } -; CHECK: @test6a -; CHECK: mul i55 %A, 6 -define i55 @test6a(i55 %A) { - %B = mul i55 %A, 3 ; <i55> [#uses=1] - %C = shl i55 %B, 1 ; <i55> [#uses=1] - ret i55 %C -} - ; CHECK: @test7 ; CHECK-NOT: sh define i29 @test7(i8 %X) { @@ -95,8 +87,7 @@ define i19 @test10(i19 %A) { } ; CHECK: @test11 -; Don't hide the shl from scalar evolution. DAGCombine will get it. -; CHECK: shl +; CHECK-NOT: sh define i23 @test11(i23 %A) { %a = mul i23 %A, 3 ; <i23> [#uses=1] %B = lshr i23 %a, 11 ; <i23> [#uses=1] @@ -113,8 +104,7 @@ define i47 @test12(i47 %A) { } ; CHECK: @test13 -; Don't hide the shl from scalar evolution. DAGCombine will get it. -; CHECK: shl +; CHECK-NOT: sh define i18 @test13(i18 %A) { %a = mul i18 %A, 3 ; <i18> [#uses=1] %B = ashr i18 %a, 8 ; <i18> [#uses=1] diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll index 7cf63e8ce1..19d5a0ae77 100644 --- a/test/Transforms/InstCombine/cast.ll +++ b/test/Transforms/InstCombine/cast.ll @@ -457,12 +457,10 @@ define i64 @test50(i64 %A) { %E = sext i32 %D to i64 ret i64 %E ; CHECK: @test50 -; lshr+shl will be handled by DAGCombine. -; CHECK-NEXT: lshr i64 %A, 2 -; CHECK-NEXT: shl i64 %a, 32 +; CHECK-NEXT: shl i64 %A, 30 ; CHECK-NEXT: add i64 {{.*}}, -4294967296 -; CHECK-NEXT: %E = ashr exact i64 {{.*}}, 32 -; CHECK-NEXT: ret i64 %E +; CHECK-NEXT: %sext = ashr i64 {{.*}}, 32 +; CHECK-NEXT: ret i64 %sext } define i64 @test51(i64 %A, i1 %cond) { diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll index 25e708b7f5..52310e34e0 100644 --- a/test/Transforms/InstCombine/shift.ll +++ b/test/Transforms/InstCombine/shift.ll @@ -65,17 +65,8 @@ define i32 @test6(i32 %A) { ; CHECK: @test6 ; CHECK-NEXT: mul i32 %A, 6 ; CHECK-NEXT: ret i32 - %B = shl i32 %A, 1 ;; convert to an mul instruction - %C = mul i32 %B, 3 - ret i32 %C -} - -define i32 @test6a(i32 %A) { -; CHECK: @test6a -; CHECK-NEXT: mul i32 %A, 6 -; CHECK-NEXT: ret i32 - %B = mul i32 %A, 3 - %C = shl i32 %B, 1 ;; convert to an mul instruction + %B = shl i32 %A, 1 ;; convert to an mul instruction + %C = mul i32 %B, 3 ret i32 %C } @@ -106,9 +97,7 @@ define i8 @test9(i8 %A) { ret i8 %C } -;; This transformation is deferred to DAGCombine: ;; (A >> 7) << 7 === A & 128 -;; The shl may be valuable to scalar evolution. define i8 @test10(i8 %A) { ; CHECK: @test10 ; CHECK-NEXT: and i8 %A, -128 @@ -118,21 +107,11 @@ define i8 @test10(i8 %A) { ret i8 %C } -;; Allow the simplification when the lshr shift is exact. -define i8 @test10a(i8 %A) { -; CHECK: @test10a -; CHECK-NEXT: ret i8 %A - %B = lshr exact i8 %A, 7 - %C = shl i8 %B, 7 - ret i8 %C -} - -;; This transformation is deferred to DAGCombine: ;; (A >> 3) << 4 === (A & 0x1F) << 1 -;; The shl may be valuable to scalar evolution. define i8 @test11(i8 %A) { ; CHECK: @test11 -; CHECK: shl i8 +; CHECK-NEXT: mul i8 %A, 6 +; CHECK-NEXT: and i8 ; CHECK-NEXT: ret i8 %a = mul i8 %A, 3 ; <i8> [#uses=1] %B = lshr i8 %a, 3 ; <i8> [#uses=1] @@ -140,18 +119,6 @@ define i8 @test11(i8 %A) { ret i8 %C } -;; Allow the simplification in InstCombine when the lshr shift is exact. -define i8 @test11a(i8 %A) { -; CHECK: @test11a -; CHECK-NEXT: mul i8 %A, 6 -; CHECK-NEXT: ret i8 - %a = mul i8 %A, 3 - %B = lshr exact i8 %a, 3 - %C = shl i8 %B, 4 - ret i8 %C -} - -;; This is deferred to DAGCombine unless %B is single-use. ;; (A >> 8) << 8 === A & -256 define i32 @test12(i32 %A) { ; CHECK: @test12 @@ -162,12 +129,11 @@ define i32 @test12(i32 %A) { ret i32 %C } -;; This transformation is deferred to DAGCombine: ;; (A >> 3) << 4 === (A & -8) * 2 -;; The shl may be valuable to scalar evolution. define i8 @test13(i8 %A) { ; CHECK: @test13 -; CHECK: shl i8 +; CHECK-NEXT: mul i8 %A, 6 +; CHECK-NEXT: and i8 ; CHECK-NEXT: ret i8 %a = mul i8 %A, 3 ; <i8> [#uses=1] %B = ashr i8 %a, 3 ; <i8> [#uses=1] @@ -175,16 +141,6 @@ define i8 @test13(i8 %A) { ret i8 %C } -define i8 @test13a(i8 %A) { -; CHECK: @test13a -; CHECK-NEXT: mul i8 %A, 6 -; CHECK-NEXT: ret i8 - %a = mul i8 %A, 3 - %B = ashr exact i8 %a, 3 - %C = shl i8 %B, 4 - ret i8 %C -} - ;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4) define i32 @test14(i32 %A) { ; CHECK: @test14 @@ -521,11 +477,10 @@ entry: %tmp49 = lshr i8 %tmp48, 5 %tmp50 = mul i8 %tmp49, 64 %tmp51 = xor i8 %tmp50, %tmp5 +; CHECK: and i8 %0, 16 %tmp52 = and i8 %tmp51, -128 %tmp53 = lshr i8 %tmp52, 7 -; CHECK: lshr i8 %tmp51, 7 %tmp54 = mul i8 %tmp53, 16 -; CHECK: shl nuw nsw i8 %tmp53, 4 %tmp55 = xor i8 %tmp54, %tmp51 ; CHECK: ret i8 %tmp551 ret i8 %tmp55 diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll index 88ebca0a9c..2d52ae50df 100644 --- a/test/Transforms/PhaseOrdering/basic.ll +++ b/test/Transforms/PhaseOrdering/basic.ll @@ -22,30 +22,3 @@ define void @test1() nounwind ssp { ; CHECK: @test1 ; CHECK-NEXT: ret void } - -; This function exposes a phase ordering problem when InstCombine is -; turning %add into a bitmask, making it difficult to spot a 0 return value. -; -; It it also important that %add is expressed as a multiple of %div so scalar -; evolution can recognize it. -define i32 @test2(i32 %a, i32* %p) nounwind uwtable ssp { -entry: - %div = udiv i32 %a, 4 - %arrayidx = getelementptr inbounds i32* %p, i64 0 - store i32 %div, i32* %arrayidx, align 4 - %add = add i32 %div, %div - %arrayidx1 = getelementptr inbounds i32* %p, i64 1 - store i32 %add, i32* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds i32* %p, i64 1 - %0 = load i32* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds i32* %p, i64 0 - %1 = load i32* %arrayidx3, align 4 - %mul = mul i32 2, %1 - %sub = sub i32 %0, %mul - ret i32 %sub - -; CHECK: @test2 -; CHECK: %div = lshr i32 %a, 2 -; CHECK: %add = shl nuw nsw i32 %div, 1 -; CHECK: ret i32 0 -} diff --git a/test/Transforms/PhaseOrdering/scev.ll b/test/Transforms/PhaseOrdering/scev.ll deleted file mode 100644 index c731280822..0000000000 --- a/test/Transforms/PhaseOrdering/scev.ll +++ /dev/null @@ -1,64 +0,0 @@ -; RUN: opt -O3 -S -analyze -scalar-evolution %s | FileCheck %s -; -; This file contains phase ordering tests for scalar evolution. -; Test that the standard passes don't obfuscate the IR so scalar evolution can't -; recognize expressions. - -; CHECK: test1 -; The loop body contains two increments by %div. -; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d. -; CHECK: --> {%p,+,(2 * (%d /u 4) * sizeof(i32))} -define void @test1(i64 %d, i32* %p) nounwind uwtable ssp { -entry: - %div = udiv i64 %d, 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ] - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] - %cmp = icmp ne i32 %i.0, 64 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - store i32 0, i32* %p.addr.0, align 4 - %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div - store i32 1, i32* %add.ptr, align 4 - %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div - br label %for.inc - -for.inc: ; preds = %for.body - %inc = add i32 %i.0, 1 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - -; CHECK: test1a -; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2) -; CHECK: --> {%p,+,(2 * (%d /u 2) * sizeof(i32))} -define void @test1a(i64 %d, i32* %p) nounwind uwtable ssp { -entry: - %div = udiv i64 %d, 2 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ] - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] - %cmp = icmp ne i32 %i.0, 64 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - store i32 0, i32* %p.addr.0, align 4 - %add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div - store i32 1, i32* %add.ptr, align 4 - %add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div - br label %for.inc - -for.inc: ; preds = %for.body - %inc = add i32 %i.0, 1 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} |