3 files changed, 146 insertions, 77 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index bc5d699e53..d0d4f41d3b 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -705,27 +705,26 @@ static Value *dyn_castZExtVal(Value *V, Type *Ty) {
   return 0;
 }
 
-const unsigned MaxDepth = 6;
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
+
+  // Handle the integer div common cases
+  if (Instruction *Common = commonIDivTransforms(I))
+    return Common;
 
-// \brief Recursively visits the possible right hand operands of a udiv
-// instruction, seeing through select instructions, to determine if we can
-// replace the udiv with something simpler.  If we find that an operand is not
-// able to simplify the udiv, we abort the entire transformation.
-//
-// Inserts any intermediate instructions used for the simplification into
-// NewInstrs and returns a new instruction that depends upon them.
-static Instruction *visitUDivOperand(Value *Op0, Value *Op1,
-                                     const BinaryOperator &I,
-                                     SmallVectorImpl<Instruction *> &NewInstrs,
-                                     unsigned Depth = 0) {
   {
     // X udiv 2^C -> X >> C
     // Check to see if this is an unsigned division with an exact power of 2,
     // if so, convert to a right shift.
     const APInt *C;
     if (match(Op1, m_Power2(C))) {
-      BinaryOperator *LShr = BinaryOperator::CreateLShr(
-          Op0, ConstantInt::get(Op0->getType(), C->logBase2()));
+      BinaryOperator *LShr =
+      BinaryOperator::CreateLShr(Op0,
+                                 ConstantInt::get(Op0->getType(),
+                                                  C->logBase2()));
       if (I.isExact()) LShr->setIsExact();
       return LShr;
     }
@@ -734,68 +733,51 @@ static Instruction *visitUDivOperand(Value *Op0, Value *Op1,
   if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
     // X udiv C, where C >= signbit
     if (C->getValue().isNegative()) {
-      ICmpInst *IC = new ICmpInst(ICmpInst::ICMP_ULT, Op0, C);
-      NewInstrs.push_back(IC);
-
+      Value *IC = Builder->CreateICmpULT(Op0, C);
       return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
                                 ConstantInt::get(I.getType(), 1));
     }
   }
 
+  // (x lshr C1) udiv C2 --> x udiv (C2 << C1)
+  if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {
+    Value *X;
+    ConstantInt *C1;
+    if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) {
+      APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1));
+      return BinaryOperator::CreateUDiv(X, Builder->getInt(NC));
+    }
+  }
+
   // X udiv (C1 << N), where C1 is "1<<C2"  -->  X >> (N+C2)
   { const APInt *CI; Value *N;
     if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||
         match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) {
-      if (*CI != 1) {
-        N = BinaryOperator::CreateAdd(
-            N, ConstantInt::get(N->getType(), CI->logBase2()));
-        NewInstrs.push_back(cast<Instruction>(N));
-      }
-      if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1)) {
-        N = new ZExtInst(N, Z->getDestTy());
-        NewInstrs.push_back(cast<Instruction>(N));
-      }
-      BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N);
-      if (I.isExact()) LShr->setIsExact();
-      return LShr;
+      if (*CI != 1)
+        N = Builder->CreateAdd(N,
+                               ConstantInt::get(N->getType(), CI->logBase2()));
+      if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
+        N = Builder->CreateZExt(N, Z->getDestTy());
+      if (I.isExact())
+        return BinaryOperator::CreateExactLShr(Op0, N);
+      return BinaryOperator::CreateLShr(Op0, N);
     }
   }
 
-  // The remaining tests are all recursive, so bail out if we hit the limit.
-  if (Depth++ == MaxDepth)
-    return 0;
+  // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
+  // where C1&C2 are powers of two.
+  { Value *Cond; const APInt *C1, *C2;
+    if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+      // Construct the "on true" case of the select
+      Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
+                                       I.isExact());
 
-  if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
-    if (Instruction *LHS =
-            visitUDivOperand(Op0, SI->getOperand(1), I, NewInstrs)) {
-      NewInstrs.push_back(LHS);
-      if (Instruction *RHS =
-              visitUDivOperand(Op0, SI->getOperand(2), I, NewInstrs)) {
-        NewInstrs.push_back(RHS);
-        return SelectInst::Create(SI->getCondition(), LHS, RHS);
-      }
-    }
+      // Construct the "on false" case of the select
+      Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
+                                       I.isExact());
 
-  return 0;
-}
-
-Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
-  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
-    return ReplaceInstUsesWith(I, V);
-
-  // Handle the integer div common cases
-  if (Instruction *Common = commonIDivTransforms(I))
-    return Common;
-
-  // (x lshr C1) udiv C2 --> x udiv (C2 << C1)
-  if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {
-    Value *X;
-    ConstantInt *C1;
-    if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) {
-      APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1));
-      return BinaryOperator::CreateUDiv(X, Builder->getInt(NC));
+      // construct the select instruction and return it.
+      return SelectInst::Create(Cond, TSI, FSI);
     }
   }
 
@@ -806,21 +788,6 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
                                               I.isExact()),
                           I.getType());
 
-  // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...))))
-  SmallVector<Instruction *, 4> NewInstrs;
-  Instruction *RetI = visitUDivOperand(Op0, Op1, I, NewInstrs);
-  for (unsigned i = 0, e = NewInstrs.size(); i != e; i++)
-    // If we managed to replace the UDiv completely, insert the new intermediate
-    // instructions before where the UDiv was.
-    // If we couldn't, we must clean up after ourselves by deleting the new
-    // instructions.
-    if (RetI)
-      NewInstrs[i]->insertBefore(&I);
-    else
-      delete NewInstrs[i];
-  if (RetI)
-    return RetI;
-
   return 0;
 }
 
diff --git a/test/Transforms/InstCombine/div-shift-crash.ll b/test/Transforms/InstCombine/div-shift-crash.ll
new file mode 100644
index 0000000000..a619724564
--- /dev/null
+++ b/test/Transforms/InstCombine/div-shift-crash.ll
@@ -0,0 +1,101 @@
+; RUN: opt -instcombine < %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129 = type <{ i64 }>
+
+; Function Attrs: nounwind
+define void @main() #0 {
+entry:
+  %l_819.i.i = alloca %struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129, align 8
+  br i1 undef, label %land.lhs.true, label %for.cond.i
+
+land.lhs.true:                                    ; preds = %entry
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %land.lhs.true, %entry
+  %0 = getelementptr inbounds %struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129* %l_819.i.i, i64 0, i32 0
+  br label %for.cond.i6.i.i
+
+for.cond.i6.i.i:                                  ; preds = %for.body.i8.i.i, %for.cond.i
+  br i1 undef, label %for.body.i8.i.i, label %lbl_707.i.i.i
+
+for.body.i8.i.i:                                  ; preds = %for.cond.i6.i.i
+  br label %for.cond.i6.i.i
+
+lbl_707.i.i.i:                                    ; preds = %for.cond.i6.i.i
+  br i1 undef, label %lor.rhs.i.i.i, label %lor.end.i.i.i
+
+lor.rhs.i.i.i:                                    ; preds = %lbl_707.i.i.i
+  br label %lor.end.i.i.i
+
+lor.end.i.i.i:                                    ; preds = %lor.rhs.i.i.i, %lbl_707.i.i.i
+  br label %for.cond1.i.i.i.i
+
+for.cond1.i.i.i.i:                                ; preds = %for.body4.i.i.i.i, %lor.end.i.i.i
+  br i1 undef, label %for.body4.i.i.i.i, label %func_39.exit.i.i
+
+for.body4.i.i.i.i:                                ; preds = %for.cond1.i.i.i.i
+  br label %for.cond1.i.i.i.i
+
+func_39.exit.i.i:                                 ; preds = %for.cond1.i.i.i.i
+  %l_8191.sroa.0.0.copyload.i.i = load i64* %0, align 1
+  br label %for.cond1.i.i.i
+
+for.cond1.i.i.i:                                  ; preds = %safe_div_func_uint32_t_u_u.exit.i.i.i, %func_39.exit.i.i
+  br i1 undef, label %for.cond7.i.i.i, label %func_11.exit.i
+
+for.cond7.i.i.i:                                  ; preds = %for.end30.i.i.i, %for.cond1.i.i.i
+  %storemerge.i.i.i = phi i32 [ %sub.i.i.i, %for.end30.i.i.i ], [ 4, %for.cond1.i.i.i ]
+  br i1 undef, label %for.cond22.i.i.i, label %for.end32.i.i.i
+
+for.cond22.i.i.i:                                 ; preds = %for.body25.i.i.i, %for.cond7.i.i.i
+  br i1 undef, label %for.body25.i.i.i, label %for.end30.i.i.i
+
+for.body25.i.i.i:                                 ; preds = %for.cond22.i.i.i
+  br label %for.cond22.i.i.i
+
+for.end30.i.i.i:                                  ; preds = %for.cond22.i.i.i
+  %sub.i.i.i = add nsw i32 0, -1
+  br label %for.cond7.i.i.i
+
+for.end32.i.i.i:                                  ; preds = %for.cond7.i.i.i
+  %conv33.i.i.i = trunc i64 %l_8191.sroa.0.0.copyload.i.i to i32
+  %xor.i.i.i.i = xor i32 %storemerge.i.i.i, -701565022
+  %sub.i.i.i.i = sub nsw i32 0, %storemerge.i.i.i
+  %xor3.i.i.i.i = xor i32 %sub.i.i.i.i, %storemerge.i.i.i
+  %and4.i.i.i.i = and i32 %xor.i.i.i.i, %xor3.i.i.i.i
+  %cmp.i.i.i.i = icmp slt i32 %and4.i.i.i.i, 0
+  %sub5.i.i.i.i = sub nsw i32 -701565022, %storemerge.i.i.i
+  %.sub5.i.i.i.i = select i1 %cmp.i.i.i.i, i32 -701565022, i32 %sub5.i.i.i.i
+  br i1 undef, label %safe_div_func_uint32_t_u_u.exit.i.i.i, label %cond.false.i.i.i.i
+
+cond.false.i.i.i.i:                               ; preds = %for.end32.i.i.i
+  %div.i.i.i.i = udiv i32 %conv33.i.i.i, %.sub5.i.i.i.i
+  br label %safe_div_func_uint32_t_u_u.exit.i.i.i
+
+safe_div_func_uint32_t_u_u.exit.i.i.i:            ; preds = %cond.false.i.i.i.i, %for.end32.i.i.i
+  %cond.i.i.i.i = phi i32 [ %div.i.i.i.i, %cond.false.i.i.i.i ], [ %conv33.i.i.i, %for.end32.i.i.i ]
+  %cmp35.i.i.i = icmp ne i32 %cond.i.i.i.i, -7
+  br label %for.cond1.i.i.i
+
+func_11.exit.i:                                   ; preds = %for.cond1.i.i.i
+  br i1 undef, label %for.body, label %for.end
+
+for.body:                                         ; preds = %func_11.exit.i
+  unreachable
+
+for.end:                                          ; preds = %func_11.exit.i
+  br label %for.cond15
+
+for.cond15:                                       ; preds = %for.cond19, %for.end
+  br i1 undef, label %for.cond19, label %for.end45
+
+for.cond19:                                       ; preds = %for.cond15
+  br label %for.cond15
+
+for.end45:                                        ; preds = %for.cond15
+  unreachable
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/InstCombine/div-shift.ll b/test/Transforms/InstCombine/div-shift.ll
index 46d0f9afd7..6be5cc4ca1 100644
--- a/test/Transforms/InstCombine/div-shift.ll
+++ b/test/Transforms/InstCombine/div-shift.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
+; XFAIL: *
 
 define i32 @t1(i16 zeroext %x, i32 %y) nounwind {
 entry: