2 files changed, 25 insertions, 4 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2523bb2451..e4f3027053 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4241,12 +4241,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
           return SDValue();
       }
 
+      // At this point, we must have a load or else we can't do the transform.
+      if (!isa<LoadSDNode>(N0)) return SDValue();
+      
       // If the shift amount is larger than the input type then we're not
       // accessing any of the loaded bytes.  If the load was a zextload/extload
       // then the result of the shift+trunc is zero/undef (handled elsewhere).
       // If the load was a sextload then the result is a splat of the sign bit
       // of the extended byte.  This is not worth optimizing for.
-      if (ShAmt >= VT.getSizeInBits())
+      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
         return SDValue();
     }
   }
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
index 53b03884a5..ef27cbc341 100644
--- a/test/CodeGen/X86/narrow-shl-load.ll
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-pc-linux-gnu"
 ; DAGCombiner should fold this code in finite time.
 ; rdar://8606584
 
-define void @D() nounwind readnone {
+define void @test1() nounwind readnone {
 bb.nph:
   br label %while.cond
 
@@ -33,10 +33,10 @@ while.end:                                        ; preds = %while.cond
 
 ; DAGCombiner shouldn't fold the sdiv (ashr) away.
 ; rdar://8636812
-; CHECK: main:
+; CHECK: test2:
 ; CHECK:   sarl
 
-define i32 @main() nounwind {
+define i32 @test2() nounwind {
 entry:
   %i = alloca i32, align 4
   %j = alloca i8, align 1
@@ -63,3 +63,21 @@ if.end:                                           ; preds = %entry
 declare void @abort() noreturn
 
 declare void @exit(i32) noreturn
+
+; DAG Combiner can't fold this into a load of the 1'th byte.
+; PR8757
+define i32 @test3(i32 *%P) nounwind ssp {
+  volatile store i32 128, i32* %P
+  %tmp4.pre = load i32* %P
+  %phitmp = trunc i32 %tmp4.pre to i16
+  %phitmp13 = shl i16 %phitmp, 8
+  %phitmp14 = ashr i16 %phitmp13, 8
+  %phitmp15 = lshr i16 %phitmp14, 8
+  %phitmp16 = zext i16 %phitmp15 to i32
+  ret i32 %phitmp16
+  
+; CHECK: movl	$128, (%rdi)
+; CHECK-NEXT: movsbl	(%rdi), %eax
+; CHECK-NEXT: movzbl	%ah, %eax
+; CHECK-NEXT: ret
+}