summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp5
-rw-r--r--test/CodeGen/X86/narrow-shl-load.ll24
2 files changed, 25 insertions, 4 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2523bb2451..e4f3027053 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4241,12 +4241,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
}
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
// If the shift amount is larger than the input type then we're not
// accessing any of the loaded bytes. If the load was a zextload/extload
// then the result of the shift+trunc is zero/undef (handled elsewhere).
// If the load was a sextload then the result is a splat of the sign bit
// of the extended byte. This is not worth optimizing for.
- if (ShAmt >= VT.getSizeInBits())
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
return SDValue();
}
}
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
index 53b03884a5..ef27cbc341 100644
--- a/test/CodeGen/X86/narrow-shl-load.ll
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-pc-linux-gnu"
; DAGCombiner should fold this code in finite time.
; rdar://8606584
-define void @D() nounwind readnone {
+define void @test1() nounwind readnone {
bb.nph:
br label %while.cond
@@ -33,10 +33,10 @@ while.end: ; preds = %while.cond
; DAGCombiner shouldn't fold the sdiv (ashr) away.
; rdar://8636812
-; CHECK: main:
+; CHECK: test2:
; CHECK: sarl
-define i32 @main() nounwind {
+define i32 @test2() nounwind {
entry:
%i = alloca i32, align 4
%j = alloca i8, align 1
@@ -63,3 +63,21 @@ if.end: ; preds = %entry
declare void @abort() noreturn
declare void @exit(i32) noreturn
+
+; DAG Combiner can't fold this into a load of the 1'th byte.
+; PR8757
+define i32 @test3(i32 *%P) nounwind ssp {
+ volatile store i32 128, i32* %P
+ %tmp4.pre = load i32* %P
+ %phitmp = trunc i32 %tmp4.pre to i16
+ %phitmp13 = shl i16 %phitmp, 8
+ %phitmp14 = ashr i16 %phitmp13, 8
+ %phitmp15 = lshr i16 %phitmp14, 8
+ %phitmp16 = zext i16 %phitmp15 to i32
+ ret i32 %phitmp16
+
+; CHECK: movl $128, (%rdi)
+; CHECK-NEXT: movsbl (%rdi), %eax
+; CHECK-NEXT: movzbl %ah, %eax
+; CHECK-NEXT: ret
+}