summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp11
-rw-r--r--test/CodeGen/X86/sse1.ll14
2 files changed, 25 insertions, 0 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 2db913d4f1..4f57bc3201 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2192,6 +2192,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
Cond1 = GetWidenedVector(Cond1);
+ // If we have to split the condition there is no point in widening the
+ // select. This would result in an cycle of widening the select ->
+ // widening the condition operand -> splitting the condition operand ->
+ // splitting the select -> widening the select. Instead split this select
+ // further and widen the resulting type.
+ if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
+ SDValue Res = ModifyToType(SplitSelect, WidenVT);
+ return Res;
+ }
+
if (Cond1.getValueType() != CondWidenVT)
Cond1 = ModifyToType(Cond1, CondWidenVT);
}
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
index 47c6429b18..183297e4c3 100644
--- a/test/CodeGen/X86/sse1.ll
+++ b/test/CodeGen/X86/sse1.ll
@@ -43,3 +43,17 @@ entry:
; CHECK-NOT: shufps $16
; CHECK: ret
}
+
+; We used to get stuck in type legalization for this example when lowering the
+; vselect. With SSE1 v4f32 is a legal type but v4i1 (or any vector integer type)
+; is not. We used to ping pong between splitting the vselect for the v4i
+; condition operand and widening the resulting vselect for the v4f32 result.
+; PR18036
+
+; CHECK-LABEL: vselect
+define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
+entry:
+ %a1 = icmp eq <4 x i32> %q, zeroinitializer
+ %a14 = select <4 x i1> %a1, <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+0> , <4 x float> zeroinitializer
+ ret <4 x float> %a14
+}