-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp        | 34
-rw-r--r--  lib/Target/X86/X86ISelLowering.h          | 10
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td   |  8
-rw-r--r--  lib/Target/X86/X86InstrSSE.td             |  8
-rw-r--r--  test/CodeGen/Generic/promote-integers.ll  |  2
5 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1ed35d88c3..d74a87281c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -917,6 +917,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v8i16, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
+
// i8 and i16 vectors are custom , because the source register and source
// memory operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
@@ -8684,6 +8691,32 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
+SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Ops[] = {Cond, Op1, Op2};
+
+ assert(Op1.getValueType().isVector() && "Op1 must be a vector");
+ assert(Op2.getValueType().isVector() && "Op2 must be a vector");
+ assert(Cond.getValueType().isVector() && "Cond must be a vector");
+ assert(Op1.getValueType() == Op2.getValueType() && "Type mismatch");
+
+ switch (Op1.getValueType().getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::v2i64: return DAG.getNode(X86ISD::BLENDVPD, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
+ case MVT::v2f64: return DAG.getNode(X86ISD::BLENDVPD, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
+ case MVT::v4i32: return DAG.getNode(X86ISD::BLENDVPS, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
+ case MVT::v4f32: return DAG.getNode(X86ISD::BLENDVPS, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
+ case MVT::v16i8: return DAG.getNode(X86ISD::PBLENDVB, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
+ }
+
+ return SDValue();
+}
+
+
// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
// from the AND / OR.
@@ -10350,6 +10383,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
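
Note: the hunks above mark ISD::VSELECT as Custom for the 128-bit vector types and route it through the new LowerVSELECT, which picks BLENDVPD, BLENDVPS or PBLENDVB based on the element type. As a rough sketch of the kind of IR this path is meant to handle (the function below is illustrative and not part of this commit), a v4f32 select fed by a vector compare should now be able to use a single blendvps on SSE4.1, since each condition lane is all-ones or all-zeros and blendvps only tests the sign bit of each mask element:

define <4 x float> @sel_v4f32(<4 x i32> %a, <4 x i32> %b,
                              <4 x float> %t, <4 x float> %f) {
  ; vector compare: every lane of %cond is all-ones or all-zeros
  %cond = icmp sgt <4 x i32> %a, %b
  ; becomes ISD::VSELECT, lowered to X86ISD::BLENDVPS by LowerVSELECT above
  %sel = select <4 x i1> %cond, <4 x float> %t, <4 x float> %f
  ret <4 x float> %sel
}
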
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 6ff9ca55d3..3051e16485 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -175,8 +175,10 @@ namespace llvm {
/// PSIGNB/W/D - Copy integer sign.
PSIGNB, PSIGNW, PSIGND,
- /// PBLENDVB - Variable blend
+ /// BLENDVXX family of opcodes
PBLENDVB,
+ BLENDVPD,
+ BLENDVPS,
/// FMAX, FMIN - Floating point max and min.
///
@@ -809,6 +811,7 @@ namespace llvm {
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index e80038e3a0..7ad9c87d28 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -58,9 +58,15 @@ def X86psignw : SDNode<"X86ISD::PSIGNW",
def X86psignd : SDNode<"X86ISD::PSIGND",
SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86pblendv : SDNode<"X86ISD::PBLENDVB",
+def X86pblendvb : SDNode<"X86ISD::PBLENDVB",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
+def X86blendvpd : SDNode<"X86ISD::BLENDVPD",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
+def X86blendvps : SDNode<"X86ISD::BLENDVPS",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 8eab5d6721..c210a987dc 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5843,7 +5843,7 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
memopv32i8, int_x86_avx_blendv_ps_256>;
-def : Pat<(X86pblendv VR128:$src1, VR128:$src2, VR128:$src3),
+def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$src3),
(VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$src3)>,
Requires<[HasAVX]>;
@@ -5871,8 +5871,12 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
-def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
+def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),
(PBLENDVBrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
+def : Pat<(X86blendvpd XMM0, VR128:$src1, VR128:$src2),
+ (BLENDVPDrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
+def : Pat<(X86blendvps XMM0, VR128:$src1, VR128:$src2),
+ (BLENDVPSrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
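
The non-AVX blendv forms read their mask implicitly from XMM0 (hence the *rr0 instruction variants), which is why the new patterns above list XMM0 as the first operand, matching the (cond, op1, op2) operand order built in LowerVSELECT. Since blendvpd/blendvps pick each lane from the sign bit of the corresponding mask element, the condition needs its sign bits set per lane, e.g. a sign-extended vector compare result. An illustrative v2f64 case (names made up, not from the commit) that is expected to map through X86blendvpd to BLENDVPDrr0 when compiling for SSE4.1 without AVX:

define <2 x double> @sel_v2f64(<2 x double> %a, <2 x double> %b,
                               <2 x double> %t, <2 x double> %f) {
  ; fcmp yields an all-ones/all-zeros mask per 64-bit lane
  %cond = fcmp olt <2 x double> %a, %b
  ; select -> X86ISD::BLENDVPD -> blendvpd with the mask in xmm0
  %sel = select <2 x i1> %cond, <2 x double> %t, <2 x double> %f
  ret <2 x double> %sel
}
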
diff --git a/test/CodeGen/Generic/promote-integers.ll b/test/CodeGen/Generic/promote-integers.ll
index 58125921b3..d965abf261 100644
--- a/test/CodeGen/Generic/promote-integers.ll
+++ b/test/CodeGen/Generic/promote-integers.ll
@@ -3,8 +3,8 @@
; This test is the poster-child for integer-element-promotion.
; Until this feature is complete, we mark this test as expected to fail.
-; XFAIL: *
; CHECK: vector_code
+; CHECK: blend
; CHECK: ret
define <4 x float> @vector_code(<4 x i64> %A, <4 x i64> %B, <4 x float> %R0, <4 x float> %R1 ) {
%C = icmp eq <4 x i64> %A, %B