diff options
author | Nadav Rotem <nadav.rotem@intel.com> | 2011-09-09 20:29:17 +0000 |
---|---|---|
committer | Nadav Rotem <nadav.rotem@intel.com> | 2011-09-09 20:29:17 +0000 |
commit | 8ffad56f8eb41c73ecf40d1aa473819eb6915c12 (patch) | |
tree | 7f7a730bb243452a506dc5f67d3f275ac1b46a05 /lib | |
parent | 468709e43dfff52f48af9ff411d461e22b6e2015 (diff) | |
download | llvm-8ffad56f8eb41c73ecf40d1aa473819eb6915c12.tar.gz llvm-8ffad56f8eb41c73ecf40d1aa473819eb6915c12.tar.bz2 llvm-8ffad56f8eb41c73ecf40d1aa473819eb6915c12.tar.xz |
Implement vector-select support for AVX (256-bit). Refactor the vblend implementation so that tablegen matches the instruction by the node type.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139400 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 28 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 49 |
4 files changed, 63 insertions, 30 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 171349c066..0d658492e5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1019,6 +1019,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v8f32, Custom); + setOperationAction(ISD::VSELECT, MVT::v4f64, Custom); + setOperationAction(ISD::VSELECT, MVT::v4i64, Custom); + setOperationAction(ISD::VSELECT, MVT::v8i32, Custom); + setOperationAction(ISD::VSELECT, MVT::v8f32, Custom); + setOperationAction(ISD::ADD, MVT::v4i64, Custom); setOperationAction(ISD::ADD, MVT::v8i32, Custom); setOperationAction(ISD::ADD, MVT::v16i16, Custom); @@ -8706,14 +8711,21 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op1.getValueType(); switch (VT.getSimpleVT().SimpleTy) { default: break; + // SSE4: case MVT::v2i64: case MVT::v2f64: - return DAG.getNode(X86ISD::BLENDVPD, DL, VT, Ops, array_lengthof(Ops)); case MVT::v4i32: case MVT::v4f32: - return DAG.getNode(X86ISD::BLENDVPS, DL, VT , Ops, array_lengthof(Ops)); case MVT::v16i8: - return DAG.getNode(X86ISD::PBLENDVB, DL, VT , Ops, array_lengthof(Ops)); + case MVT::v8i16: + // AVX: + case MVT::v4i64: + case MVT::v4f64: + case MVT::v8i32: + case MVT::v8f32: + case MVT::v32i8: + case MVT::v16i16: + return DAG.getNode(X86ISD::BLENDV, DL, VT, Ops, array_lengthof(Ops)); } return SDValue(); @@ -9973,7 +9985,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(4, MVT::i32)); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); + R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, M, Op); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); @@ -9988,12 +10000,12 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG 
&DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(2, MVT::i32)); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); + R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, M, Op); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); // return pblendv(r, r+r, a); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, + R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op); return R; } @@ -10631,7 +10643,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; case X86ISD::PSIGND: return "X86ISD::PSIGND"; - case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB"; + case X86ISD::BLENDV: return "X86ISD::BLENDV"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; @@ -13361,7 +13373,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X); Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y); Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask); - Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask); + Mask = DAG.getNode(X86ISD::BLENDV, DL, MVT::v16i8, X, Y, Mask); return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 3051e16485..bd04de150d 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -175,10 +175,8 @@ namespace llvm { /// PSIGNB/W/D - Copy integer sign. PSIGNB, PSIGNW, PSIGND, - /// BLENDVXX family of opcodes - PBLENDVB, - BLENDVPD, - BLENDVPS, + /// BLEND family of opcodes + BLENDV, /// FMAX, FMIN - Floating point max and min. 
/// diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index c2db9177cd..f25435f85e 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -58,14 +58,8 @@ def X86psignw : SDNode<"X86ISD::PSIGNW", def X86psignd : SDNode<"X86ISD::PSIGND", SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86pblendvb : SDNode<"X86ISD::PBLENDVB", - SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; -def X86blendvpd : SDNode<"X86ISD::BLENDVPD", - SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; -def X86blendvps : SDNode<"X86ISD::BLENDVPS", - SDTypeProfile<1, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, +def X86blendv : SDNode<"X86ISD::BLENDV", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5071a36192..cf363354c7 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5868,12 +5868,37 @@ defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, memopv32i8, int_x86_avx_blendv_ps_256>; let Predicates = [HasAVX] in { - def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$mask), - (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>; - def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, VR128:$mask), - (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>; - def : Pat<(X86blendvps VR128:$src1, VR128:$src2, VR128:$mask), - (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>; +def : Pat<(v16i8 (X86blendv (v16i8 VR128:$src1), (v16i8 VR128:$src2), + VR128:$mask)), + (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>; +def : Pat<(v4i32 (X86blendv (v4i32 VR128:$src1), (v4i32 VR128:$src2), + 
VR128:$mask)), + (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>; +def : Pat<(v4f32 (X86blendv (v4f32 VR128:$src1), (v4f32 VR128:$src2), + VR128:$mask)), + (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>; +def : Pat<(v2i64 (X86blendv (v2i64 VR128:$src1), (v2i64 VR128:$src2), + VR128:$mask)), + (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>; +def : Pat<(v2f64 (X86blendv (v2f64 VR128:$src1), (v2f64 VR128:$src2), + VR128:$mask)), + (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>; + + +def : Pat<(v8i32 (X86blendv (v8i32 VR256:$src1), (v8i32 VR256:$src2), + VR256:$mask)), + (VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>; +def : Pat<(v8f32 (X86blendv (v8f32 VR256:$src1), (v8f32 VR256:$src2), + VR256:$mask)), + (VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>; + + +def : Pat<(v4i64 (X86blendv (v4i64 VR256:$src1), (v4i64 VR256:$src2), + VR256:$mask)), + (VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>; +def : Pat<(v4f64 (X86blendv (v4f64 VR256:$src1), (v4f64 VR256:$src2), + VR256:$mask)), + (VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>; } /// SS41I_ternary_int - SSE 4.1 ternary operator @@ -5901,12 +5926,16 @@ defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; let Predicates = [HasSSE41] in { - def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0), + def : Pat<(v16i8 (X86blendv (v16i8 VR128:$src1), (v16i8 VR128:$src2), XMM0)), (PBLENDVBrr0 VR128:$src1, VR128:$src2)>; - def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, XMM0), - (BLENDVPDrr0 VR128:$src1, VR128:$src2)>; - def : Pat<(X86blendvps VR128:$src1, VR128:$src2, XMM0), + def : Pat<(v4i32 (X86blendv (v4i32 VR128:$src1), (v4i32 VR128:$src2), XMM0)), + (BLENDVPSrr0 VR128:$src1, VR128:$src2)>; + def : Pat<(v4f32 (X86blendv (v4f32 VR128:$src1), (v4f32 VR128:$src2), XMM0)), (BLENDVPSrr0 VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86blendv (v2i64 VR128:$src1), (v2i64 
VR128:$src2), XMM0)), + (BLENDVPDrr0 VR128:$src1, VR128:$src2)>; + def : Pat<(v2f64 (X86blendv (v2f64 VR128:$src1), (v2f64 VR128:$src2), XMM0)), + (BLENDVPDrr0 VR128:$src1, VR128:$src2)>; } let Predicates = [HasAVX] in |