summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorElena Demikhovsky <elena.demikhovsky@intel.com>2013-08-07 12:34:55 +0000
committerElena Demikhovsky <elena.demikhovsky@intel.com>2013-08-07 12:34:55 +0000
commit207600d2cfa2b06bfeb0c1670f198f1aa1a1aa58 (patch)
tree2315c6ff0ba41f225af5bd289eb7dab7f7851eaf /lib
parent29e873ddb6b21c4a934926a0cf7809e98ac1fff0 (diff)
downloadllvm-207600d2cfa2b06bfeb0c1670f198f1aa1a1aa58.tar.gz
llvm-207600d2cfa2b06bfeb0c1670f198f1aa1a1aa58.tar.bz2
llvm-207600d2cfa2b06bfeb0c1670f198f1aa1a1aa58.tar.xz
AVX-512 set: Added BROADCAST instructions
with lowering logic and a test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187884 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp18
-rw-r--r--lib/Target/X86/X86ISelLowering.h4
-rw-r--r--lib/Target/X86/X86InstrAVX512.td126
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td5
4 files changed, 146 insertions, 7 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 592e5b37ac..00b4976b50 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5406,7 +5406,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getValueType().getSimpleVT();
SDLoc dl(Op);
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Unsupported vector type for broadcast.");
SDValue Ld;
@@ -5462,13 +5462,18 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
// The scalar_to_vector node and the suspected
// load node must have exactly one user.
// Constants may have multiple users.
- if (!ConstSplatVal && (!Sc.hasOneUse() || !Ld.hasOneUse()))
+
+ // AVX-512 has register version of the broadcast
+ bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() &&
+ Ld.getValueType().getSizeInBits() >= 32;
+ if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
+ !hasRegVer))
return SDValue();
break;
}
}
- bool Is256 = VT.is256BitVector();
+ bool IsGE256 = (VT.getSizeInBits() >= 256);
// Handle the broadcasting a single constant scalar from the constant pool
// into a vector. On Sandybridge it is still better to load a constant vector
@@ -5478,7 +5483,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
assert(!CVT.isVector() && "Must not broadcast a vector type");
unsigned ScalarSize = CVT.getSizeInBits();
- if (ScalarSize == 32 || (Is256 && ScalarSize == 64)) {
+ if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)) {
const Constant *C = 0;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
@@ -5502,14 +5507,14 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
// Handle AVX2 in-register broadcasts.
if (!IsLoad && Subtarget->hasInt256() &&
- (ScalarSize == 32 || (Is256 && ScalarSize == 64)))
+ (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The scalar source must be a normal load.
if (!IsLoad)
return SDValue();
- if (ScalarSize == 32 || (Is256 && ScalarSize == 64))
+ if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The integer check is needed for the 64-bit into 128-bit so it doesn't match
@@ -13230,6 +13235,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
+ case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 487d82947d..270327495d 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -321,6 +321,8 @@ namespace llvm {
VPERMI,
VPERM2X128,
VBROADCAST,
+ // masked broadcast
+ VBROADCASTM,
// PMULUDQ - Vector multiply packed unsigned doubleword integers
PMULUDQ,
@@ -852,7 +854,9 @@ namespace llvm {
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerZERO_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 18ccdc35ae..8abae14f94 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -347,6 +347,132 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>, EVEX;
+//===---------------------------------------------------------------------===//
+// AVX-512 BROADCAST
+//---
+multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
+ RegisterClass DestRC,
+ RegisterClass SrcRC, X86MemOperand x86memop> {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, EVEX;
+ def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),[]>, EVEX;
+}
+let ExeDomain = SSEPackedSingle in {
+ defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss{z}", VR512,
+ VR128X, f32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+}
+
+let ExeDomain = SSEPackedDouble in {
+ defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd{z}", VR512,
+ VR128X, f64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+}
+
+def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSZrm addr:$src)>;
+def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VBROADCASTSDZrm addr:$src)>;
+
+multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
+ RegisterClass SrcRC, RegisterClass KRC> {
+ def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, EVEX, EVEX_V512;
+ def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
+ (ins KRC:$mask, SrcRC:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
+ []>, EVEX, EVEX_V512, EVEX_KZ;
+}
+
+defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
+defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,
+ VEX_W;
+
+def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
+ (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
+
+def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
+ (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
+
+def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
+ (VPBROADCASTDrZrr GR32:$src)>;
+def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
+ (VPBROADCASTQrZrr GR64:$src)>;
+
+multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
+ RegisterClass KRC> {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst,
+ (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
+ def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
+ VR128X:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst}{${mask}}{z}|${dst}{${mask}}{z}, $src}"),
+ [(set DstRC:$dst,
+ (OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
+ EVEX, EVEX_KZ;
+ def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst,
+ (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
+ def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
+ x86memop:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst}{${mask}}{z}|${dst}{${mask}}{z}, $src}"),
+ [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
+ (ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
+}
+
+defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
+ loadi32, VR512, v16i32, v4i32, VK16WM>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
+ loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VT1>;
+
+def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
+ (VBROADCASTSSZrr VR128X:$src)>;
+def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
+ (VBROADCASTSDZrr VR128X:$src)>;
+
+// Provide fallback in case the load node that is used in the patterns above
+// is used by additional users, which prevents the pattern selection.
+def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
+ (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
+def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
+ (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+
+
+let Predicates = [HasAVX512] in {
+def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
+ (EXTRACT_SUBREG
+ (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+ addr:$src)), sub_ymm)>;
+}
+//===----------------------------------------------------------------------===//
+// AVX-512 BROADCAST MASK TO VECTOR REGISTER
+//---
+
+multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
+ RegisterClass DstRC, RegisterClass KRC,
+ ValueType OpVT, ValueType SrcVT> {
+def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, EVEX;
+}
+
+defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
+ VK16, v16i32, v16i1>, EVEX_V512;
+defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
+ VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
+
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index db53af00b5..0b51521fee 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -157,7 +157,9 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
-def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
+
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
@@ -196,6 +198,7 @@ def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;