summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@gmail.com> 2011-11-21 08:26:50 +0000
committer: Craig Topper <craig.topper@gmail.com> 2011-11-21 08:26:50 +0000
commit: 6fa583d78701390079db0cc4d944823af06023c6 (patch)
tree: f651bde86e820e2e95babc822e65505cb94a33ea
parent: 3b73312020156594e3b9da5424bff0d77070ff48 (diff)
download: llvm-6fa583d78701390079db0cc4d944823af06023c6.tar.gz
llvm-6fa583d78701390079db0cc4d944823af06023c6.tar.bz2
llvm-6fa583d78701390079db0cc4d944823af06023c6.tar.xz
Lowering for v32i8 to VPUNPCKLBW/VPUNPCKHBW when AVX2 is enabled.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145028 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 16
-rw-r--r-- lib/Target/X86/X86ISelLowering.h 2
-rw-r--r-- lib/Target/X86/X86InstrFragmentsSIMD.td 2
-rw-r--r-- lib/Target/X86/X86InstrSSE.td 94
-rw-r--r-- test/CodeGen/X86/avx2-unpack.ll 14
5 files changed, 44 insertions, 84 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4f7bf15a79..a5bfe1ac8f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2852,6 +2852,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::VPUNPCKLWDY:
+ case X86ISD::VPUNPCKLBWY:
case X86ISD::VPUNPCKLDQY:
case X86ISD::VPUNPCKLQDQY:
case X86ISD::UNPCKHPS:
@@ -2863,6 +2864,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
case X86ISD::VPUNPCKHWDY:
+ case X86ISD::VPUNPCKHBWY:
case X86ISD::VPUNPCKHDQY:
case X86ISD::VPUNPCKHQDQY:
case X86ISD::VPERMILPS:
@@ -2939,6 +2941,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::VPUNPCKLWDY:
+ case X86ISD::VPUNPCKLBWY:
case X86ISD::VPUNPCKLDQY:
case X86ISD::VPUNPCKLQDQY:
case X86ISD::UNPCKHPS:
@@ -2950,6 +2953,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
case X86ISD::VPUNPCKHWDY:
+ case X86ISD::VPUNPCKHBWY:
case X86ISD::VPUNPCKHDQY:
case X86ISD::VPUNPCKHQDQY:
return DAG.getNode(Opc, dl, VT, V1, V2);
@@ -3569,7 +3573,7 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
"Unsupported vector type for unpckh");
if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || NumElts != 16))
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3619,7 +3623,7 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
"Unsupported vector type for unpckh");
if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || NumElts != 16))
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -4639,6 +4643,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
+ case X86ISD::VPUNPCKHBWY:
case X86ISD::VPUNPCKHWDY:
case X86ISD::VPUNPCKHDQY:
case X86ISD::VPUNPCKHQDQY:
@@ -4654,6 +4659,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
+ case X86ISD::VPUNPCKLBWY:
case X86ISD::VPUNPCKLWDY:
case X86ISD::VPUNPCKLDQY:
case X86ISD::VPUNPCKLQDQY:
@@ -6595,6 +6601,7 @@ static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
case MVT::v16i8: return X86ISD::PUNPCKLBW;
case MVT::v8i16: return X86ISD::PUNPCKLWD;
case MVT::v16i16: return X86ISD::VPUNPCKLWDY;
+ case MVT::v32i8: return X86ISD::VPUNPCKLBWY;
default:
llvm_unreachable("Unknown type for unpckl");
}
@@ -6618,6 +6625,7 @@ static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
case MVT::v16i8: return X86ISD::PUNPCKHBW;
case MVT::v8i16: return X86ISD::PUNPCKHWD;
case MVT::v16i16: return X86ISD::VPUNPCKHWDY;
+ case MVT::v32i8: return X86ISD::VPUNPCKHBWY;
default:
llvm_unreachable("Unknown type for unpckh");
}
@@ -11270,6 +11278,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
+ case X86ISD::VPUNPCKLBWY: return "X86ISD::VPUNPCKLBWY";
case X86ISD::VPUNPCKLWDY: return "X86ISD::VPUNPCKLWDY";
case X86ISD::VPUNPCKLDQY: return "X86ISD::VPUNPCKLDQY";
case X86ISD::VPUNPCKLQDQY: return "X86ISD::VPUNPCKLQDQY";
@@ -11277,6 +11286,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::VPUNPCKHBWY: return "X86ISD::VPUNPCKHBWY";
case X86ISD::VPUNPCKHWDY: return "X86ISD::VPUNPCKHWDY";
case X86ISD::VPUNPCKHDQY: return "X86ISD::VPUNPCKHDQY";
case X86ISD::VPUNPCKHQDQY: return "X86ISD::VPUNPCKHQDQY";
@@ -14867,6 +14877,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
+ case X86ISD::VPUNPCKHBWY:
case X86ISD::VPUNPCKHWDY:
case X86ISD::VPUNPCKHDQY:
case X86ISD::VPUNPCKHQDQY:
@@ -14878,6 +14889,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
+ case X86ISD::VPUNPCKLBWY:
case X86ISD::VPUNPCKLWDY:
case X86ISD::VPUNPCKLDQY:
case X86ISD::VPUNPCKLQDQY:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7bb4da6581..36cb1526fd 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -285,6 +285,7 @@ namespace llvm {
PUNPCKLWD,
PUNPCKLDQ,
PUNPCKLQDQ,
+ VPUNPCKLBWY,
VPUNPCKLWDY,
VPUNPCKLDQY,
VPUNPCKLQDQY,
@@ -292,6 +293,7 @@ namespace llvm {
PUNPCKHWD,
PUNPCKHDQ,
PUNPCKHQDQ,
+ VPUNPCKHBWY,
VPUNPCKHWDY,
VPUNPCKHDQY,
VPUNPCKHQDQY,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index c4d311f8d4..7e8bc04463 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -144,6 +144,7 @@ def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
+def X86Punpcklbwy : SDNode<"X86ISD::VPUNPCKLBWY", SDTShuff2Op>;
def X86Punpcklwdy : SDNode<"X86ISD::VPUNPCKLWDY", SDTShuff2Op>;
def X86Punpckldqy : SDNode<"X86ISD::VPUNPCKLDQY", SDTShuff2Op>;
def X86Punpcklqdqy : SDNode<"X86ISD::VPUNPCKLQDQY", SDTShuff2Op>;
@@ -152,6 +153,7 @@ def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+def X86Punpckhbwy : SDNode<"X86ISD::VPUNPCKHBWY", SDTShuff2Op>;
def X86Punpckhwdy : SDNode<"X86ISD::VPUNPCKHWDY", SDTShuff2Op>;
def X86Punpckhdqy : SDNode<"X86ISD::VPUNPCKHDQY", SDTShuff2Op>;
def X86Punpckhqdqy : SDNode<"X86ISD::VPUNPCKHQDQY", SDTShuff2Op>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index cfb8c850c8..94bd8251ef 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4204,19 +4204,8 @@ let Predicates = [HasAVX] in {
bc_v8i16, 0>, VEX_4V;
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
bc_v4i32, 0>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
- VR128:$src2)))]>, VEX_4V;
- def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq,
+ bc_v2i64, 0>, VEX_4V;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
bc_v16i8, 0>, VEX_4V;
@@ -4224,99 +4213,40 @@ let Predicates = [HasAVX] in {
bc_v8i16, 0>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
bc_v4i32, 0>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
- VR128:$src2)))]>, VEX_4V;
- def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq,
+ bc_v2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
- defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbwy,
bc_v32i8>, VEX_4V;
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwdy,
bc_v16i16>, VEX_4V;
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldqy,
bc_v8i32>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdqy,
+ bc_v4i64>, VEX_4V;
- /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1,
- VR256:$src2)))]>, VEX_4V;
- def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem,
- (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1,
- (memopv4i64 addr:$src2))))]>, VEX_4V;
-
- defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbwy,
bc_v32i8>, VEX_4V;
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwdy,
bc_v16i16>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdqy,
bc_v8i32>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1,
- VR256:$src2)))]>, VEX_4V;
- def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem,
- (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1,
- (memopv4i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdqy,
+ bc_v4i64>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>;
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>;
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpcklqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpcklqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpcklqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
+ defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, bc_v2i64>;
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>;
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>;
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckhqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckhqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpckhqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
+ defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, bc_v2i64>;
}
} // ExeDomain = SSEPackedInt
diff --git a/test/CodeGen/X86/avx2-unpack.ll b/test/CodeGen/X86/avx2-unpack.ll
index 51c0f16789..aa973089ef 100644
--- a/test/CodeGen/X86/avx2-unpack.ll
+++ b/test/CodeGen/X86/avx2-unpack.ll
@@ -41,3 +41,17 @@ entry:
%shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
ret <16 x i16> %shuffle.i
}
+
+; CHECK: vpunpckhbw
+define <32 x i8> @unpackhbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+ ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpcklbw
+define <32 x i8> @unpacklbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+ ret <32 x i8> %shuffle.i
+}