summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2011-11-26 20:47:44 +0000
committerCraig Topper <craig.topper@gmail.com>2011-11-26 20:47:44 +0000
commit06cb680779597c35e6b6399dea6f10276273970b (patch)
tree9eeca624d87875692d9dd5e001efe94cb02838bb /lib
parent933a78c0d94b8d059cedc24dd5c14b5fe7b04c42 (diff)
downloadllvm-06cb680779597c35e6b6399dea6f10276273970b.tar.gz
llvm-06cb680779597c35e6b6399dea6f10276273970b.tar.bz2
llvm-06cb680779597c35e6b6399dea6f10276273970b.tar.xz
Collapse the X86ISD node types for PUNPCKH*, PUNPCKL*, UNPCKLP*, and UNPCKHP* so that they are no longer type-specific. Now there are just four nodes: integer high and low (PUNPCKH, PUNPCKL) and floating-point high and low (UNPCKHP, UNPCKLP). Pattern matching will choose the correct instruction based on the vector type.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145148 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp128
-rw-r--r--lib/Target/X86/X86ISelLowering.h16
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td18
-rw-r--r--lib/Target/X86/X86InstrSSE.td132
4 files changed, 116 insertions, 178 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 96c6f41071..6ebba0e456 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2843,18 +2843,10 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKLP:
+ case X86ISD::PUNPCKL:
+ case X86ISD::UNPCKHP:
+ case X86ISD::PUNPCKH:
case X86ISD::VPERMILPS:
case X86ISD::VPERMILPSY:
case X86ISD::VPERMILPD:
@@ -2920,18 +2912,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVLPD:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKLP:
+ case X86ISD::PUNPCKL:
+ case X86ISD::UNPCKHP:
+ case X86ISD::PUNPCKH:
return DAG.getNode(Opc, dl, VT, V1, V2);
}
return SDValue();
@@ -4635,24 +4619,16 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
break;
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
+ case X86ISD::PUNPCKH:
DecodePUNPCKHMask(NumElems, ShuffleMask);
break;
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
+ case X86ISD::UNPCKHP:
DecodeUNPCKHPMask(VT, ShuffleMask);
break;
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
+ case X86ISD::PUNPCKL:
DecodePUNPCKLMask(VT, ShuffleMask);
break;
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
+ case X86ISD::UNPCKLP:
DecodeUNPCKLPMask(VT, ShuffleMask);
break;
case X86ISD::MOVHLPS:
@@ -6568,22 +6544,20 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKLDQ;
- case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
+ case MVT::v32i8:
+ case MVT::v16i8:
+ case MVT::v16i16:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64: return X86ISD::PUNPCKL;
case MVT::v8i32:
- if (HasAVX2) return X86ISD::PUNPCKLDQ;
- // else use fp unit for int unpack.
- case MVT::v8f32:
- case MVT::v4f32: return X86ISD::UNPCKLPS;
case MVT::v4i64:
- if (HasAVX2) return X86ISD::PUNPCKLQDQ;
+ if (HasAVX2) return X86ISD::PUNPCKL;
// else use fp unit for int unpack.
+ case MVT::v8f32:
+ case MVT::v4f32:
case MVT::v4f64:
- case MVT::v2f64: return X86ISD::UNPCKLPD;
- case MVT::v32i8:
- case MVT::v16i8: return X86ISD::PUNPCKLBW;
- case MVT::v16i16:
- case MVT::v8i16: return X86ISD::PUNPCKLWD;
+ case MVT::v2f64: return X86ISD::UNPCKLP;
default:
llvm_unreachable("Unknown type for unpckl");
}
@@ -6592,22 +6566,20 @@ static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKHDQ;
- case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
+ case MVT::v32i8:
+ case MVT::v16i8:
+ case MVT::v16i16:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64: return X86ISD::PUNPCKH;
+ case MVT::v4i64:
case MVT::v8i32:
- if (HasAVX2) return X86ISD::PUNPCKHDQ;
+ if (HasAVX2) return X86ISD::PUNPCKH;
// else use fp unit for int unpack.
case MVT::v8f32:
- case MVT::v4f32: return X86ISD::UNPCKHPS;
- case MVT::v4i64:
- if (HasAVX2) return X86ISD::PUNPCKHQDQ;
- // else use fp unit for int unpack.
+ case MVT::v4f32:
case MVT::v4f64:
- case MVT::v2f64: return X86ISD::UNPCKHPD;
- case MVT::v32i8:
- case MVT::v16i8: return X86ISD::PUNPCKHBW;
- case MVT::v16i16:
- case MVT::v8i16: return X86ISD::PUNPCKHWD;
+ case MVT::v2f64: return X86ISD::UNPCKHP;
default:
llvm_unreachable("Unknown type for unpckh");
}
@@ -6910,9 +6882,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
if (VT == MVT::v2f64)
- return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKLP, dl, VT, V1, V1, DAG);
if (VT == MVT::v2i64)
- return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(X86ISD::PUNPCKL, dl, VT, V1, V1, DAG);
}
if (isPSHUFHWMask(M, VT))
@@ -11266,18 +11238,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
case X86ISD::MOVSD: return "X86ISD::MOVSD";
case X86ISD::MOVSS: return "X86ISD::MOVSS";
- case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
- case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
- case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
- case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
- case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
- case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
- case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
- case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
- case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
- case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
- case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
- case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::UNPCKLP: return "X86ISD::UNPCKLP";
+ case X86ISD::UNPCKHP: return "X86ISD::UNPCKHP";
+ case X86ISD::PUNPCKL: return "X86ISD::PUNPCKL";
+ case X86ISD::PUNPCKH: return "X86ISD::PUNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
@@ -14857,18 +14821,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::SHUFPS: // Handle all target specific shuffles
case X86ISD::SHUFPD:
case X86ISD::PALIGN:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
+ case X86ISD::PUNPCKH:
+ case X86ISD::UNPCKHP:
+ case X86ISD::PUNPCKL:
+ case X86ISD::UNPCKLP:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index ccff3a5ea6..582b6b522c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -273,18 +273,10 @@ namespace llvm {
MOVLPD,
MOVSD,
MOVSS,
- UNPCKLPS,
- UNPCKLPD,
- UNPCKHPS,
- UNPCKHPD,
- PUNPCKLBW,
- PUNPCKLWD,
- PUNPCKLDQ,
- PUNPCKLQDQ,
- PUNPCKHBW,
- PUNPCKHWD,
- PUNPCKHDQ,
- PUNPCKHQDQ,
+ UNPCKLP,
+ UNPCKHP,
+ PUNPCKL,
+ PUNPCKH,
VPERMILPS,
VPERMILPSY,
VPERMILPD,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 791bbe6566..32392dd552 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -130,21 +130,11 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
-def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpcklp : SDNode<"X86ISD::UNPCKLP", SDTShuff2Op>;
+def X86Unpckhp : SDNode<"X86ISD::UNPCKHP", SDTShuff2Op>;
-def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
-def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
-
-def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
-def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
-def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
-def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
-
-def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
-def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
-def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
-def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+def X86Punpckl : SDNode<"X86ISD::PUNPCKL", SDTShuff2Op>;
+def X86Punpckh : SDNode<"X86ISD::PUNPCKH", SDTShuff2Op>;
def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;
def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 7cadac16d7..87df492121 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1159,11 +1159,11 @@ let Predicates = [HasAVX] in {
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ def : Pat<(v2f64 (X86Unpcklp VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
@@ -1174,10 +1174,10 @@ let Predicates = [HasAVX] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(VMOVHPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(VMOVHPDmr addr:$dst, VR128:$src)>;
}
@@ -1194,16 +1194,16 @@ let Predicates = [HasSSE1] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(MOVHPSmr addr:$dst, VR128:$src)>;
}
let Predicates = [HasSSE2] in {
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ def : Pat<(v2f64 (X86Unpcklp VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
@@ -1214,7 +1214,7 @@ let Predicates = [HasSSE2] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))),addr:$dst),
(MOVHPDmr addr:$dst, VR128:$src)>;
}
@@ -2430,27 +2430,27 @@ let AddedComplexity = 10 in {
} // AddedComplexity
let Predicates = [HasSSE1] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),
(UNPCKHPSrr VR128:$src1, VR128:$src2)>;
}
let Predicates = [HasSSE2] in {
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),
(UNPCKHPDrr VR128:$src1, VR128:$src2)>;
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
@@ -2463,59 +2463,59 @@ let Predicates = [HasSSE2] in {
}
let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),
(VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))),
+ def : Pat<(v8f32 (X86Unpcklp VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)),
+ def : Pat<(v8f32 (X86Unpcklp VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)),
+ def : Pat<(v8i32 (X86Unpcklp VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpcklp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))),
+ def : Pat<(v8f32 (X86Unpckhp VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)),
+ def : Pat<(v8f32 (X86Unpckhp VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpckhp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)),
+ def : Pat<(v8i32 (X86Unpckhp VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))),
+ def : Pat<(v4f64 (X86Unpcklp VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
+ def : Pat<(v4f64 (X86Unpcklp VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))),
+ def : Pat<(v4i64 (X86Unpcklp VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
+ def : Pat<(v4i64 (X86Unpcklp VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))),
+ def : Pat<(v4f64 (X86Unpckhp VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
+ def : Pat<(v4f64 (X86Unpckhp VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))),
+ def : Pat<(v4i64 (X86Unpckhp VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
+ def : Pat<(v4i64 (X86Unpckhp VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
@@ -4198,62 +4198,62 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
}
let Predicates = [HasAVX] in {
- defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpckl,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd,
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpckl,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckl,
bc_v4i32, 0>, VEX_4V;
- defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq,
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpckl,
bc_v2i64, 0>, VEX_4V;
- defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckh,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd,
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckh,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckh,
bc_v4i32, 0>, VEX_4V;
- defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq,
+ defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckh,
bc_v2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
- defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpckl,
bc_v32i8>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd,
+ defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpckl,
bc_v16i16>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq,
+ defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckl,
bc_v8i32>, VEX_4V;
- defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdq,
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpckl,
bc_v4i64>, VEX_4V;
- defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckh,
bc_v32i8>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd,
+ defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckh,
bc_v16i16>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq,
+ defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckh,
bc_v8i32>, VEX_4V;
- defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdq,
+ defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckh,
bc_v4i64>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw,
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpckl,
bc_v16i8>;
- defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd,
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpckl,
bc_v8i16>;
- defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq,
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckl,
bc_v4i32>;
- defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq,
+ defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpckl,
bc_v2i64>;
- defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw,
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckh,
bc_v16i8>;
- defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd,
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckh,
bc_v8i16>;
- defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq,
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckh,
bc_v4i32>;
- defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq,
+ defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckh,
bc_v2i64>;
}
} // ExeDomain = SSEPackedInt