-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp             34
-rw-r--r--  lib/Target/X86/X86InstrMMX.td                  17
-rw-r--r--  test/CodeGen/X86/2009-06-05-VZextByteShort.ll  37
3 files changed, 59 insertions, 29 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1de51ee51a..ec6a5bad5b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2428,9 +2428,10 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
 /// specifies a shuffle of elements that is suitable for input to MOVSS,
 /// MOVSD, and MOVD, i.e. setting the lowest element.
 static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
-  int NumElts = VT.getVectorNumElements();
-  if (NumElts != 2 && NumElts != 4)
+  if (VT.getVectorElementType().getSizeInBits() < 32)
     return false;
+
+  int NumElts = VT.getVectorNumElements();
 
   if (!isUndefOrEqual(Mask[0], NumElts))
     return false;
@@ -3082,7 +3083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   }
 
   // Special case for single non-zero, non-undef, element.
-  if (NumNonZero == 1 && NumElems <= 4) {
+  if (NumNonZero == 1) {
     unsigned Idx = CountTrailingZeros_32(NonZeros);
     SDValue Item = Op.getOperand(Idx);
@@ -3123,15 +3124,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
     // If we have a constant or non-constant insertion into the low element of
     // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
     // the rest of the elements. This will be matched as movd/movq/movss/movsd
-    // depending on what the source datatype is. Because we can only get here
-    // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
-    if (Idx == 0 &&
-        // Don't do this for i64 values on x86-32.
-        (EVT != MVT::i64 || Subtarget->is64Bit())) {
-      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
-      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
-      return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
-                                         Subtarget->hasSSE2(), DAG);
+    // depending on what the source datatype is.
+    if (Idx == 0) {
+      if (NumZero == 0) {
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+      } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 ||
+                 (EVT == MVT::i64 && Subtarget->is64Bit())) {
+        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+        // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+        return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
+                                           DAG);
+      } else if (EVT == MVT::i16 || EVT == MVT::i8) {
+        Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
+        MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
+        Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+                                           Subtarget->hasSSE2(), DAG);
+        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
+      }
     }
 
     // Is it a vector logical left shift?
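For a 128-bit destination such as v8i16, the new i16/i8 branch above first zero-extends the scalar to i32 (movd inserts a full 32-bit value) and then reuses the existing zero-shuffle lowering. A minimal sketch of the node sequence it builds, with the concrete types spelled out purely for illustration (Item, dl, DAG, and Subtarget are as in the hunk above):

    // i16/i8 element in lane 0, all other lanes zero, VT == MVT::v8i16:
    Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);        // becomes movzwl/movzbl
    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); // 128-bit VT, so MiddleVT is v4i32
    Item = getShuffleVectorZeroOrUndef(Item, 0, true,                // matched as a zero-extending movd
                                       Subtarget->hasSSE2(), DAG);
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);              // reinterpret the v4i32 result as v8i16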
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 38c54712f1..43fadc219b 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -577,30 +577,13 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
 def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
           (MMX_MOVQ2FR64rr VR64:$src)>;
 
-// Move scalar to MMX zero-extended
-// movd to MMX register zero-extends
-let AddedComplexity = 15 in {
-  def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
-            (MMX_MOVZDI2PDIrr GR32:$src)>;
-  def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
-            (MMX_MOVZDI2PDIrr GR32:$src)>;
-}
-
 let AddedComplexity = 20 in {
-  def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>;
-  def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>;
   def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
             (MMX_MOVZDI2PDIrm addr:$src)>;
 }
 
 // Clear top half.
 let AddedComplexity = 15 in {
-  def : Pat<(v8i8 (X86vzmovl VR64:$src)),
-            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
-  def : Pat<(v4i16 (X86vzmovl VR64:$src)),
-            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
   def : Pat<(v2i32 (X86vzmovl VR64:$src)),
             (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
 }
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
new file mode 100644
index 0000000000..220423aa98
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -0,0 +1,37 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2 > %t1
+; RUN: grep movzwl %t1 | count 2
+; RUN: grep movzbl %t1 | count 2
+; RUN: grep movd %t1 | count 4
+
+define <4 x i16> @a(i32* %x1) nounwind {
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1
+  %x = trunc i32 %x3 to i16
+  %r = insertelement <4 x i16> zeroinitializer, i16 %x, i32 0
+  ret <4 x i16> %r
+}
+
+define <8 x i16> @b(i32* %x1) nounwind {
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1
+  %x = trunc i32 %x3 to i16
+  %r = insertelement <8 x i16> zeroinitializer, i16 %x, i32 0
+  ret <8 x i16> %r
+}
+
+define <8 x i8> @c(i32* %x1) nounwind {
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1
+  %x = trunc i32 %x3 to i8
+  %r = insertelement <8 x i8> zeroinitializer, i8 %x, i32 0
+  ret <8 x i8> %r
+}
+
+define <16 x i8> @d(i32* %x1) nounwind {
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1
+  %x = trunc i32 %x3 to i8
+  %r = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0
+  ret <16 x i8> %r
+}
+
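The four functions cover both vector widths the new branch distinguishes: @a and @c build 64-bit (MMX) results, while @b and @d build 128-bit (SSE) results. In the lowering added above, that difference only selects the intermediate i32 vector type (the patch's MiddleVT line, restated with the widths spelled out for illustration):

    // 64-bit destinations (v4i16, v8i8):   widen through v2i32, inserted with an MMX movd
    // 128-bit destinations (v8i16, v16i8): widen through v4i32, inserted with an SSE2 movd
    MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;

The grep counts then line up with one movzwl or movzbl (for the i16 and i8 truncations, respectively) plus one movd per function.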