summary refs log tree commit diff
diff options
context:
space:
mode:
author Eli Friedman <eli.friedman@gmail.com> 2009-06-07 06:52:44 +0000
committer Eli Friedman <eli.friedman@gmail.com> 2009-06-07 06:52:44 +0000
commit 7a5e55509b99d579d56d126a7b503ec6fe153a8f (patch)
tree a75cc13d22340b807d3939f42886d26cf476aaf9
parent dcef849ab0f1f8deec2f9bad0a8f0371e88dc713 (diff)
download llvm-7a5e55509b99d579d56d126a7b503ec6fe153a8f.tar.gz
llvm-7a5e55509b99d579d56d126a7b503ec6fe153a8f.tar.bz2
llvm-7a5e55509b99d579d56d126a7b503ec6fe153a8f.tar.xz
Slightly generalize the code that handles shuffles of consecutive loads
on x86 to handle more cases. Fix a bug in said code that would cause it to read past the end of an object. Rewrite the code in SelectionDAGLegalize::ExpandBUILD_VECTOR to be a bit more general. Remove PerformBuildVectorCombine, which is no longer necessary with these changes. In addition to simplifying the code, with this change, we can now catch a few more cases of consecutive loads.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73012 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 115
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 143
-rw-r--r-- test/CodeGen/X86/vec_loadsingles.ll 12
-rw-r--r-- test/CodeGen/X86/vec_set-5.ll 3
-rw-r--r-- test/CodeGen/X86/vec_set-6.ll 2
5 files changed, 90 insertions, 185 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5ae183e2fa..f3c2833e0f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1785,48 +1785,41 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
/// support the operation, but do support the resultant vector type.
SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
unsigned NumElems = Node->getNumOperands();
- SDValue SplatValue = Node->getOperand(0);
+ SDValue Value1, Value2;
DebugLoc dl = Node->getDebugLoc();
MVT VT = Node->getValueType(0);
- MVT OpVT = SplatValue.getValueType();
+ MVT OpVT = Node->getOperand(0).getValueType();
MVT EltVT = VT.getVectorElementType();
// If the only non-undef value is the low element, turn this into a
// SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
bool isOnlyLowElement = true;
-
- // FIXME: it would be far nicer to change this into map<SDValue,uint64_t>
- // and use a bitmask instead of a list of elements.
- // FIXME: this doesn't treat <0, u, 0, u> for example, as a splat.
- std::map<SDValue, std::vector<unsigned> > Values;
- Values[SplatValue].push_back(0);
+ bool MoreThanTwoValues = false;
bool isConstant = true;
- if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
- SplatValue.getOpcode() != ISD::UNDEF)
- isConstant = false;
-
- for (unsigned i = 1; i < NumElems; ++i) {
+ for (unsigned i = 0; i < NumElems; ++i) {
SDValue V = Node->getOperand(i);
- Values[V].push_back(i);
- if (V.getOpcode() != ISD::UNDEF)
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
isOnlyLowElement = false;
- if (SplatValue != V)
- SplatValue = SDValue(0, 0);
-
- // If this isn't a constant element or an undef, we can't use a constant
- // pool load.
- if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
- V.getOpcode() != ISD::UNDEF)
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
isConstant = false;
+
+ if (!Value1.getNode()) {
+ Value1 = V;
+ } else if (!Value2.getNode()) {
+ if (V != Value1)
+ Value2 = V;
+ } else if (V != Value1 && V != Value2) {
+ MoreThanTwoValues = true;
+ }
}
- if (isOnlyLowElement) {
- // If the low element is an undef too, then this whole things is an undef.
- if (Node->getOperand(0).getOpcode() == ISD::UNDEF)
- return DAG.getUNDEF(VT);
- // Otherwise, turn this into a scalar_to_vector node.
+ if (!Value1.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
- }
// If all elements are constants, create a load from the constant pool.
if (isConstant) {
@@ -1852,59 +1845,25 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
false, Alignment);
}
- if (SplatValue.getNode()) { // Splat of one value?
- // Build the shuffle constant vector: <0, 0, 0, 0>
- SmallVector<int, 8> ZeroVec(NumElems, 0);
-
- // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
- if (TLI.isShuffleMaskLegal(ZeroVec, Node->getValueType(0))) {
+ if (!MoreThanTwoValues) {
+ SmallVector<int, 8> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+ }
+ if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
// Get the splatted value into the low element of a vector register.
- SDValue LowValVec =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue);
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+ SDValue Vec2;
+ if (Value2.getNode())
+ Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+ else
+ Vec2 = DAG.getUNDEF(VT);
// Return shuffle(LowValVec, undef, <0,0,0,0>)
- return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT),
- &ZeroVec[0]);
- }
- }
-
- // If there are only two unique elements, we may be able to turn this into a
- // vector shuffle.
- if (Values.size() == 2) {
- // Get the two values in deterministic order.
- SDValue Val1 = Node->getOperand(1);
- SDValue Val2;
- std::map<SDValue, std::vector<unsigned> >::iterator MI = Values.begin();
- if (MI->first != Val1)
- Val2 = MI->first;
- else
- Val2 = (++MI)->first;
-
- // If Val1 is an undef, make sure it ends up as Val2, to ensure that our
- // vector shuffle has the undef vector on the RHS.
- if (Val1.getOpcode() == ISD::UNDEF)
- std::swap(Val1, Val2);
-
- // Build the shuffle constant vector: e.g. <0, 4, 0, 4>
- SmallVector<int, 8> ShuffleMask(NumElems, -1);
-
- // Set elements of the shuffle mask for Val1.
- std::vector<unsigned> &Val1Elts = Values[Val1];
- for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i)
- ShuffleMask[Val1Elts[i]] = 0;
-
- // Set elements of the shuffle mask for Val2.
- std::vector<unsigned> &Val2Elts = Values[Val2];
- for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i)
- if (Val2.getOpcode() != ISD::UNDEF)
- ShuffleMask[Val2Elts[i]] = NumElems;
-
- // If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it.
- if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) &&
- TLI.isShuffleMaskLegal(ShuffleMask, VT)) {
- Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1);
- Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2);
- return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]);
+ return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
}
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ef60ff5e09..7d18b968f7 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7691,13 +7691,15 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
}
static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
- MVT EVT, SDNode *&Base,
+ MVT EVT, LoadSDNode *&LDBase,
+ unsigned &LastLoadedElt,
SelectionDAG &DAG, MachineFrameInfo *MFI,
const TargetLowering &TLI) {
- Base = NULL;
+ LDBase = NULL;
+ LastLoadedElt = -1;
for (unsigned i = 0; i < NumElems; ++i) {
if (N->getMaskElt(i) < 0) {
- if (!Base)
+ if (!LDBase)
return false;
continue;
}
@@ -7706,19 +7708,20 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
if (!Elt.getNode() ||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
return false;
- if (!Base) {
- Base = Elt.getNode();
- if (Base->getOpcode() == ISD::UNDEF)
+ if (!LDBase) {
+ if (Elt.getNode()->getOpcode() == ISD::UNDEF)
return false;
+ LDBase = cast<LoadSDNode>(Elt.getNode());
+ LastLoadedElt = i;
continue;
}
if (Elt.getOpcode() == ISD::UNDEF)
continue;
LoadSDNode *LD = cast<LoadSDNode>(Elt);
- LoadSDNode *LDBase = cast<LoadSDNode>(Base);
if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
return false;
+ LastLoadedElt = i;
}
return true;
}
@@ -7737,6 +7740,9 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
unsigned NumElems = VT.getVectorNumElements();
+ if (VT.getSizeInBits() != 128)
+ return SDValue();
+
// For x86-32 machines, if we see an insert and then a shuffle in a v2i64
// where the upper half is 0, it is advantageous to rewrite it as a build
// vector of (0, val) so it can use movq.
@@ -7764,107 +7770,24 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
// Try to combine a vector_shuffle into a 128-bit load.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- SDNode *Base = NULL;
- if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, Base, DAG, MFI, TLI))
+ LoadSDNode *LD = NULL;
+ unsigned LastLoadedElt;
+ if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, LD, LastLoadedElt, DAG,
+ MFI, TLI))
return SDValue();
- LoadSDNode *LD = cast<LoadSDNode>(Base);
- if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI))
+ if (LastLoadedElt == NumElems - 1) {
+ if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI))
+ return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile());
return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile());
- return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), LD->getAlignment());
-}
-
-/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
-static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget,
- const TargetLowering &TLI) {
- unsigned NumOps = N->getNumOperands();
- DebugLoc dl = N->getDebugLoc();
-
- // Ignore single operand BUILD_VECTOR.
- if (NumOps == 1)
- return SDValue();
-
- MVT VT = N->getValueType(0);
- MVT EVT = VT.getVectorElementType();
-
- // Before or during type legalization, we want to try and convert a
- // build_vector of an i64 load and a zero value into vzext_movl before the
- // legalizer can break it up.
- // FIXME: does the case below remove the need to do this?
- if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) {
- if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
- return SDValue();
-
- // This must be an insertion into a zero vector.
- SDValue HighElt = N->getOperand(1);
- if (!isZeroNode(HighElt))
- return SDValue();
-
- // Value must be a load.
- SDNode *Base = N->getOperand(0).getNode();
- if (!isa<LoadSDNode>(Base)) {
- if (Base->getOpcode() != ISD::BIT_CONVERT)
- return SDValue();
- Base = Base->getOperand(0).getNode();
- if (!isa<LoadSDNode>(Base))
- return SDValue();
- }
-
- // Transform it into VZEXT_LOAD addr.
- LoadSDNode *LD = cast<LoadSDNode>(Base);
-
- // Load must not be an extload.
- if (LD->getExtensionType() != ISD::NON_EXTLOAD)
- return SDValue();
-
- // Load type should legal type so we don't have to legalize it.
- if (!TLI.isTypeLegal(VT))
- return SDValue();
-
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
- SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- TargetLowering::TargetLoweringOpt TLO(DAG);
- TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
- DCI.CommitTargetLoweringOpt(TLO);
- return ResNode;
- }
-
- // The type legalizer will have broken apart v2i64 build_vector created during
- // widening before the code which handles that case is run. Look for build
- // vector (load, load + 4, 0/undef, 0/undef)
- if (VT == MVT::v4i32 || VT == MVT::v4f32) {
- LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0));
- LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1));
- if (!LD0 || !LD1)
- return SDValue();
- if (LD0->getExtensionType() != ISD::NON_EXTLOAD ||
- LD1->getExtensionType() != ISD::NON_EXTLOAD)
- return SDValue();
- // Make sure the second elt is a consecutive load.
- if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1,
- DAG.getMachineFunction().getFrameInfo()))
- return SDValue();
-
- SDValue N2 = N->getOperand(2);
- SDValue N3 = N->getOperand(3);
- if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF)
- return SDValue();
- if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF)
- return SDValue();
-
+ LD->isVolatile(), LD->getAlignment());
+ } else if (NumElems == 4 && LastLoadedElt == 1) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
- SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() };
+ SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- TargetLowering::TargetLoweringOpt TLO(DAG);
- TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1));
- DCI.CommitTargetLoweringOpt(TLO);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
}
return SDValue();
@@ -8466,14 +8389,25 @@ static SDValue PerformBTCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ if (Op.getOpcode() == ISD::BIT_CONVERT)
+ Op = Op.getOperand(0);
+ MVT VT = N->getValueType(0), OpVT = Op.getValueType();
+ if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
+ VT.getVectorElementType().getSizeInBits() ==
+ OpVT.getVectorElementType().getSizeInBits()) {
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
+ }
+ return SDValue();
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
- case ISD::BUILD_VECTOR:
- return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
@@ -8485,6 +8419,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
+ case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
}
return SDValue();
diff --git a/test/CodeGen/X86/vec_loadsingles.ll b/test/CodeGen/X86/vec_loadsingles.ll
new file mode 100644
index 0000000000..67122763ec
--- /dev/null
+++ b/test/CodeGen/X86/vec_loadsingles.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+
+define <4 x float> @a(<4 x float> %a, float* nocapture %p) nounwind readonly {
+entry:
+ %tmp1 = load float* %p
+ %vecins = insertelement <4 x float> undef, float %tmp1, i32 0
+ %add.ptr = getelementptr float* %p, i32 1
+ %tmp5 = load float* %add.ptr
+ %vecins7 = insertelement <4 x float> %vecins, float %tmp5, i32 1
+ ret <4 x float> %vecins7
+}
+
diff --git a/test/CodeGen/X86/vec_set-5.ll b/test/CodeGen/X86/vec_set-5.ll
index 4fc652c022..d332970111 100644
--- a/test/CodeGen/X86/vec_set-5.ll
+++ b/test/CodeGen/X86/vec_set-5.ll
@@ -1,7 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
; RUN: grep movlhps %t | count 1
-; RUN: grep movq %t | count 1
-; RUN: grep movsd %t | count 1
+; RUN: grep movq %t | count 2
define <4 x float> @test1(float %a, float %b) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_set-6.ll b/test/CodeGen/X86/vec_set-6.ll
index 02df526cee..c7b6747a86 100644
--- a/test/CodeGen/X86/vec_set-6.ll
+++ b/test/CodeGen/X86/vec_set-6.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
; RUN: grep movss %t | count 1
-; RUN: grep movups %t | count 1
+; RUN: grep movq %t | count 1
; RUN: grep shufps %t | count 1
define <4 x float> @test(float %a, float %b, float %c) nounwind {