summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Craig Topper <craig.topper@gmail.com> 2012-04-06 07:45:23 +0000
committer Craig Topper <craig.topper@gmail.com> 2012-04-06 07:45:23 +0000
commit 9a2b6e1d7b26069fca0cac7766fbe1b29d710f23 (patch)
tree 33b56aeef410706b5a52d52e5e387de015a2f294
parent e45cddfa08992ccac052b344f52c92d66e4797ea (diff)
download llvm-9a2b6e1d7b26069fca0cac7766fbe1b29d710f23.tar.gz
llvm-9a2b6e1d7b26069fca0cac7766fbe1b29d710f23.tar.bz2
llvm-9a2b6e1d7b26069fca0cac7766fbe1b29d710f23.tar.xz
Allow 256-bit shuffles to be split if a 128-bit lane contains elements from a single source. This is a rewrite of the 256-bit shuffle splitting code based on similar code from legalize types. Fixes PR12413.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154166 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 127
-rw-r--r-- test/CodeGen/X86/avx-vpermil.ll 3
2 files changed, 57 insertions, 73 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e80bb87470..7f008a2afe 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5836,96 +5836,79 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLaneElems = NumElems / 2;
- int MinRange[2][2] = { { static_cast<int>(NumElems),
- static_cast<int>(NumElems) },
- { static_cast<int>(NumElems),
- static_cast<int>(NumElems) } };
- int MaxRange[2][2] = { { -1, -1 }, { -1, -1 } };
+ DebugLoc dl = SVOp->getDebugLoc();
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
+ SDValue Shufs[2];
- // Collect used ranges for each source in each lane
+ SmallVector<int, 16> Mask;
for (unsigned l = 0; l < 2; ++l) {
- unsigned LaneStart = l*NumLaneElems;
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If this lane needs elements from more than two 128-bit input halves,
+ // give up and return an empty SDValue.
+ int InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+ unsigned LaneStart = l * NumLaneElems;
for (unsigned i = 0; i != NumLaneElems; ++i) {
+ // The mask element. This indexes into the input.
int Idx = SVOp->getMaskElt(i+LaneStart);
- if (Idx < 0)
+ if (Idx < 0) {
+ // the mask element does not index into any input vector.
+ Mask.push_back(-1);
continue;
-
- int Input = 0;
- if (Idx >= (int)NumElems) {
- Idx -= NumElems;
- Input = 1;
}
- if (Idx > MaxRange[l][Input])
- MaxRange[l][Input] = Idx;
- if (Idx < MinRange[l][Input])
- MinRange[l][Input] = Idx;
- }
- }
+ // The input vector this mask element indexes into.
+ int Input = Idx / NumLaneElems;
- // Make sure each range is 128-bits
- int ExtractIdx[2][2] = { { -1, -1 }, { -1, -1 } };
- for (unsigned l = 0; l < 2; ++l) {
- for (unsigned Input = 0; Input < 2; ++Input) {
- if (MinRange[l][Input] == (int)NumElems && MaxRange[l][Input] < 0)
- continue;
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NumLaneElems;
- if (MinRange[l][Input] >= 0 && MaxRange[l][Input] < (int)NumLaneElems)
- ExtractIdx[l][Input] = 0;
- else if (MinRange[l][Input] >= (int)NumLaneElems &&
- MaxRange[l][Input] < (int)NumElems)
- ExtractIdx[l][Input] = NumLaneElems;
- else
- return SDValue();
- }
- }
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input)
+ // This input vector is already an operand.
+ break;
+ if (InputUsed[OpNo] < 0) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
- DebugLoc dl = SVOp->getDebugLoc();
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NVT = MVT::getVectorVT(EltVT, NumElems/2);
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up.
+ return SDValue();
+ }
- SDValue Ops[2][2];
- for (unsigned l = 0; l < 2; ++l) {
- for (unsigned Input = 0; Input < 2; ++Input) {
- if (ExtractIdx[l][Input] >= 0)
- Ops[l][Input] = Extract128BitVector(SVOp->getOperand(Input),
- DAG.getConstant(ExtractIdx[l][Input], MVT::i32),
- DAG, dl);
- else
- Ops[l][Input] = DAG.getUNDEF(NVT);
+ // Add the mask index for the new shuffle vector.
+ Mask.push_back(Idx + OpNo * NumLaneElems);
}
- }
- // Generate 128-bit shuffles
- SmallVector<int, 16> Mask1, Mask2;
- for (unsigned i = 0; i != NumLaneElems; ++i) {
- int Elt = SVOp->getMaskElt(i);
- if (Elt >= (int)NumElems) {
- Elt %= NumLaneElems;
- Elt += NumLaneElems;
- } else if (Elt >= 0) {
- Elt %= NumLaneElems;
- }
- Mask1.push_back(Elt);
- }
- for (unsigned i = NumLaneElems; i != NumElems; ++i) {
- int Elt = SVOp->getMaskElt(i);
- if (Elt >= (int)NumElems) {
- Elt %= NumLaneElems;
- Elt += NumLaneElems;
- } else if (Elt >= 0) {
- Elt %= NumLaneElems;
+ if (InputUsed[0] < 0) {
+ // No input vectors were used! The result is undefined.
+ Shufs[l] = DAG.getUNDEF(NVT);
+ } else {
+ SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2),
+ DAG.getConstant((InputUsed[0] % 2) * NumLaneElems, MVT::i32),
+ DAG, dl);
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) :
+ Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2),
+ DAG.getConstant((InputUsed[1] % 2) * NumLaneElems, MVT::i32),
+ DAG, dl);
+ // At least one input vector was used. Create a new shuffle vector.
+ Shufs[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]);
}
- Mask2.push_back(Elt);
- }
- SDValue Shuf1 = DAG.getVectorShuffle(NVT, dl, Ops[0][0], Ops[0][1], &Mask1[0]);
- SDValue Shuf2 = DAG.getVectorShuffle(NVT, dl, Ops[1][0], Ops[1][1], &Mask2[0]);
+ Mask.clear();
+ }
// Concatenate the result back
- SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shuf1,
+ SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shufs[0],
DAG.getConstant(0, MVT::i32), DAG, dl);
- return Insert128BitVector(V, Shuf2, DAG.getConstant(NumElems/2, MVT::i32),
+ return Insert128BitVector(V, Shufs[1],DAG.getConstant(NumLaneElems, MVT::i32),
DAG, dl);
}
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index 9707cd9b54..cb904b9331 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -45,7 +45,8 @@ entry:
ret <8 x float> %shuffle
}
-; CHECK: vpermilps
+; CHECK: palignr
+; CHECK: palignr
define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>