summaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2012-12-22 11:34:28 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2012-12-22 11:34:28 +0000
commit17347912b46213658074416133396caffd034e0c (patch)
tree7ffd4581245e74f4d8922c2a459d7231c3a3808d /lib/Target/X86/X86ISelLowering.cpp
parentb44c1f90e465a1905cff00212929520ab1f36b64 (diff)
downloadllvm-17347912b46213658074416133396caffd034e0c.tar.gz
llvm-17347912b46213658074416133396caffd034e0c.tar.bz2
llvm-17347912b46213658074416133396caffd034e0c.tar.xz
X86: Emit vector sext as shuffle + sra if vpmovsx is not available.
Also loosen the SSSE3 dependency a bit, expanded pshufb + psra is still better than scalarized loads. Fixes PR14590. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170984 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp47
1 files changed, 39 insertions, 8 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 444163d663..fd883075a2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16043,14 +16043,14 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
ISD::LoadExtType Ext = Ld->getExtensionType();
// If this is a vector EXT Load then attempt to optimize it using a
- // shuffle. We need SSSE3 shuffles.
- // SEXT loads are suppoted starting SSE41.
- // We generate X86ISD::VSEXT for them.
+ // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the
+ // expansion is still better than scalar code.
+ // We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll
+ // emit a shuffle and a arithmetic shift.
// TODO: It is possible to support ZExt by zeroing the undef values
// during the shuffle phase or after the shuffle.
- if (RegVT.isVector() && RegVT.isInteger() &&
- ((Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) ||
- (Ext == ISD::SEXTLOAD && Subtarget->hasSSE41()))){
+ if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() &&
+ (Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) {
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
@@ -16143,9 +16143,40 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
unsigned SizeRatio = RegSz/MemSz;
if (Ext == ISD::SEXTLOAD) {
- SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
- return DCI.CombineTo(N, Sext, TF, true);
+ // If we have SSE4.1 we can directly emit a VSEXT node.
+ if (Subtarget->hasSSE41()) {
+ SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
+ return DCI.CombineTo(N, Sext, TF, true);
+ }
+
+ // Otherwise we'll shuffle the small elements in the high bits of the
+ // larger type and perform an arithmetic shift. If the shift is not legal
+ // it's better to scalarize.
+ if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT))
+ return SDValue();
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i*SizeRatio + SizeRatio-1] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
+
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+
+ // Build the arithmetic shift.
+ unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
+ MemVT.getVectorElementType().getSizeInBits();
+ SmallVector<SDValue, 8> C(NumElems,
+ DAG.getConstant(Amt, RegVT.getScalarType()));
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size());
+ Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV);
+
+ return DCI.CombineTo(N, Shuff, TF, true);
}
+
// Redistribute the loaded elements into the different locations.
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)