diff options
author | Benjamin Kramer <benny.kra@googlemail.com> | 2012-12-22 11:34:28 +0000 |
---|---|---|
committer | Benjamin Kramer <benny.kra@googlemail.com> | 2012-12-22 11:34:28 +0000 |
commit | 17347912b46213658074416133396caffd034e0c (patch) | |
tree | 7ffd4581245e74f4d8922c2a459d7231c3a3808d /lib/Target/X86/X86ISelLowering.cpp | |
parent | b44c1f90e465a1905cff00212929520ab1f36b64 (diff) | |
download | llvm-17347912b46213658074416133396caffd034e0c.tar.gz llvm-17347912b46213658074416133396caffd034e0c.tar.bz2 llvm-17347912b46213658074416133396caffd034e0c.tar.xz |
X86: Emit vector sext as shuffle + sra if vpmovsx is not available.
Also loosen the SSSE3 dependency a bit, expanded pshufb + psra is still better
than scalarized loads. Fixes PR14590.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170984 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 47 |
1 files changed, 39 insertions, 8 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 444163d663..fd883075a2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16043,14 +16043,14 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, ISD::LoadExtType Ext = Ld->getExtensionType(); // If this is a vector EXT Load then attempt to optimize it using a - // shuffle. We need SSSE3 shuffles. - // SEXT loads are suppoted starting SSE41. - // We generate X86ISD::VSEXT for them. + // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the + // expansion is still better than scalar code. + // We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll + // emit a shuffle and a arithmetic shift. // TODO: It is possible to support ZExt by zeroing the undef values // during the shuffle phase or after the shuffle. - if (RegVT.isVector() && RegVT.isInteger() && - ((Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) || - (Ext == ISD::SEXTLOAD && Subtarget->hasSSE41()))){ + if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() && + (Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) { assert(MemVT != RegVT && "Cannot extend to the same type"); assert(MemVT.isVector() && "Must load a vector from memory"); @@ -16143,9 +16143,40 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, unsigned SizeRatio = RegSz/MemSz; if (Ext == ISD::SEXTLOAD) { - SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec); - return DCI.CombineTo(N, Sext, TF, true); + // If we have SSE4.1 we can directly emit a VSEXT node. + if (Subtarget->hasSSE41()) { + SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec); + return DCI.CombineTo(N, Sext, TF, true); + } + + // Otherwise we'll shuffle the small elements in the high bits of the + // larger type and perform an arithmetic shift. If the shift is not legal + // it's better to scalarize. + if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT)) + return SDValue(); + + // Redistribute the loaded elements into the different locations. + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i != NumElems; ++i) + ShuffleVec[i*SizeRatio + SizeRatio-1] = i; + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, + DAG.getUNDEF(WideVecVT), + &ShuffleVec[0]); + + Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff); + + // Build the arithmetic shift. + unsigned Amt = RegVT.getVectorElementType().getSizeInBits() - + MemVT.getVectorElementType().getSizeInBits(); + SmallVector<SDValue, 8> C(NumElems, + DAG.getConstant(Amt, RegVT.getScalarType())); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size()); + Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV); + + return DCI.CombineTo(N, Shuff, TF, true); } + // Redistribute the loaded elements into the different locations. SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) |