diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 45 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_shuffle-41.ll | 21 |
2 files changed, 60 insertions, 6 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 815d27ff5e..306fb7e199 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5703,6 +5703,41 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, return SDValue(); } +/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real +/// underlying vector and index. +/// +/// Modifies \p ExtractedFromVec to the real vector and returns the real +/// index. +static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, + SDValue ExtIdx) { + int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue(); + if (!isa<ShuffleVectorSDNode>(ExtractedFromVec)) + return Idx; + + // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already + // lowered this: + // (extract_vector_elt (v8f32 %vreg1), Constant<6>) + // to: + // (extract_vector_elt (vector_shuffle<2,u,u,u> + // (extract_subvector (v8f32 %vreg0), Constant<4>), + // undef) + // Constant<0>) + // In this case the vector is the extract_subvector expression and the index + // is 2, as specified by the shuffle. + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec); + SDValue ShuffleVec = SVOp->getOperand(0); + MVT ShuffleVecVT = ShuffleVec.getSimpleValueType(); + assert(ShuffleVecVT.getVectorElementType() == + ExtractedFromVec.getSimpleValueType().getVectorElementType()); + + int ShuffleIdx = SVOp->getMaskElt(Idx); + if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) { + ExtractedFromVec = ShuffleVec; + return ShuffleIdx; + } + return Idx; +} + static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); @@ -5736,15 +5771,15 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0); SDValue ExtIdx = Op.getOperand(i).getOperand(1); + // Quit if non-constant index. + if (!isa<ConstantSDNode>(ExtIdx)) + return SDValue(); + int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx); // Quit if extracted from vector of different type. if (ExtractedFromVec.getValueType() != VT) return SDValue(); - // Quit if non-constant index. - if (!isa<ConstantSDNode>(ExtIdx)) - return SDValue(); - if (VecIn1.getNode() == 0) VecIn1 = ExtractedFromVec; else if (VecIn1 != ExtractedFromVec) { @@ -5755,8 +5790,6 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { return SDValue(); } - unsigned Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue(); - if (ExtractedFromVec == VecIn1) Mask[i] = Idx; else if (ExtractedFromVec == VecIn2) diff --git a/test/CodeGen/X86/vec_shuffle-41.ll b/test/CodeGen/X86/vec_shuffle-41.ll new file mode 100644 index 0000000000..28fdd2f5ce --- /dev/null +++ b/test/CodeGen/X86/vec_shuffle-41.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s + +; Use buildFromShuffleMostly which allows this to be generated as two 128-bit +; shuffles and an insert. + +; This is the (somewhat questionable) LLVM IR that is generated for: +; x8.s0123456 = x8.s1234567; // x8 is a <8 x float> type +; x8.s7 = f; // f is float + + +define <8 x float> @test1(<8 x float> %a, float %b) { +; CHECK-LABEL: test1: +; CHECK: vinsertps +; CHECK-NOT: vinsertps +entry: + %shift = shufflevector <8 x float> %a, <8 x float> undef, <7 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %extend = shufflevector <7 x float> %shift, <7 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef> + %insert = insertelement <8 x float> %extend, float %b, i32 7 + + ret <8 x float> %insert +} |