diff options
author | Serge Pavlov <sepavloff@gmail.com> | 2014-05-11 08:46:12 +0000 |
---|---|---|
committer | Serge Pavlov <sepavloff@gmail.com> | 2014-05-11 08:46:12 +0000 |
commit | 86118b4532f0790fe7168fcf00e61a09fa2e5362 (patch) | |
tree | 246c01308e3e871a15cf1821ef239a5256faad67 | |
parent | 4ccf0ebb19ffcd22ee730d3c90112dea31d619f4 (diff) | |
download | llvm-86118b4532f0790fe7168fcf00e61a09fa2e5362.tar.gz llvm-86118b4532f0790fe7168fcf00e61a09fa2e5362.tar.bz2 llvm-86118b4532f0790fe7168fcf00e61a09fa2e5362.tar.xz |
Reorder shuffle and binary operation.
This patch enables transformations:
BinOp(shuffle(v1), shuffle(v2)) -> shuffle(BinOp(v1, v2))
BinOp(shuffle(v1), const1) -> shuffle(BinOp, const2)
They allow to eliminate extra shuffles in some cases.
Differential Revision: http://reviews.llvm.org/D3525
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208488 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/InstCombine/InstCombine.h | 2 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAddSub.cpp | 12 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 9 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 24 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineShifts.cpp | 9 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 35 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstructionCombining.cpp | 94 | ||||
-rw-r--r-- | test/Transforms/InstCombine/vec_shuffle.ll | 120 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/store-shuffle-bug.ll | 17 |
9 files changed, 300 insertions, 22 deletions
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 4a5517a6d3..0fbc7a4619 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -358,6 +358,8 @@ private: Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, unsigned Depth = 0); + Value *SimplifyVectorOp(BinaryOperator &Inst); + // FoldOpIntoPhi - Given a binary operator, cast instruction, or select // which has a PHI node as operand #0, see if we can fold the instruction // into the PHI (which is only possible if all operands to the PHI are diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index e461145d6b..326bc7539d 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -920,6 +920,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) return ReplaceInstUsesWith(I, V); @@ -1195,6 +1198,9 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL)) return ReplaceInstUsesWith(I, V); @@ -1370,6 +1376,9 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) return ReplaceInstUsesWith(I, V); @@ -1540,6 +1549,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { Instruction *InstCombiner::visitFSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL)) return ReplaceInstUsesWith(I, V); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 8c5da90ffe..4f5d65ab78 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1106,6 +1106,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAndInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1907,6 +1910,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyOrInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -2239,6 +2245,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyXorInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index ab705a4056..78689933f4 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -120,6 +120,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyMulInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -428,6 +431,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (isa<Constant>(Op0)) std::swap(Op0, Op1); @@ -878,6 +884,9 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I, Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyUDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -937,6 +946,9 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1023,6 +1035,9 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1185,6 +1200,9 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Instruction *InstCombiner::visitURem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyURemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1217,6 +1235,9 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySRemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1288,6 +1309,9 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Instruction *InstCombiner::visitFRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFRemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 5504cb2ef8..cc6665c947 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -686,6 +686,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, } Instruction *InstCombiner::visitShl(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) @@ -724,6 +727,9 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), DL)) return ReplaceInstUsesWith(I, V); @@ -764,6 +770,9 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { } Instruction *InstCombiner::visitAShr(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), DL)) return ReplaceInstUsesWith(I, V); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index b072b4d422..bd647d9e1c 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -839,6 +839,20 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { llvm_unreachable("failed to reorder elements of vector instruction!"); } +static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask, + bool &isLHSID, bool &isRHSID) { + isLHSID = isRHSID = true; + + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] < 0) continue; // Ignore undef values. + // Is this an identity shuffle of the LHS value? + isLHSID &= (Mask[i] == (int)i); + + // Is this an identity shuffle of the RHS value? + isRHSID &= (Mask[i]-e == i); + } +} + Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -902,16 +916,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (VWidth == LHSWidth) { // Analyze the shuffle, are the LHS or RHS and identity shuffles? - bool isLHSID = true, isRHSID = true; - - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] < 0) continue; // Ignore undef values. - // Is this an identity shuffle of the LHS value? - isLHSID &= (Mask[i] == (int)i); - - // Is this an identity shuffle of the RHS value? - isRHSID &= (Mask[i]-e == i); - } + bool isLHSID, isRHSID; + RecognizeIdentityMask(Mask, isLHSID, isRHSID); // Eliminate identity shuffles. if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); @@ -1106,5 +1112,14 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); } + // If the result mask is an identity, replace uses of this instruction with + // corresponding argument. + if (VWidth == LHSWidth) { + bool isLHSID, isRHSID; + RecognizeIdentityMask(newMask, isLHSID, isRHSID); + if (isLHSID) return ReplaceInstUsesWith(SVI, newLHS); + if (isRHSID) return ReplaceInstUsesWith(SVI, newRHS); + } + return MadeChange ? &SVI : nullptr; } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index f0964e8d39..82b7cae410 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1083,6 +1083,100 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { } while (1); } +/// \brief Creates node of binary operation with the same attributes as the +/// specified one but with other operands. +static BinaryOperator *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, + Value *RHS, + InstCombiner::BuilderTy *B) { + BinaryOperator *NewBO = cast<BinaryOperator>(B->CreateBinOp(Inst.getOpcode(), + LHS, RHS)); + if (isa<OverflowingBinaryOperator>(NewBO)) { + NewBO->setHasNoSignedWrap(Inst.hasNoSignedWrap()); + NewBO->setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap()); + } + if (isa<PossiblyExactOperator>(NewBO)) + NewBO->setIsExact(Inst.isExact()); + return NewBO; +} + +/// \brief Makes transformation of binary operation specific for vector types. +/// \param Inst Binary operator to transform. +/// \return Pointer to node that must replace the original binary operator, or +/// null pointer if no transformation was made. +Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { + if (!Inst.getType()->isVectorTy()) return nullptr; + + unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements(); + Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); + assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth); + assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth); + + // If both arguments of binary operation are shuffles, which use the same + // mask and shuffle within a single vector, it is worthwhile to move the + // shuffle after binary operation: + // Op(shuffle(v1, m), shuffle(v2, m)) -> shuffle(Op(v1, v2), m) + if (isa<ShuffleVectorInst>(LHS) && isa<ShuffleVectorInst>(RHS)) { + ShuffleVectorInst *LShuf = cast<ShuffleVectorInst>(LHS); + ShuffleVectorInst *RShuf = cast<ShuffleVectorInst>(RHS); + if (isa<UndefValue>(LShuf->getOperand(1)) && + isa<UndefValue>(RShuf->getOperand(1)) && + LShuf->getMask() == RShuf->getMask()) { + BinaryOperator *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0), + RShuf->getOperand(0), Builder); + Value *Res = Builder->CreateShuffleVector(NewBO, + UndefValue::get(Inst.getType()), LShuf->getMask()); + return Res; + } + } + + // If one argument is a shuffle within one vector, the other is a constant, + // try moving the shuffle after the binary operation. + ShuffleVectorInst *Shuffle = nullptr; + Constant *C1 = nullptr; + if (isa<ShuffleVectorInst>(LHS)) Shuffle = cast<ShuffleVectorInst>(LHS); + if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS); + if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS); + if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS); + if (Shuffle && C1 && isa<UndefValue>(Shuffle->getOperand(1)) && + Shuffle->getType() == Shuffle->getOperand(0)->getType()) { + SmallVector<int, 16> ShMask = Shuffle->getShuffleMask(); + // Find constant C2 that has property: + // shuffle(C2, ShMask) = C1 + // If such constant does not exist (example: ShMask=<0,0> and C1=<1,2>) + // reorder is not possible. + SmallVector<Constant*, 16> C2M(VWidth, + UndefValue::get(C1->getType()->getScalarType())); + bool MayChange = true; + for (unsigned I = 0; I < VWidth; ++I) { + if (ShMask[I] >= 0) { + assert(ShMask[I] < (int)VWidth); + if (!isa<UndefValue>(C2M[ShMask[I]])) { + MayChange = false; + break; + } + C2M[ShMask[I]] = C1->getAggregateElement(I); + } + } + if (MayChange) { + Constant *C2 = ConstantVector::get(C2M); + Value *NewLHS, *NewRHS; + if (isa<Constant>(LHS)) { + NewLHS = C2; + NewRHS = Shuffle->getOperand(0); + } else { + NewLHS = Shuffle->getOperand(0); + NewRHS = C2; + } + BinaryOperator *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder); + Value *Res = Builder->CreateShuffleVector(NewBO, + UndefValue::get(Inst.getType()), Shuffle->getMask()); + return Res; + } + } + + return nullptr; +} + Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll index a409a911ef..d619ed0e13 100644 --- a/test/Transforms/InstCombine/vec_shuffle.ll +++ b/test/Transforms/InstCombine/vec_shuffle.ll @@ -244,4 +244,122 @@ define <4 x i8> @test16b(i8 %ele) { %tmp1 = shl <8 x i8> %tmp0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4> ret <4 x i8> %tmp2 -}
\ No newline at end of file +} + +; If composition of two shuffles is identity, shuffles can be removed. +define <4 x i32> @shuffle_17ident(<4 x i32> %v) nounwind uwtable { +; CHECK-LABEL: @shuffle_17ident( +; CHECK-NOT: shufflevector + %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %shuffle2 = shufflevector <4 x i32> %shuffle, <4 x i32> zeroinitializer, + <4 x i32> <i32 3, i32 0, i32 1, i32 2> + ret <4 x i32> %shuffle2 +} + +; swizzle can be put after operation +define <4 x i32> @shuffle_17and(<4 x i32> %v1, <4 x i32> %v2) nounwind uwtable { +; CHECK-LABEL: @shuffle_17and( +; CHECK-NOT: shufflevector +; CHECK: and <4 x i32> %v1, %v2 +; CHECK: shufflevector + %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %r = and <4 x i32> %t1, %t2 + ret <4 x i32> %r +} + +define <4 x i32> @shuffle_17add(<4 x i32> %v1, <4 x i32> %v2) nounwind uwtable { +; CHECK-LABEL: @shuffle_17add( +; CHECK-NOT: shufflevector +; CHECK: add <4 x i32> %v1, %v2 +; CHECK: shufflevector + %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %r = add <4 x i32> %t1, %t2 + ret <4 x i32> %r +} + +define <4 x i32> @shuffle_17addnsw(<4 x i32> %v1, <4 x i32> %v2) nounwind uwtable { +; CHECK-LABEL: @shuffle_17addnsw( +; CHECK-NOT: shufflevector +; CHECK: add nsw <4 x i32> %v1, %v2 +; CHECK: shufflevector + %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %r = add nsw <4 x i32> %t1, %t2 + ret <4 x i32> %r +} + +define <4 x i32> @shuffle_17addnuw(<4 x i32> %v1, <4 x i32> %v2) nounwind uwtable { +; CHECK-LABEL: @shuffle_17addnuw( +; CHECK-NOT: shufflevector +; CHECK: add nuw <4 x i32> %v1, %v2 +; CHECK: shufflevector + %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %r = add nuw <4 x i32> %t1, %t2 + ret <4 x i32> %r +} + +define <4 x float> @shuffle_17fsub(<4 x float> %v1, <4 x float> %v2) nounwind uwtable { +; CHECK-LABEL: @shuffle_17fsub( +; CHECK-NOT: shufflevector +; CHECK: fsub <4 x float> %v1, %v2 +; CHECK: shufflevector + %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %r = fsub <4 x float> %t1, %t2 + ret <4 x float> %r +} + +define <4 x i32> @shuffle_17addconst(<4 x i32> %v1, <4 x i32> %v2) { +; CHECK-LABEL: @shuffle_17addconst( +; CHECK-NOT: shufflevector +; CHECK: [[VAR1:%[a-zA-Z0-9.]+]] = add <4 x i32> %v1, <i32 4, i32 1, i32 2, i32 3> +; CHECK: [[VAR2:%[a-zA-Z0-9.]+]] = shufflevector <4 x i32> [[VAR1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> +; CHECK: ret <4 x i32> [[VAR2]] + %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, + <4 x i32> <i32 1, i32 2, i32 3, i32 0> + %r = add <4 x i32> %t1, <i32 1, i32 2, i32 3, i32 4> + ret <4 x i32> %r +} + +define <4 x i32> @shuffle_17add2(<4 x i32> %v) { +; CHECK-LABEL: @shuffle_17add2( +; CHECK-NOT: shufflevector +; CHECK: [[VAR:%[a-zA-Z0-9.]+]] = shl <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1> +; CHECK: ret <4 x i32> [[VAR]] + %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, + <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %t2 = add <4 x i32> %t1, %t1 + %r = shufflevector <4 x i32> %t2, <4 x i32> zeroinitializer, + <4 x i32> <i32 3, i32 2, i32 1, i32 0> + ret <4 x i32> %r +} + +define <4 x i32> @shuffle_17mulsplat(<4 x i32> %v) { +; CHECK-LABEL: @shuffle_17mulsplat( +; CHECK-NOT: shufflevector +; CHECK: [[VAR1:%[a-zA-Z0-9.]+]] = mul <4 x i32> %v, %v +; CHECK: [[VAR2:%[a-zA-Z0-9.]+]] = shufflevector <4 x i32> [[VAR1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: ret <4 x i32> [[VAR2]] + %s1 = shufflevector <4 x i32> %v, + <4 x i32> zeroinitializer, + <4 x i32> zeroinitializer + %m1 = mul <4 x i32> %s1, %s1 + %s2 = shufflevector <4 x i32> %m1, + <4 x i32> zeroinitializer, + <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %s2 +} diff --git a/test/Transforms/LoopVectorize/store-shuffle-bug.ll b/test/Transforms/LoopVectorize/store-shuffle-bug.ll index 0ec8010756..e53c1206e4 100644 --- a/test/Transforms/LoopVectorize/store-shuffle-bug.ll +++ b/test/Transforms/LoopVectorize/store-shuffle-bug.ll @@ -19,18 +19,13 @@ entry: ; CHECK-LABEL: @t( ; CHECK: vector.body: -; CHECK: load <4 x i32> -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = shufflevector -; CHECK: load <4 x i32> -; CHECK: [[VAR2:%[a-zA-Z0-9]+]] = shufflevector +; CHECK: [[VAR1:%[a-zA-Z0-9.]+]] = load <4 x i32> +; CHECK: [[VAR2:%[a-zA-Z0-9.]+]] = load <4 x i32> ; CHECK: [[VAR3:%[a-zA-Z0-9]+]] = add nsw <4 x i32> [[VAR2]], [[VAR1]] -; CHECK: [[VAR4:%[a-zA-Z0-9]+]] = shufflevector <4 x i32> [[VAR3]] -; CHECK: store <4 x i32> [[VAR4]] -; CHECK: load <4 x i32> -; CHECK: [[VAR5:%[a-zA-Z0-9]+]] = shufflevector -; CHECK-NOT: add nsw <4 x i32> [[VAR4]], [[VAR5]] -; CHECK-NOT: add nsw <4 x i32> [[VAR5]], [[VAR4]] -; CHECK: add nsw <4 x i32> [[VAR3]], [[VAR5]] +; CHECK: store <4 x i32> [[VAR3]] +; CHECK: [[VAR4:%[a-zA-Z0-9.]+]] = load <4 x i32> +; CHECK: add nsw <4 x i32> [[VAR3]], [[VAR4]] +; CHECK-NOT: shufflevector for.body: %indvars.iv = phi i64 [ 93, %entry ], [ %indvars.iv.next, %for.body ] |