summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorKarthik Bhat <kv.bhat@samsung.com>2014-06-20 04:32:48 +0000
committerKarthik Bhat <kv.bhat@samsung.com>2014-06-20 04:32:48 +0000
commitd2ce9392dca6349a22c8f3e21e74aa59a82d8900 (patch)
treea7f380fb1cf0ef44fc663c52c8650abbdec0c0aa /lib
parentaf08b8b820acc71b02e7e8e4d0de3679c7971773 (diff)
downloadllvm-d2ce9392dca6349a22c8f3e21e74aa59a82d8900.tar.gz
llvm-d2ce9392dca6349a22c8f3e21e74aa59a82d8900.tar.bz2
llvm-d2ce9392dca6349a22c8f3e21e74aa59a82d8900.tar.xz
Add Support to Recognize and Vectorize NON SIMD instructions in SLPVectorizer.
This patch adds support to recognize patterns such as fadd,fsub,fadd,fsub.../add,sub,add,sub... and vectorizes them as vector shuffles if they are profitable. These patterns of vector shuffle can later be converted to instructions such as addsubpd etc on X86. Thanks to Arnold and Hal for the reviews. http://reviews.llvm.org/D4015 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211339 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp23
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp67
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp46
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp165
4 files changed, 259 insertions, 42 deletions
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 7f31b1a982..b2737bf754 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -39,6 +39,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
/// are set if the result needs to be inserted and/or extracted from vectors.
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+ /// Estimate the cost overhead of SK_Alternate shuffle.
+ unsigned getAltShuffleOverhead(Type *Ty) const;
+
const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); }
public:
@@ -327,8 +330,28 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return OpCost;
}
+unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const {
+ assert(Ty->isVectorTy() && "Can only shuffle vectors");
+ unsigned Cost = 0;
+ // Shuffle cost is equal to the cost of extracting element from its argument
+ // plus the cost of inserting them onto the result vector.
+
+ // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from index
+ // 0 of first vector, index 1 of second vector,index 2 of first vector and
+ // finally index 3 of second vector and insert them at index <0,1,2,3> of
+ // result vector.
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+ return Cost;
+}
+
unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) const {
+ if (Kind == SK_Alternate) {
+ return getAltShuffleOverhead(Tp);
+ }
return 1;
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 57df7da7f3..a2ace629ba 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -443,31 +443,58 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) const {
- // We only handle costs of reverse shuffles for now.
- if (Kind != SK_Reverse)
+ // We only handle costs of reverse and alternate shuffles for now.
+ if (Kind != SK_Reverse && Kind != SK_Alternate)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
- // Reverse shuffle cost one instruction if we are shuffling within a double
- // word (vrev) or two if we shuffle a quad word (vrev, vext).
- { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
- { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
- { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
- { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },
-
- { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
- { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
- { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
- { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
- };
+ if (Kind == SK_Reverse) {
+ static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
+ // Reverse shuffle cost one instruction if we are shuffling within a
+ // double word (vrev) or two if we shuffle a quad word (vrev, vext).
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
- int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx == -1)
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+ int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- return LT.first * NEONShuffleTbl[Idx].Cost;
+ return LT.first * NEONShuffleTbl[Idx].Cost;
+ }
+ if (Kind == SK_Alternate) {
+ static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
+ // Alt shuffle cost table for ARM. Cost is the number of instructions
+ // required to create the shuffled vector.
+
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ int Idx =
+ CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return LT.first * NEONAltShuffleTbl[Idx].Cost;
+ }
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
}
unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 299f9a581b..9fa911967e 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -402,17 +402,47 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) const {
- // We only estimate the cost of reverse shuffles.
- if (Kind != SK_Reverse)
+ // We only estimate the cost of reverse and alternate shuffles.
+ if (Kind != SK_Reverse && Kind != SK_Alternate)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
- unsigned Cost = 1;
- if (LT.second.getSizeInBits() > 128)
- Cost = 3; // Extract + insert + copy.
+ if (Kind == SK_Reverse) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ unsigned Cost = 1;
+ if (LT.second.getSizeInBits() > 128)
+ Cost = 3; // Extract + insert + copy.
- // Multiple by the number of parts.
- return Cost * LT.first;
+ // Multiple by the number of parts.
+ return Cost * LT.first;
+ }
+
+ if (Kind == SK_Alternate) {
+ static const CostTblEntry<MVT::SimpleValueType> X86AltShuffleTbl[] = {
+ // Alt shuffle cost table for X86. Cost is the number of instructions
+ // required to create the shuffled vector.
+
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 8},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 49}};
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+ int Idx = CostTableLookup(X86AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return LT.first * X86AltShuffleTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
}
unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f18202be16..53a43d9851 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -149,6 +149,48 @@ static bool isSplat(ArrayRef<Value *> VL) {
return true;
}
+///\returns Opcode that can be clubbed with \p Op to create an alternate
+/// sequence which can later be merged as a ShuffleVector instruction.
+static unsigned getAltOpcode(unsigned Op) {
+ switch (Op) {
+ case Instruction::FAdd:
+ return Instruction::FSub;
+ case Instruction::FSub:
+ return Instruction::FAdd;
+ case Instruction::Add:
+ return Instruction::Sub;
+ case Instruction::Sub:
+ return Instruction::Add;
+ default:
+ return 0;
+ }
+}
+
+///\returns bool representing if Opcode \p Op can be part
+/// of an alternate sequence which can later be merged as
+/// a ShuffleVector instruction.
+static bool canCombineAsAltInst(unsigned Op) {
+ if (Op == Instruction::FAdd || Op == Instruction::FSub ||
+ Op == Instruction::Sub || Op == Instruction::Add)
+ return true;
+ return false;
+}
+
+/// \returns ShuffleVector instruction if intructions in \p VL have
+/// alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence.
+/// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...)
+static unsigned isAltInst(ArrayRef<Value *> VL) {
+ Instruction *I0 = dyn_cast<Instruction>(VL[0]);
+ unsigned Opcode = I0->getOpcode();
+ unsigned AltOpcode = getAltOpcode(Opcode);
+ for (int i = 1, e = VL.size(); i < e; i++) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode))
+ return 0;
+ }
+ return Instruction::ShuffleVector;
+}
+
/// \returns The opcode if all of the Instructions in \p VL have the same
/// opcode, or zero.
static unsigned getSameOpcode(ArrayRef<Value *> VL) {
@@ -158,8 +200,11 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) {
unsigned Opcode = I0->getOpcode();
for (int i = 1, e = VL.size(); i < e; i++) {
Instruction *I = dyn_cast<Instruction>(VL[i]);
- if (!I || Opcode != I->getOpcode())
+ if (!I || Opcode != I->getOpcode()) {
+ if (canCombineAsAltInst(Opcode) && i == 1)
+ return isAltInst(VL);
return 0;
+ }
}
return Opcode;
}
@@ -377,6 +422,7 @@ public:
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
+
private:
struct TreeEntry;
@@ -594,6 +640,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
bool SameTy = getSameType(VL); (void)SameTy;
+ bool isAltShuffle = false;
assert(SameTy && "Invalid types!");
if (Depth == RecursionMaxDepth) {
@@ -615,10 +662,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
newTreeEntry(VL, false);
return;
}
+ unsigned Opcode = getSameOpcode(VL);
+
+ // Check that this shuffle vector refers to the alternate
+ // sequence of opcodes.
+ if (Opcode == Instruction::ShuffleVector) {
+ Instruction *I0 = dyn_cast<Instruction>(VL[0]);
+ unsigned Op = I0->getOpcode();
+ if (Op != Instruction::ShuffleVector)
+ isAltShuffle = true;
+ }
// If all of the operands are identical or constant we have a simple solution.
- if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) ||
- !getSameOpcode(VL)) {
+ if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) {
DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
newTreeEntry(VL, false);
return;
@@ -754,8 +810,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
- unsigned Opcode = getSameOpcode(VL);
-
// Check if it is safe to sink the loads or the stores.
if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
Instruction *Last = getLastInstruction(VL);
@@ -1057,6 +1111,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
return;
}
+ case Instruction::ShuffleVector: {
+ // If this is not an alternate sequence of opcode like add-sub
+ // then do not vectorize this instruction.
+ if (!isAltShuffle) {
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
+ return;
+ }
+ newTreeEntry(VL, true);
+ DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ buildTree_rec(Operands, Depth + 1);
+ }
+ return;
+ }
default:
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
@@ -1080,11 +1154,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
return getGatherCost(E->Scalars);
}
-
- assert(getSameOpcode(VL) && getSameType(VL) && getSameBlock(VL) &&
- "Invalid VL");
+ unsigned Opcode = getSameOpcode(VL);
+ assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL");
Instruction *VL0 = cast<Instruction>(VL[0]);
- unsigned Opcode = VL0->getOpcode();
switch (Opcode) {
case Instruction::PHI: {
return 0;
@@ -1242,6 +1314,32 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
return VecCallCost - ScalarCallCost;
}
+ case Instruction::ShuffleVector: {
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_AnyValue;
+ int ScalarCost = 0;
+ int VecCost = 0;
+ for (unsigned i = 0; i < VL.size(); ++i) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ if (!I)
+ break;
+ ScalarCost +=
+ TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK);
+ }
+ // VecCost is equal to sum of the cost of creating 2 vectors
+ // and the cost of creating shuffle.
+ Instruction *I0 = cast<Instruction>(VL[0]);
+ VecCost =
+ TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK);
+ Instruction *I1 = cast<Instruction>(VL[1]);
+ VecCost +=
+ TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK);
+ VecCost +=
+ TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0);
+ return VecCost - ScalarCost;
+ }
default:
llvm_unreachable("Unknown instruction");
}
@@ -1522,9 +1620,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
setInsertPointAfterBundle(E->Scalars);
return Gather(E->Scalars, VecTy);
}
-
- unsigned Opcode = VL0->getOpcode();
- assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode");
+ unsigned Opcode = getSameOpcode(E->Scalars);
switch (Opcode) {
case Instruction::PHI: {
@@ -1797,6 +1893,49 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->VectorizedValue = V;
return V;
}
+ case Instruction::ShuffleVector: {
+ ValueList LHSVL, RHSVL;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
+ LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
+ RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
+ }
+ setInsertPointAfterBundle(E->Scalars);
+
+ Value *LHS = vectorizeTree(LHSVL);
+ Value *RHS = vectorizeTree(RHSVL);
+
+ if (Value *V = alreadyVectorized(E->Scalars))
+ return V;
+
+ // Create a vector of LHS op1 RHS
+ BinaryOperator *BinOp0 = cast<BinaryOperator>(VL0);
+ Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS);
+
+ // Create a vector of LHS op2 RHS
+ Instruction *VL1 = cast<Instruction>(E->Scalars[1]);
+ BinaryOperator *BinOp1 = cast<BinaryOperator>(VL1);
+ Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS);
+
+ // Create appropriate shuffle to take alternative operations from
+ // the vector.
+ std::vector<Constant *> Mask(E->Scalars.size());
+ unsigned e = E->Scalars.size();
+ for (unsigned i = 0; i < e; ++i) {
+ if (i & 1)
+ Mask[i] = Builder.getInt32(e + i);
+ else
+ Mask[i] = Builder.getInt32(i);
+ }
+
+ Value *ShuffleMask = ConstantVector::get(Mask);
+
+ Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+ E->VectorizedValue = V;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return propagateMetadata(I, E->Scalars);
+
+ return V;
+ }
default:
llvm_unreachable("unknown inst");
}
@@ -1865,7 +2004,6 @@ Value *BoUpSLP::vectorizeTree() {
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
-
// No need to handle users of gathered values.
if (Entry->NeedToGather)
continue;
@@ -2049,7 +2187,6 @@ struct SLPVectorizer : public FunctionPass {
for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
e = po_end(&F.getEntryBlock()); it != e; ++it) {
BasicBlock *BB = *it;
-
// Vectorize trees that end at stores.
if (unsigned count = collectStores(BB, R)) {
(void)count;