summaryrefslogtreecommitdiff
path: root/lib/Transforms
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2013-04-09 19:44:35 +0000
committerNadav Rotem <nrotem@apple.com>2013-04-09 19:44:35 +0000
commit8383b539ff4c039108ee0c202a27b787621d96cf (patch)
treea94c718adf657b35e9c1581987a588bac83242f1 /lib/Transforms
parent376e05fd7ba37b76ea26fa7604671c9abd32307e (diff)
downloadllvm-8383b539ff4c039108ee0c202a27b787621d96cf.tar.gz
llvm-8383b539ff4c039108ee0c202a27b787621d96cf.tar.bz2
llvm-8383b539ff4c039108ee0c202a27b787621d96cf.tar.xz
Add support for bottom-up SLP vectorization infrastructure.
This commit adds the infrastructure for performing bottom-up SLP vectorization (and other optimizations) on parallel computations. The infrastructure has three potential users: 1. The loop vectorizer needs to be able to vectorize AOS data structures such as (sum += A[i] + A[i+1]). 2. The BB-vectorizer needs this infrastructure for bottom-up SLP vectorization, because bottom-up vectorization is faster to compute. 3. A loop-roller needs to be able to analyze consecutive chains and roll them into a loop, in order to reduce code size. A loop roller does not need to create vector instructions, and this infrastructure separates the chain analysis from the vectorization. This patch also includes a simple (100 LOC) bottom up SLP vectorizer that uses the infrastructure, and can vectorize this code: void SAXPY(int *x, int *y, int a, int i) { x[i] = a * x[i] + y[i]; x[i+1] = a * x[i+1] + y[i+1]; x[i+2] = a * x[i+2] + y[i+2]; x[i+3] = a * x[i+3] + y[i+3]; } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179117 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/Vectorize/CMakeLists.txt2
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp153
-rw-r--r--lib/Transforms/Vectorize/VecUtils.cpp439
-rw-r--r--lib/Transforms/Vectorize/VecUtils.h108
-rw-r--r--lib/Transforms/Vectorize/Vectorize.cpp5
5 files changed, 707 insertions, 0 deletions
diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt
index e64034ab26..7ae082f55e 100644
--- a/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/lib/Transforms/Vectorize/CMakeLists.txt
@@ -2,6 +2,8 @@ add_llvm_library(LLVMVectorize
BBVectorize.cpp
Vectorize.cpp
LoopVectorize.cpp
+ SLPVectorizer.cpp
+ VecUtils.cpp
)
add_dependencies(LLVMVectorize intrinsics_gen)
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
new file mode 100644
index 0000000000..4b61dc9120
--- /dev/null
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -0,0 +1,153 @@
+//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
+// stores that can be put together into vector-stores. Next, it attempts to
+// construct vectorizable tree using the use-def chains. If a profitable tree
+// was found, the SLP vectorizer performs vectorization on the tree.
+//
+// The pass is inspired by the work described in the paper:
+// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
+//
+//===----------------------------------------------------------------------===//
+#define SV_NAME "slp-vectorizer"
+#define DEBUG_TYPE SV_NAME
+
+#include "VecUtils.h"
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+
+static cl::opt<int>
+SLPCostThreshold("slp-threshold", cl::init(1), cl::Hidden,
+ cl::desc("Only vectorize trees if the gain is above this "
+ "number. (gain = -cost of vectorization)"));
+namespace {
+
+/// The SLPVectorizer Pass.
+struct SLPVectorizer : public BasicBlockPass {
+ typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
+
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ explicit SLPVectorizer() : BasicBlockPass(ID) {
+ initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetTransformInfo *TTI;
+ AliasAnalysis *AA;
+
+ /// \brief Collect memory references and sort them according to their base
+ /// object. We sort the stores to their base objects to reduce the cost of the
+ /// quadratic search on the stores. TODO: We can further reduce this cost
+ /// if we flush the chain creation every time we run into a memory barrier.
+ bool CollectStores(BasicBlock *BB, BoUpSLP &R) {
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ // Can't vectorize instructions with side effects.
+ if (it->mayThrow())
+ return false;
+
+ StoreInst *SI = dyn_cast<StoreInst>(it);
+ if (!SI)
+ continue;
+
+ // Check that the pointer points to scalars.
+ if (SI->getValueOperand()->getType()->isAggregateType())
+ return false;
+
+ // Find the base of the GEP.
+ Value *Ptr = SI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ Ptr = GEP->getPointerOperand();
+
+ // Save the store locations.
+ StoreRefs[Ptr].push_back(SI);
+ }
+ return true;
+ }
+
+ bool RollStoreChains(BoUpSLP &R) {
+ bool Changed = false;
+ // Attempt to sort and vectorize each of the store-groups.
+ for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
+ it != e; ++it) {
+ if (it->second.size() < 2)
+ continue;
+ Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
+ }
+ return Changed;
+ }
+
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ TTI = &getAnalysis<TargetTransformInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ StoreRefs.clear();
+
+ // Use the bollom up slp vectorizer to construct chains that start with
+ // he store instructions.
+ BoUpSLP R(&BB, SE, DL, TTI, AA);
+
+ if (!CollectStores(&BB, R))
+ return false;
+
+ bool Changed = RollStoreChains(R);
+ if (Changed) {
+ DEBUG(dbgs()<<"Rolled chains in \""<<BB.getParent()->getName()<<"\"\n");
+ DEBUG(verifyFunction(*BB.getParent()));
+ }
+
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ BasicBlockPass::getAnalysisUsage(AU);
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetTransformInfo>();
+ }
+
+private:
+ StoreListMap StoreRefs;
+};
+
+} // end anonymous namespace
+
+char SLPVectorizer::ID = 0;
+static const char lv_name[] = "SLP Vectorizer";
+INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
+
+namespace llvm {
+ Pass *createSLPVectorizerPass() {
+ return new SLPVectorizer();
+ }
+}
+
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp
new file mode 100644
index 0000000000..7e9f12d43c
--- /dev/null
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -0,0 +1,439 @@
+//===- VecUtils.h --- Vectorization Utilities -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "VecUtils"
+
+#include "VecUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <map>
+
+using namespace llvm;
+
+namespace llvm {
+
+BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
+ TargetTransformInfo *Tti, AliasAnalysis *Aa) :
+ BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa) {
+ numberInstructions();
+}
+
+void BoUpSLP::numberInstructions() {
+ int Loc = 0;
+ InstrIdx.clear();
+ InstrVec.clear();
+ // Number the instructions in the block.
+ for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
+ InstrIdx[it] = Loc++;
+ InstrVec.push_back(it);
+ assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
+ }
+}
+
+Value *BoUpSLP::getPointerOperand(Value *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
+ return 0;
+}
+
+unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
+ if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
+ if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
+ return -1;
+}
+
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+ Value *PtrA = getPointerOperand(A);
+ Value *PtrB = getPointerOperand(B);
+ unsigned ASA = getAddressSpaceOperand(A);
+ unsigned ASB = getAddressSpaceOperand(B);
+
+ // Check that the address spaces match and that the pointers are valid.
+ if (!PtrA || !PtrB || (ASA != ASB)) return false;
+
+ // Check that A and B are of the same type.
+ if (PtrA->getType() != PtrB->getType()) return false;
+
+ // Calculate the distance.
+ const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
+ const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
+ const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
+ const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
+
+ // Non constant distance.
+ if (!ConstOffSCEV) return false;
+
+ unsigned Offset = ConstOffSCEV->getValue()->getSExtValue();
+ Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+ // The Instructions are connsecutive if the size of the first load/store is
+ // the same as the offset.
+ unsigned Sz = (DL ? DL->getTypeStoreSize(Ty) : Ty->getScalarSizeInBits()/8);
+ return ((-Offset) == Sz);
+}
+
+bool BoUpSLP::vectorizeStores(StoreList &Stores, int costThreshold) {
+ ValueSet Heads, Tails;
+ SmallDenseMap<Value*, Value*> ConsecutiveChain;
+ bool Changed = false;
+
+ // Do a quadratic search on all of the given stores and find
+ // all of the pairs of loads that follow each other.
+ for (unsigned i = 0, e = Stores.size(); i < e; ++i)
+ for (unsigned j = 0; j < e; ++j) {
+ if (i == j) continue;
+ if (isConsecutiveAccess(Stores[i], Stores[j])) {
+ Tails.insert(Stores[j]);
+ Heads.insert(Stores[i]);
+ ConsecutiveChain[Stores[i]] = Stores[j];
+ }
+ }
+
+ // For stores that start but don't end a link in the chain:
+ for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
+ if (Tails.count(*it)) continue;
+
+ // We found a store instr that starts a chain. Now follow the chain and try
+ // to vectorize it.
+ ValueList Operands;
+ Value *I = *it;
+ int MinCost = 0, MinVF = 0;
+ while (Tails.count(I) || Heads.count(I)) {
+ Operands.push_back(I);
+ unsigned VF = Operands.size();
+ if (isPowerOf2_32(VF) && VF > 1) {
+ int cost = getTreeRollCost(Operands, 0);
+ DEBUG(dbgs() << "Found cost=" << cost << " for VF=" << VF << "\n");
+ if (cost < MinCost) { MinCost = cost; MinVF = VF; }
+ }
+ // Move to the next value in the chain.
+ I = ConsecutiveChain[I];
+ }
+
+ if (MinCost <= costThreshold && MinVF > 1) {
+ DEBUG(dbgs() << "Decided to vectorize cost=" << MinCost << "\n");
+ vectorizeTree(Operands, MinVF);
+ Stores.clear();
+ // The current numbering is invalid because we added and removed instrs.
+ numberInstructions();
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+int BoUpSLP::getScalarizationCost(Type *Ty) {
+ int Cost = 0;
+ for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
+ Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ return Cost;
+}
+
+AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
+ return AliasAnalysis::Location();
+}
+
+Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
+ assert(Src->getParent() == Dst->getParent() && "Not the same BB");
+ BasicBlock::iterator I = Src, E = Dst;
+ /// Scan all of the instruction from SRC to DST and check if
+ /// the source may alias.
+ for (++I; I != E; ++I) {
+ // Ignore store instructions that are marked as 'ignore'.
+ if (MemBarrierIgnoreList.count(I)) continue;
+ if (Src->mayWriteToMemory()) /* Write */ {
+ if (!I->mayReadOrWriteMemory()) continue;
+ } else /* Read */ {
+ if (!I->mayWriteToMemory()) continue;
+ }
+ AliasAnalysis::Location A = getLocation(&*I);
+ AliasAnalysis::Location B = getLocation(Src);
+
+ if (!A.Ptr || !B.Ptr || AA->alias(A, B))
+ return I;
+ }
+ return 0;
+}
+
+int BoUpSLP::getTreeRollCost(ValueList &VL, unsigned Depth) {
+ if (Depth == 6) return max_cost;
+ Type *ScalarTy = VL[0]->getType();
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+
+ /// Don't mess with vectors.
+ if (ScalarTy->isVectorTy()) return max_cost;
+
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+
+ // Check if all of the operands are constants.
+ bool AllConst = true;
+ bool AllSameScalar = true;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ AllConst &= isa<Constant>(VL[i]);
+ AllSameScalar &= (VL[0] == VL[i]);
+ // Must have a single use.
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // Need to scalarize instructions with multiple users or from other BBs.
+ if (I && ((I->getNumUses() > 1) || (I->getParent() != BB)))
+ return getScalarizationCost(VecTy);
+ }
+
+ // Is this a simple vector constant.
+ if (AllConst) return 0;
+
+ // If all of the operands are identical we can broadcast them.
+ if (AllSameScalar)
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
+
+ // Scalarize unknown structures.
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+ if (!VL0) return getScalarizationCost(VecTy);
+ assert(VL0->getParent() == BB && "Wrong BB");
+
+ unsigned Opcode = VL0->getOpcode();
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If not all of the instructions are identical then we have to scalarize.
+ if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
+ }
+
+ // Check if it is safe to sink the loads or the stores.
+ if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
+ int MaxIdx = InstrIdx[VL0];
+ for (unsigned i = 1, e = VL.size(); i < e; ++i )
+ MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
+
+ Instruction *Last = InstrVec[MaxIdx];
+ for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
+ if (VL[i] == Last) continue;
+ Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
+ if (Barrier) {
+ DEBUG(dbgs() << "LR: Can't sink " << *VL[i] << "\n down to " <<
+ *Last << "\n because of " << *Barrier << "\n");
+ return max_cost;
+ }
+ }
+ }
+
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ ValueList Operands;
+ int Cost = 0;
+ // Calculate the cost of all of the operands.
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+ Cost += getTreeRollCost(Operands, Depth+1);
+ Operands.clear();
+ }
+
+ // Calculate the cost of this instruction.
+ int ScalarCost = VecTy->getNumElements() *
+ TTI->getArithmeticInstrCost(Opcode, ScalarTy);
+ int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
+ Cost += (VecCost - ScalarCost);
+ return Cost;
+ }
+ case Instruction::Load: {
+ // If we are scalarize the loads, add the cost of forming the vector.
+ for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
+ if (!isConsecutiveAccess(VL[i], VL[i+1]))
+ return getScalarizationCost(VecTy);
+
+ // Cost of wide load - cost of scalar loads.
+ int ScalarLdCost = VecTy->getNumElements() *
+ TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+ int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+ return VecLdCost - ScalarLdCost;
+ }
+ case Instruction::Store: {
+ // We know that we can merge the stores. Calculate the cost.
+ int ScalarStCost = VecTy->getNumElements() *
+ TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
+ int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
+ int StoreCost = VecStCost - ScalarStCost;
+
+ ValueList Operands;
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+ MemBarrierIgnoreList.insert(VL[j]);
+ }
+
+ int TotalCost = StoreCost + getTreeRollCost(Operands, Depth + 1);
+ MemBarrierIgnoreList.clear();
+ return TotalCost;
+ }
+ default:
+ // Unable to vectorize unknown instructions.
+ return getScalarizationCost(VecTy);
+ }
+}
+
+Instruction *BoUpSLP::GetLastInstr(ValueList &VL, unsigned VF) {
+ int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
+ for (unsigned i = 0; i < VF; ++i )
+ MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
+ return InstrVec[MaxIdx + 1];
+}
+
+Value *BoUpSLP::Scalarize(ValueList &VL, VectorType *Ty) {
+ IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
+ Value *Vec = UndefValue::get(Ty);
+ for (unsigned i=0; i < Ty->getNumElements(); ++i)
+ Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
+ return Vec;
+}
+
+Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) {
+ Type *ScalarTy = VL[0]->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ VectorType *VecTy = VectorType::get(ScalarTy, VF);
+
+ // Check if all of the operands are constants or identical.
+ bool AllConst = true;
+ bool AllSameScalar = true;
+ for (unsigned i = 0, e = VF; i < e; ++i) {
+ AllConst &= !!dyn_cast<Constant>(VL[i]);
+ AllSameScalar &= (VL[0] == VL[i]);
+ // Must have a single use.
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ if (I && (I->getNumUses() > 1 || I->getParent() != BB))
+ return Scalarize(VL, VecTy);
+ }
+
+ // Is this a simple vector constant.
+ if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
+
+ // Scalarize unknown structures.
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+ if (!VL0) return Scalarize(VL, VecTy);
+
+ unsigned Opcode = VL0->getOpcode();
+ for (unsigned i = 0, e = VF; i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If not all of the instructions are identical then we have to scalarize.
+ if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
+ }
+
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ ValueList LHSVL, RHSVL;
+ for (int i = 0; i < VF; ++i) {
+ RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
+ }
+
+ Value *RHS = vectorizeTree(RHSVL, VF);
+ Value *LHS = vectorizeTree(LHSVL, VF);
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(VL0);
+ return Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
+ }
+ case Instruction::Load: {
+ LoadInst *LI = dyn_cast<LoadInst>(VL0);
+ unsigned Alignment = LI->getAlignment();
+
+ // Check if all of the loads are consecutive.
+ for (unsigned i = 1, e = VF; i < e; ++i)
+ if (!isConsecutiveAccess(VL[i-1], VL[i]))
+ return Scalarize(VL, VecTy);
+
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
+ VecTy->getPointerTo());
+ LI = Builder.CreateLoad(VecPtr);
+ LI->setAlignment(Alignment);
+ return LI;
+ }
+ case Instruction::Store: {
+ StoreInst *SI = dyn_cast<StoreInst>(VL0);
+ unsigned Alignment = SI->getAlignment();
+
+ ValueList ValueOp;
+ for (int i = 0; i < VF; ++i)
+ ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
+
+ Value *VecValue = vectorizeTree(ValueOp, VF);
+
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
+ VecTy->getPointerTo());
+ Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
+
+ for (int i = 0; i < VF; ++i)
+ cast<Instruction>(VL[i])->eraseFromParent();
+ return 0;
+ }
+ default:
+ return Scalarize(VL, VecTy);
+ }
+}
+
+} // end of namespace
diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h
new file mode 100644
index 0000000000..b1c2b0c649
--- /dev/null
+++ b/lib/Transforms/Vectorize/VecUtils.h
@@ -0,0 +1,108 @@
+//===- VecUtils.cpp - Vectorization Utilities -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of classes and functions manipulate vectors and chains of
+// vectors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_AOSVECTORIZER_H
+#define LLVM_TRANSFORMS_VECTORIZE_AOSVECTORIZER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+
+class BasicBlock; class Instruction; class Type;
+class VectorType; class StoreInst; class Value;
+class ScalarEvolution; class DataLayout;
+class TargetTransformInfo; class AliasAnalysis;
+
+/// Bottom Up SLP vectorization utility class.
+struct BoUpSLP {
+ typedef SmallVector<Value*, 8> ValueList;
+ typedef SmallPtrSet<Value*, 16> ValueSet;
+ typedef SmallVector<StoreInst*, 8> StoreList;
+ static const int max_cost = 1<<20;
+
+ // \brief C'tor.
+ BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,
+ TargetTransformInfo *Tti, AliasAnalysis *Aa);
+
+ /// \returns true if the memory operations A and B are consecutive.
+ bool isConsecutiveAccess(Value *A, Value *B);
+
+ /// \brief Vectorize the tree that starts with the elements in \p VL.
+ /// \returns the vectorized value.
+ Value *vectorizeTree(ValueList &VL, int VF);
+
+ /// \returns the vectorization cost of the subtree that starts at \p VL.
+ /// A negative number means that this is profitable.
+ int getTreeRollCost(ValueList &VL, unsigned Depth);
+
+ /// \brief Take the pointer operand from the Load/Store instruction.
+ /// \returns NULL if this is not a valid Load/Store instruction.
+ static Value *getPointerOperand(Value *I);
+
+ /// \brief Take the address space operand from the Load/Store instruction.
+ /// \returns -1 if this is not a valid Load/Store instruction.
+ static unsigned getAddressSpaceOperand(Value *I);
+
+ /// \brief Attempts to order and vectorize a sequence of stores. This
+ /// function does a quadratic scan of the given stores.
+ /// \returns true if the basic block was modified.
+ bool vectorizeStores(StoreList &Stores, int costThreshold);
+
+ /// \brief Number all of the instructions in the block.
+ void numberInstructions();
+
+private:
+ /// \returns the scalarization cost for this type. Scalarization in this
+ /// context means the creation of vectors from a group of scalars.
+ int getScalarizationCost(Type *Ty);
+
+ /// \returns the AA location that is being access by the instruction.
+ AliasAnalysis::Location getLocation(Instruction *I);
+
+ /// \brief Checks if it is possible to sink an instruction from
+ /// \p Src to \p Dst.
+ /// \returns the pointer to the barrier instruction if we can't sink.
+ Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
+
+ /// \returns the instruction that appears last in the BB from \p VL.
+ /// Only consider the first \p VF elements.
+ Instruction *GetLastInstr(ValueList &VL, unsigned VF);
+
+ /// \returns a vector from a collection of scalars in \p VL.
+ Value *Scalarize(ValueList &VL, VectorType *Ty);
+
+ // Maps instructions to numbers and back.
+ SmallDenseMap<Value*, int> InstrIdx;
+ std::vector<Instruction*> InstrVec;
+ // A list of instructions to ignore while sinking
+ // memory instructions.
+ SmallSet<Value*, 8> MemBarrierIgnoreList;
+ // Analysis and block reference.
+ BasicBlock *BB;
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetTransformInfo *TTI;
+ AliasAnalysis *AA;
+};
+
+} // end of namespace
+# endif //LLVM_TRANSFORMS_VECTORIZE_AOSVECTORIZER_H
+
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 19eefd2f87..3aff6366a6 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -28,6 +28,7 @@ using namespace llvm;
void llvm::initializeVectorization(PassRegistry &Registry) {
initializeBBVectorizePass(Registry);
initializeLoopVectorizePass(Registry);
+ initializeSLPVectorizerPass(Registry);
}
void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
@@ -41,3 +42,7 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopVectorizePass());
}
+
+void LLVMAddLoopRollerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSLPVectorizerPass());
+}