summaryrefslogtreecommitdiff
path: root/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp2
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCOpts.cpp131
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp162
-rw-r--r--lib/Transforms/Vectorize/VecUtils.cpp44
5 files changed, 291 insertions, 54 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 0ef900e2b9..4a9cb27b03 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -3323,8 +3323,6 @@ bool GlobalOpt::runOnModule(Module &M) {
// Try to find the llvm.globalctors list.
GlobalVariable *GlobalCtors = FindGlobalCtors(M);
- Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
-
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
@@ -3342,7 +3340,9 @@ bool GlobalOpt::runOnModule(Module &M) {
// Resolve aliases, when possible.
LocalChange |= OptimizeGlobalAliases(M);
- // Try to remove trivial global destructors.
+ // Try to remove trivial global destructors if they are not removed
+ // already.
+ Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
if (CXAAtExitFn)
LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 87d56214a3..51ca29bc07 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -846,7 +846,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
/// FP value and:
/// 1) 1/C is exact, or
/// 2) reciprocal is allowed.
-/// If the convertion was successful, the simplified expression "X * 1/C" is
+/// If the conversion was successful, the simplified expression "X * 1/C" is
/// returned; otherwise, NULL is returned.
///
static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 43e2e20035..4bf25facc6 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -107,6 +107,12 @@ namespace {
return std::make_pair(Vector.begin() + Pair.first->second, false);
}
+ iterator find(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end()) return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
const_iterator find(const KeyT &Key) const {
typename MapTy::const_iterator It = Map.find(Key);
if (It == Map.end()) return Vector.end();
@@ -253,6 +259,40 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
return false;
}
+/// This is a wrapper around getUnderlyingObjCPtr along the lines of
+/// GetUnderlyingObjects except that it returns early when it sees the first
+/// alloca.
+static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) {
+ SmallPtrSet<const Value *, 4> Visited;
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(V);
+ do {
+ const Value *P = Worklist.pop_back_val();
+ P = GetUnderlyingObjCPtr(P);
+
+ if (isa<AllocaInst>(P))
+ return true;
+
+ if (!Visited.insert(P))
+ continue;
+
+ if (const SelectInst *SI = dyn_cast<const SelectInst>(P)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<const PHINode>(P)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
+ }
+ } while (!Worklist.empty());
+
+ return false;
+}
+
+
/// @}
///
/// \defgroup ARCOpt ARC Optimization.
@@ -300,18 +340,18 @@ STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases");
STATISTIC(NumRets, "Number of return value forwarding "
- "retain+autoreleaes eliminated");
+ "retain+autoreleases eliminated");
STATISTIC(NumRRs, "Number of retain+release paths eliminated");
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+#ifndef NDEBUG
STATISTIC(NumRetainsBeforeOpt,
- "Number of retains before optimization.");
+ "Number of retains before optimization");
STATISTIC(NumReleasesBeforeOpt,
- "Number of releases before optimization.");
-#ifndef NDEBUG
+ "Number of releases before optimization");
STATISTIC(NumRetainsAfterOpt,
- "Number of retains after optimization.");
+ "Number of retains after optimization");
STATISTIC(NumReleasesAfterOpt,
- "Number of releases after optimization.");
+ "Number of releases after optimization");
#endif
namespace {
@@ -414,8 +454,18 @@ namespace {
/// sequence.
SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+ /// Does this pointer have multiple owners?
+ ///
+ /// In the presence of multiple owners with the same provenance caused by
+ /// allocas, we can not assume that the frontend will emit balanced code
+ /// since it could put the release on the pointer loaded from the
+ /// alloca. This confuses the optimizer so we must be more conservative in
+ /// that case.
+ bool MultipleOwners;
+
RRInfo() :
- KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}
+ KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0),
+ MultipleOwners(false) {}
void clear();
@@ -428,6 +478,7 @@ namespace {
void RRInfo::clear() {
KnownSafe = false;
IsTailCallRelease = false;
+ MultipleOwners = false;
ReleaseMetadata = 0;
Calls.clear();
ReverseInsertPts.clear();
@@ -457,10 +508,12 @@ namespace {
Seq(S_None) {}
void SetKnownPositiveRefCount() {
+ DEBUG(dbgs() << "Setting Known Positive.\n");
KnownPositiveRefCount = true;
}
void ClearKnownPositiveRefCount() {
+ DEBUG(dbgs() << "Clearing Known Positive.\n");
KnownPositiveRefCount = false;
}
@@ -516,6 +569,7 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
Other.RRI.IsTailCallRelease;
RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+ RRI.MultipleOwners |= Other.RRI.MultipleOwners;
// Merge the insert point sets. If there are any differences,
// that makes this a partial merge.
@@ -587,14 +641,26 @@ namespace {
/// definition.
void SetAsExit() { BottomUpPathCount = 1; }
+ /// Attempt to find the PtrState object describing the top down state for
+ /// pointer Arg. Return a new initialized PtrState describing the top down
+ /// state for Arg if we do not find one.
PtrState &getPtrTopDownState(const Value *Arg) {
return PerPtrTopDown[Arg];
}
+ /// Attempt to find the PtrState object describing the bottom up state for
+ /// pointer Arg. Return a new initialized PtrState describing the bottom up
+ /// state for Arg if we do not find one.
PtrState &getPtrBottomUpState(const Value *Arg) {
return PerPtrBottomUp[Arg];
}
+ /// Attempt to find the PtrState object describing the bottom up state for
+ /// pointer Arg.
+ ptr_iterator findPtrBottomUpState(const Value *Arg) {
+ return PerPtrBottomUp.find(Arg);
+ }
+
void clearBottomUpPointers() {
PerPtrBottomUp.clear();
}
@@ -1440,11 +1506,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_RetainBlock:
// If we strength reduce an objc_retainBlock to an objc_retain, continue
// onto the objc_retain peephole optimizations. Otherwise break.
- if (!OptimizeRetainBlockCall(F, Inst, Class))
- break;
- // FALLTHROUGH
- case IC_Retain:
- ++NumRetainsBeforeOpt;
+ OptimizeRetainBlockCall(F, Inst, Class);
break;
case IC_RetainRV:
if (OptimizeRetainRVCall(F, Inst))
@@ -1453,9 +1515,6 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_AutoreleaseRV:
OptimizeAutoreleaseRVCall(F, Inst, Class);
break;
- case IC_Release:
- ++NumReleasesBeforeOpt;
- break;
}
// objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
@@ -1866,6 +1925,28 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case IC_None:
// These are irrelevant.
return NestingDetected;
+ case IC_User:
+ // If we have a store into an alloca of a pointer we are tracking, the
+ // pointer has multiple owners implying that we must be more conservative.
+ //
+ // This comes up in the context of a pointer being ``KnownSafe''. In the
+ // presense of a block being initialized, the frontend will emit the
+ // objc_retain on the original pointer and the release on the pointer loaded
+ // from the alloca. The optimizer will through the provenance analysis
+ // realize that the two are related, but since we only require KnownSafe in
+ // one direction, will match the inner retain on the original pointer with
+ // the guard release on the original pointer. This is fixed by ensuring that
+ // in the presense of allocas we only unconditionally remove pointers if
+ // both our retain and our release are KnownSafe.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) {
+ BBState::ptr_iterator I = MyStates.findPtrBottomUpState(
+ StripPointerCastsAndObjCCalls(SI->getValueOperand()));
+ if (I != MyStates.bottom_up_ptr_end())
+ I->second.RRI.MultipleOwners = true;
+ }
+ }
+ break;
default:
break;
}
@@ -2412,8 +2493,10 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
bool KnownSafe,
bool &AnyPairsCompletelyEliminated) {
// If a pair happens in a region where it is known that the reference count
- // is already incremented, we can similarly ignore possible decrements.
+ // is already incremented, we can similarly ignore possible decrements unless
+ // we are dealing with a retainable object with multiple provenance sources.
bool KnownSafeTD = true, KnownSafeBU = true;
+ bool MultipleOwners = false;
// Connect the dots between the top-down-collected RetainsToMove and
// bottom-up-collected ReleasesToMove to form sets of related calls.
@@ -2432,6 +2515,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
assert(It != Retains.end());
const RRInfo &NewRetainRRI = It->second;
KnownSafeTD &= NewRetainRRI.KnownSafe;
+ MultipleOwners |= NewRetainRRI.MultipleOwners;
for (SmallPtrSet<Instruction *, 2>::const_iterator
LI = NewRetainRRI.Calls.begin(),
LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
@@ -2525,9 +2609,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (NewRetains.empty()) break;
}
- // If the pointer is known incremented or nested, we can safely delete the
- // pair regardless of what's between them.
- if (KnownSafeTD || KnownSafeBU) {
+ // If the pointer is known incremented in 1 direction and we do not have
+ // MultipleOwners, we can safely remove the retain/releases. Otherwise we need
+ // to be known safe in both directions.
+ bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) ||
+ ((KnownSafeTD || KnownSafeBU) && !MultipleOwners);
+ if (UnconditionallySafe) {
RetainsToMove.ReverseInsertPts.clear();
ReleasesToMove.ReverseInsertPts.clear();
NewCount = 0;
@@ -3050,6 +3137,12 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
PA.setAA(&getAnalysis<AliasAnalysis>());
+#ifndef NDEBUG
+ if (AreStatisticsEnabled()) {
+ GatherStatistics(F, false);
+ }
+#endif
+
// This pass performs several distinct transformations. As a compile-time aid
// when compiling code that isn't ObjC, skip these if the relevant ObjC
// library functions aren't declared.
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0dd6abb1ae..58a1a74655 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -318,6 +318,93 @@ private:
ValueMap WidenMap;
};
+/// \brief Check if conditionally executed loads are hoistable.
+///
+/// This class has two functions: isHoistableLoad and canHoistAllLoads.
+/// isHoistableLoad should be called on all load instructions that are executed
+/// conditionally. After all conditional loads are processed, the client should
+/// call canHoistAllLoads to determine if all of the conditional executed loads
+/// have an unconditional memory access to the same memory address in the loop.
+class LoadHoisting {
+ typedef SmallPtrSet<Value *, 8> MemorySet;
+
+ Loop *TheLoop;
+ DominatorTree *DT;
+ MemorySet CondLoadAddrSet;
+
+public:
+ LoadHoisting(Loop *L, DominatorTree *D) : TheLoop(L), DT(D) {}
+
+ /// \brief Check if the instruction is a load with a identifiable address.
+ bool isHoistableLoad(Instruction *L);
+
+ /// \brief Check if all of the conditional loads are hoistable because there
+ /// exists an unconditional memory access to the same address in the loop.
+ bool canHoistAllLoads();
+};
+
+bool LoadHoisting::isHoistableLoad(Instruction *L) {
+ LoadInst *LI = dyn_cast<LoadInst>(L);
+ if (!LI)
+ return false;
+
+ CondLoadAddrSet.insert(LI->getPointerOperand());
+ return true;
+}
+
+static void addMemAccesses(BasicBlock *BB, SmallPtrSet<Value *, 8> &Set) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ Value *Addr = 0;
+
+ // Try a load.
+ LoadInst *LI = dyn_cast<LoadInst>(I);
+ if (LI) {
+ Addr = LI->getPointerOperand();
+ Set.insert(Addr);
+ continue;
+ }
+
+ // Try a store.
+ StoreInst *SI = dyn_cast<StoreInst>(I);
+ if (!SI)
+ continue;
+
+ Addr = SI->getPointerOperand();
+ Set.insert(Addr);
+ }
+}
+
+bool LoadHoisting::canHoistAllLoads() {
+ // No conditional loads.
+ if (CondLoadAddrSet.empty())
+ return true;
+
+ MemorySet UncondMemAccesses;
+ std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
+ BasicBlock *LoopLatch = TheLoop->getLoopLatch();
+
+ // Iterate over the unconditional blocks and collect memory access addresses.
+ for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
+ BasicBlock *BB = LoopBlocks[i];
+
+ // Ignore conditional blocks.
+ if (BB != LoopLatch && !DT->dominates(BB, LoopLatch))
+ continue;
+
+ addMemAccesses(BB, UncondMemAccesses);
+ }
+
+ // And make sure there is a matching unconditional access for every
+ // conditional load.
+ for (MemorySet::iterator MI = CondLoadAddrSet.begin(),
+ ME = CondLoadAddrSet.end(); MI != ME; ++MI)
+ if (!UncondMemAccesses.count(*MI))
+ return false;
+
+ return true;
+}
+
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@@ -337,7 +424,8 @@ public:
DominatorTree *DT, TargetTransformInfo* TTI,
AliasAnalysis *AA, TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
- Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {}
+ Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
+ LoadSpeculation(L, DT) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -598,6 +686,9 @@ private:
RuntimePointerCheck PtrRtCheck;
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
+
+ /// Utility to determine whether loads can be speculated.
+ LoadHoisting LoadSpeculation;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1389,9 +1480,10 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
case LoopVectorizationLegality::IK_IntInduction: {
- // Handle the integer induction counter:
+ // Handle the integer induction counter.
assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
- assert(OrigPhi == OldInduction && "Unknown integer PHI");
+
+ // We have the canonical induction variable.
if (OrigPhi == OldInduction) {
// Create a truncated version of the resume value for the scalar loop,
// we might have promoted the type to a larger width.
@@ -1402,11 +1494,20 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
TruncResumeVal->addIncoming(EndValue, VecBody);
+
+ // We know what the end value is.
+ EndValue = IdxEndRoundDown;
+ // We also know which PHI node holds it.
+ ResumeIndex = ResumeVal;
+ break;
}
- // We know what the end value is.
- EndValue = IdxEndRoundDown;
- // We also know which PHI node holds it.
- ResumeIndex = ResumeVal;
+
+ // Not the canonical induction variable - add the vector loop count to the
+ // start value.
+ Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
+ II.StartValue->getType(),
+ "cast.crd");
+ EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end");
break;
}
case LoopVectorizationLegality::IK_ReverseIntInduction: {
@@ -2056,12 +2157,25 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
case LoopVectorizationLegality::IK_IntInduction: {
- assert(P == OldInduction && "Unexpected PHI");
- // We might have had to extend the type.
- Value *Trunc = Builder.CreateTrunc(Induction, P->getType());
- Value *Broadcasted = getBroadcastInstrs(Trunc);
- // After broadcasting the induction variable we need to make the
- // vector consecutive by adding 0, 1, 2 ...
+ assert(P->getType() == II.StartValue->getType() && "Types must match");
+ Type *PhiTy = P->getType();
+ Value *Broadcasted;
+ if (P == OldInduction) {
+ // Handle the canonical induction variable. We might have had to
+ // extend the type.
+ Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
+ } else {
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
+ "normalized.idx");
+ NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
+ Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
+ "offset.idx");
+ }
+ Broadcasted = getBroadcastInstrs(Broadcasted);
+ // After broadcasting the induction variable we need to make the vector
+ // consecutive by adding 0, 1, 2, etc.
for (unsigned part = 0; part < UF; ++part)
Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
continue;
@@ -2466,11 +2580,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Int inductions are special because we only allow one IV.
if (IK == IK_IntInduction) {
- if (Induction) {
- DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
- return false;
- }
- Induction = Phi;
+ // Use the phi node with the widest type as induction. Use the last
+ // one if there are multiple (no good reason for doing this other
+ // than it is expedient).
+ if (!Induction || PhiTy == WidestIndTy)
+ Induction = Phi;
}
DEBUG(dbgs() << "LV: Found an induction variable.\n");
@@ -3236,8 +3350,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- // We don't predicate loads/stores at the moment.
- if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
+ // We might be able to hoist the load.
+ if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it))
+ return false;
+
+ // We don't predicate stores at the moment.
+ if (it->mayWriteToMemory() || it->mayThrow())
return false;
// The instructions below can trap.
@@ -3251,6 +3369,10 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
}
}
+ // Check that we can actually speculate the hoistable loads.
+ if (!LoadSpeculation.canHoistAllLoads())
+ return false;
+
return true;
}
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp
index 55adf8a816..50d2af0f65 100644
--- a/lib/Transforms/Vectorize/VecUtils.cpp
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -282,6 +282,7 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
DEBUG(dbgs()<<"SLP: Adding to MustExtract "
"because of a safe out of tree usage.\n");
MustExtract.insert(*it);
+ continue;
}
if (Lane == -1) Lane = LaneMap[*I];
if (Lane != LaneMap[*I]) {
@@ -610,6 +611,9 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
GatherInstructions.push_back(Vec);
}
+ for (unsigned i = 0; i < Ty->getNumElements(); ++i)
+ VectorizedValues[VL[i]] = Vec;
+
return Vec;
}
@@ -617,6 +621,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
Value *V = vectorizeTree_rec(VL, VF);
Instruction *LastInstr = GetLastInstr(VL, VL.size());
+ int LastInstrIdx = InstrIdx[LastInstr];
IRBuilder<> Builder(LastInstr);
for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end();
it != e; ++it) {
@@ -625,7 +630,16 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
assert(LaneMap.count(I) && "Unable to find the lane for the external use");
Value *Idx = Builder.getInt32(LaneMap[I]);
Value *Extract = Builder.CreateExtractElement(Vec, Idx);
- I->replaceAllUsesWith(Extract);
+ bool Replaced = false;
+ for (Value::use_iterator U = I->use_begin(), UE = U->use_end(); U != UE;
+ ++U) {
+ Instruction *UI = cast<Instruction>(*U);
+ if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx)
+ UI->replaceUsesOfWith(I ,Extract);
+ Replaced = true;
+ }
+ assert(Replaced && "Must replace at least one outside user");
+ (void)Replaced;
}
// We moved some instructions around. We have to number them again
@@ -633,6 +647,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
numberInstructions();
MustScalarize.clear();
MustExtract.clear();
+ VectorizedValues.clear();
return V;
}
@@ -690,7 +705,10 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
IRBuilder<> Builder(GetLastInstr(VL, VF));
CastInst *CI = dyn_cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
- VectorizedValues[VL0] = V;
+
+ for (int i = 0; i < VF; ++i)
+ VectorizedValues[VL[i]] = V;
+
return V;
}
case Instruction::Add:
@@ -713,16 +731,19 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
case Instruction::Xor: {
ValueList LHSVL, RHSVL;
for (int i = 0; i < VF; ++i) {
- RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
- LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
+ LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
}
- Value *RHS = vectorizeTree_rec(RHSVL, VF);
Value *LHS = vectorizeTree_rec(LHSVL, VF);
+ Value *RHS = vectorizeTree_rec(RHSVL, VF);
IRBuilder<> Builder(GetLastInstr(VL, VF));
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
- VectorizedValues[VL0] = V;
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS);
+
+ for (int i = 0; i < VF; ++i)
+ VectorizedValues[VL[i]] = V;
+
return V;
}
case Instruction::Load: {
@@ -739,7 +760,10 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
VecTy->getPointerTo());
LI = Builder.CreateLoad(VecPtr);
LI->setAlignment(Alignment);
- VectorizedValues[VL0] = LI;
+
+ for (int i = 0; i < VF; ++i)
+ VectorizedValues[VL[i]] = LI;
+
return LI;
}
case Instruction::Store: {
@@ -762,9 +786,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
return 0;
}
default:
- Value *S = Scalarize(VL, VecTy);
- VectorizedValues[VL0] = S;
- return S;
+ return Scalarize(VL, VecTy);
}
}