From 57e02b3358e8caeddb093fab88fee6f73cd5d597 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 12 May 2013 22:55:57 +0000 Subject: SLPVectorizer: Clear the map that maps between scalars to vectors after each round of vectorization. Testcase in the next commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181673 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/VecUtils.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index 55adf8a816..6f36c938fa 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -633,6 +633,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL, int VF) { numberInstructions(); MustScalarize.clear(); MustExtract.clear(); + VectorizedValues.clear(); return V; } -- cgit v1.2.3 From 507b9242ed3cbac13a1c4c58fe28188e1a0d6fa6 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 12 May 2013 22:58:45 +0000 Subject: SLPVectorizer: Fix a bug in the code that generates extracts for values with multiple users. The external user does not have to be in lane #0. We have to save the lane for each scalar so that we know which vector lane to extract. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181674 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/VecUtils.cpp | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index 6f36c938fa..1362b784f0 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -282,6 +282,7 @@ int BoUpSLP::getTreeCost(ArrayRef VL) { DEBUG(dbgs()<<"SLP: Adding to MustExtract " "because of a safe out of tree usage.\n"); MustExtract.insert(*it); + continue; } if (Lane == -1) Lane = LaneMap[*I]; if (Lane != LaneMap[*I]) { @@ -610,6 +611,9 @@ Value *BoUpSLP::Scalarize(ArrayRef VL, VectorType *Ty) { GatherInstructions.push_back(Vec); } + for (unsigned i = 0; i < Ty->getNumElements(); ++i) + VectorizedValues[VL[i]] = Vec; + return Vec; } @@ -617,6 +621,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL, int VF) { Value *V = vectorizeTree_rec(VL, VF); Instruction *LastInstr = GetLastInstr(VL, VL.size()); + int LastInstrIdx = InstrIdx[LastInstr]; IRBuilder<> Builder(LastInstr); for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end(); it != e; ++it) { @@ -625,7 +630,15 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL, int VF) { assert(LaneMap.count(I) && "Unable to find the lane for the external use"); Value *Idx = Builder.getInt32(LaneMap[I]); Value *Extract = Builder.CreateExtractElement(Vec, Idx); - I->replaceAllUsesWith(Extract); + bool Replaced = false; + for (Value::use_iterator U = I->use_begin(), UE = U->use_end(); U != UE; + ++U) { + Instruction *UI = cast(*U); + if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx) + UI->replaceUsesOfWith(I ,Extract); + Replaced = true; + } + assert(Replaced && "Must replace at least one outside user"); } // We moved some instructions around. We have to number them again @@ -691,7 +704,10 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef VL, int VF) { IRBuilder<> Builder(GetLastInstr(VL, VF)); CastInst *CI = dyn_cast(VL0); Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); - VectorizedValues[VL0] = V; + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = V; + return V; } case Instruction::Add: @@ -723,7 +739,10 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef VL, int VF) { IRBuilder<> Builder(GetLastInstr(VL, VF)); BinaryOperator *BinOp = cast(VL0); Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); - VectorizedValues[VL0] = V; + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = V; + return V; } case Instruction::Load: { @@ -740,7 +759,10 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef VL, int VF) { VecTy->getPointerTo()); LI = Builder.CreateLoad(VecPtr); LI->setAlignment(Alignment); - VectorizedValues[VL0] = LI; + + for (int i = 0; i < VF; ++i) + VectorizedValues[VL[i]] = LI; + return LI; } case Instruction::Store: { @@ -763,9 +785,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef VL, int VF) { return 0; } default: - Value *S = Scalarize(VL, VecTy); - VectorizedValues[VL0] = S; - return S; + return Scalarize(VL, VecTy); } } -- cgit v1.2.3 From 985eb9004c280f4921e4bb450d6b3ee2ce75f634 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Mon, 13 May 2013 05:13:13 +0000 Subject: SLPVectorizer: Swap LHS and RHS. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181684 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/VecUtils.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index 1362b784f0..80d61616aa 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -730,15 +730,15 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef VL, int VF) { case Instruction::Xor: { ValueList LHSVL, RHSVL; for (int i = 0; i < VF; ++i) { - RHSVL.push_back(cast(VL[i])->getOperand(0)); - LHSVL.push_back(cast(VL[i])->getOperand(1)); + LHSVL.push_back(cast(VL[i])->getOperand(0)); + RHSVL.push_back(cast(VL[i])->getOperand(1)); } - Value *RHS = vectorizeTree_rec(RHSVL, VF); Value *LHS = vectorizeTree_rec(LHSVL, VF); + Value *RHS = vectorizeTree_rec(RHSVL, VF); IRBuilder<> Builder(GetLastInstr(VL, VF)); BinaryOperator *BinOp = cast(VL0); - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS); for (int i = 0; i < VF; ++i) VectorizedValues[VL[i]] = V; -- cgit v1.2.3 From b99052ce4a75a3eac638afcd5171903514aa28e9 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Mon, 13 May 2013 07:50:47 +0000 Subject: Suppress GCC compiler warnings in release builds about variables that are only read in asserts. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181689 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/VecUtils.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp index 80d61616aa..50d2af0f65 100644 --- a/lib/Transforms/Vectorize/VecUtils.cpp +++ b/lib/Transforms/Vectorize/VecUtils.cpp @@ -639,6 +639,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL, int VF) { Replaced = true; } assert(Replaced && "Must replace at least one outside user"); + (void)Replaced; } // We moved some instructions around. We have to number them again -- cgit v1.2.3 From fa709768b91df14bb7b5606700d92c3c10aba625 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 13 May 2013 18:29:07 +0000 Subject: [objc-arc] Move the before optimization statistics gathering phase out of OptimizeIndividualCalls. This makes the statistics gathering completely independent of the actual optimization occuring, preventing any sort of bleeding over from occuring. Additionally, it simplifies a switch statement in the non-statistic gathering case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181719 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 43e2e20035..6d2d630bb6 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -1440,11 +1440,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_RetainBlock: // If we strength reduce an objc_retainBlock to an objc_retain, continue // onto the objc_retain peephole optimizations. Otherwise break. - if (!OptimizeRetainBlockCall(F, Inst, Class)) - break; - // FALLTHROUGH - case IC_Retain: - ++NumRetainsBeforeOpt; + OptimizeRetainBlockCall(F, Inst, Class); break; case IC_RetainRV: if (OptimizeRetainRVCall(F, Inst)) @@ -1453,9 +1449,6 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_AutoreleaseRV: OptimizeAutoreleaseRVCall(F, Inst, Class); break; - case IC_Release: - ++NumReleasesBeforeOpt; - break; } // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. @@ -3050,6 +3043,12 @@ bool ObjCARCOpt::runOnFunction(Function &F) { PA.setAA(&getAnalysis()); +#ifndef NDEBUG + if (AreStatisticsEnabled()) { + GatherStatistics(F, false); + } +#endif + // This pass performs several distinct transformations. As a compile-time aid // when compiling code that isn't ObjC, skip these if the relevant ObjC // library functions aren't declared. -- cgit v1.2.3 From 774a8cf2f5a5f8cd580c71f5eb3055a293917102 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 13 May 2013 19:40:39 +0000 Subject: [objc-arc-opts] Add comment to BBState making it clear that get{TopDown,BottomUp}PtrState will create a new PtrState object if it does not find a PtrState for Arg. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181726 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 6d2d630bb6..d2d8325d1f 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -587,10 +587,16 @@ namespace { /// definition. void SetAsExit() { BottomUpPathCount = 1; } + /// Attempt to find the PtrState object describing the top down state for + /// pointer Arg. Return a new initialized PtrState describing the top down + /// state for Arg if we do not find one. PtrState &getPtrTopDownState(const Value *Arg) { return PerPtrTopDown[Arg]; } + /// Attempt to find the PtrState object describing the bottom up state for + /// pointer Arg. Return a new initialized PtrState describing the bottom up + /// state for Arg if we do not find one. PtrState &getPtrBottomUpState(const Value *Arg) { return PerPtrBottomUp[Arg]; } -- cgit v1.2.3 From 9b5e6c0943dcfced64980240e25427cdc06c9bad Mon Sep 17 00:00:00 2001 From: Matt Beaumont-Gay Date: Mon, 13 May 2013 21:10:49 +0000 Subject: Move a couple more statistics inside '#ifndef NDEBUG'. Suppresses an unused-variable warning in -Asserts builds. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181733 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index d2d8325d1f..bab49e75b7 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -303,11 +303,11 @@ STATISTIC(NumRets, "Number of return value forwarding " "retain+autoreleaes eliminated"); STATISTIC(NumRRs, "Number of retain+release paths eliminated"); STATISTIC(NumPeeps, "Number of calls peephole-optimized"); +#ifndef NDEBUG STATISTIC(NumRetainsBeforeOpt, "Number of retains before optimization."); STATISTIC(NumReleasesBeforeOpt, "Number of releases before optimization."); -#ifndef NDEBUG STATISTIC(NumRetainsAfterOpt, "Number of retains after optimization."); STATISTIC(NumReleasesAfterOpt, -- cgit v1.2.3 From acfb3584c58159ec20a8379c864c9d644f8d967e Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 13 May 2013 23:49:42 +0000 Subject: [objc-arc-opts] In the presense of an alloca unconditionally remove RR pairs if and only if we are both KnownSafeBU/KnownSafeTD rather than just either or. In the presense of a block being initialized, the frontend will emit the objc_retain on the original pointer and the release on the pointer loaded from the alloca. The optimizer will through the provenance analysis realize that the two are related (albiet different), but since we only require KnownSafe in one direction, will match the inner retain on the original pointer with the guard release on the original pointer. This is fixed by ensuring that in the presense of allocas we only unconditionally remove pointers if both our retain and our release are KnownSafe (i.e. we are KnownSafe in both directions) since we must deal with the possibility that the frontend will emit what (to the optimizer) appears to be unbalanced retain/releases. An example of the miscompile is: %A = alloca retain(%x) retain(%x) <--- Inner Retain store %x, %A %y = load %A ... DO STUFF ... release(%y) call void @use(%x) release(%x) <--- Guarding Release getting optimized to: %A = alloca retain(%x) store %x, %A %y = load %A ... DO STUFF ... release(%y) call void @use(%x) rdar://13750319 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181743 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 96 ++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index bab49e75b7..2932af10c2 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -107,6 +107,12 @@ namespace { return std::make_pair(Vector.begin() + Pair.first->second, false); } + iterator find(const KeyT &Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) return Vector.end(); + return Vector.begin() + It->second; + } + const_iterator find(const KeyT &Key) const { typename MapTy::const_iterator It = Map.find(Key); if (It == Map.end()) return Vector.end(); @@ -253,6 +259,40 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { return false; } +/// This is a wrapper around getUnderlyingObjCPtr along the lines of +/// GetUnderlyingObjects except that it returns early when it sees the first +/// alloca. +static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) { + SmallPtrSet Visited; + SmallVector Worklist; + Worklist.push_back(V); + do { + const Value *P = Worklist.pop_back_val(); + P = GetUnderlyingObjCPtr(P); + + if (isa(P)) + return true; + + if (!Visited.insert(P)) + continue; + + if (const SelectInst *SI = dyn_cast(P)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + if (const PHINode *PN = dyn_cast(P)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } + } while (!Worklist.empty()); + + return false; +} + + /// @} /// /// \defgroup ARCOpt ARC Optimization. @@ -414,8 +454,18 @@ namespace { /// sequence. SmallPtrSet ReverseInsertPts; + /// Does this pointer have multiple owners? + /// + /// In the presence of multiple owners with the same provenance caused by + /// allocas, we can not assume that the frontend will emit balanced code + /// since it could put the release on the pointer loaded from the + /// alloca. This confuses the optimizer so we must be more conservative in + /// that case. + bool MultipleOwners; + RRInfo() : - KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {} + KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0), + MultipleOwners(false) {} void clear(); @@ -428,6 +478,7 @@ namespace { void RRInfo::clear() { KnownSafe = false; IsTailCallRelease = false; + MultipleOwners = false; ReleaseMetadata = 0; Calls.clear(); ReverseInsertPts.clear(); @@ -516,6 +567,7 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease; RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); + RRI.MultipleOwners |= Other.RRI.MultipleOwners; // Merge the insert point sets. If there are any differences, // that makes this a partial merge. @@ -601,6 +653,12 @@ namespace { return PerPtrBottomUp[Arg]; } + /// Attempt to find the PtrState object describing the bottom up state for + /// pointer Arg. + ptr_iterator findPtrBottomUpState(const Value *Arg) { + return PerPtrBottomUp.find(Arg); + } + void clearBottomUpPointers() { PerPtrBottomUp.clear(); } @@ -1865,6 +1923,28 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case IC_None: // These are irrelevant. return NestingDetected; + case IC_User: + // If we have a store into an alloca of a pointer we are tracking, the + // pointer has multiple owners implying that we must be more conservative. + // + // This comes up in the context of a pointer being ``KnownSafe''. In the + // presense of a block being initialized, the frontend will emit the + // objc_retain on the original pointer and the release on the pointer loaded + // from the alloca. The optimizer will through the provenance analysis + // realize that the two are related, but since we only require KnownSafe in + // one direction, will match the inner retain on the original pointer with + // the guard release on the original pointer. This is fixed by ensuring that + // in the presense of allocas we only unconditionally remove pointers if + // both our retain and our release are KnownSafe. + if (StoreInst *SI = dyn_cast(Inst)) { + if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) { + BBState::ptr_iterator I = MyStates.findPtrBottomUpState( + StripPointerCastsAndObjCCalls(SI->getValueOperand())); + if (I != MyStates.bottom_up_ptr_end()) + I->second.RRI.MultipleOwners = true; + } + } + break; default: break; } @@ -2411,8 +2491,10 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap bool KnownSafe, bool &AnyPairsCompletelyEliminated) { // If a pair happens in a region where it is known that the reference count - // is already incremented, we can similarly ignore possible decrements. + // is already incremented, we can similarly ignore possible decrements unless + // we are dealing with a retainable object with multiple provenance sources. bool KnownSafeTD = true, KnownSafeBU = true; + bool MultipleOwners = false; // Connect the dots between the top-down-collected RetainsToMove and // bottom-up-collected ReleasesToMove to form sets of related calls. @@ -2431,6 +2513,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap assert(It != Retains.end()); const RRInfo &NewRetainRRI = It->second; KnownSafeTD &= NewRetainRRI.KnownSafe; + MultipleOwners |= NewRetainRRI.MultipleOwners; for (SmallPtrSet::const_iterator LI = NewRetainRRI.Calls.begin(), LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { @@ -2524,9 +2607,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap if (NewRetains.empty()) break; } - // If the pointer is known incremented or nested, we can safely delete the - // pair regardless of what's between them. - if (KnownSafeTD || KnownSafeBU) { + // If the pointer is known incremented in 1 direction and we do not have + // MultipleOwners, we can safely remove the retain/releases. Otherwise we need + // to be known safe in both directions. + bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) || + ((KnownSafeTD || KnownSafeBU) && !MultipleOwners); + if (UnconditionallySafe) { RetainsToMove.ReverseInsertPts.clear(); ReleasesToMove.ReverseInsertPts.clear(); NewCount = 0; -- cgit v1.2.3 From dba53a8c9d4ece0bf44acbf5fce871f2e754d97d Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 14 May 2013 00:08:09 +0000 Subject: [objc-arc-opts] Added debug statements when we set and unset whether a pointer is known positive. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181745 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 2932af10c2..5db4797aeb 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -508,10 +508,12 @@ namespace { Seq(S_None) {} void SetKnownPositiveRefCount() { + DEBUG(dbgs() << "Setting Known Positive.\n"); KnownPositiveRefCount = true; } void ClearKnownPositiveRefCount() { + DEBUG(dbgs() << "Clearing Known Positive.\n"); KnownPositiveRefCount = false; } -- cgit v1.2.3 From 123f18bcb9baeb6dc177cb642126a3a4d9ca8b43 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 14 May 2013 00:21:18 +0000 Subject: LoopVectorize: Handle loops with multiple forward inductions We used to give up if we saw two integer inductions. After this patch, we base further induction variables on the chosen one like we do in the reverse induction and pointer induction case. Fixes PR15720. radar://13851975 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181746 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 57 +++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 17 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0dd6abb1ae..0b29445dd0 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1389,9 +1389,10 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { case LoopVectorizationLegality::IK_NoInduction: llvm_unreachable("Unknown induction"); case LoopVectorizationLegality::IK_IntInduction: { - // Handle the integer induction counter: + // Handle the integer induction counter. assert(OrigPhi->getType()->isIntegerTy() && "Invalid type"); - assert(OrigPhi == OldInduction && "Unknown integer PHI"); + + // We have the canonical induction variable. if (OrigPhi == OldInduction) { // Create a truncated version of the resume value for the scalar loop, // we might have promoted the type to a larger width. @@ -1402,11 +1403,20 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); TruncResumeVal->addIncoming(EndValue, VecBody); + + // We know what the end value is. + EndValue = IdxEndRoundDown; + // We also know which PHI node holds it. + ResumeIndex = ResumeVal; + break; } - // We know what the end value is. - EndValue = IdxEndRoundDown; - // We also know which PHI node holds it. - ResumeIndex = ResumeVal; + + // Not the canonical induction variable - add the vector loop count to the + // start value. + Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, + II.StartValue->getType(), + "cast.crd"); + EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end"); break; } case LoopVectorizationLegality::IK_ReverseIntInduction: { @@ -2056,12 +2066,25 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, case LoopVectorizationLegality::IK_NoInduction: llvm_unreachable("Unknown induction"); case LoopVectorizationLegality::IK_IntInduction: { - assert(P == OldInduction && "Unexpected PHI"); - // We might have had to extend the type. - Value *Trunc = Builder.CreateTrunc(Induction, P->getType()); - Value *Broadcasted = getBroadcastInstrs(Trunc); - // After broadcasting the induction variable we need to make the - // vector consecutive by adding 0, 1, 2 ... + assert(P->getType() == II.StartValue->getType() && "Types must match"); + Type *PhiTy = P->getType(); + Value *Broadcasted; + if (P == OldInduction) { + // Handle the canonical induction variable. We might have had to + // extend the type. + Broadcasted = Builder.CreateTrunc(Induction, PhiTy); + } else { + // Handle other induction variables that are now based on the + // canonical one. + Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, + "normalized.idx"); + NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy); + Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx, + "offset.idx"); + } + Broadcasted = getBroadcastInstrs(Broadcasted); + // After broadcasting the induction variable we need to make the vector + // consecutive by adding 0, 1, 2, etc. for (unsigned part = 0; part < UF; ++part) Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false); continue; @@ -2466,11 +2489,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Int inductions are special because we only allow one IV. if (IK == IK_IntInduction) { - if (Induction) { - DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n"); - return false; - } - Induction = Phi; + // Use the phi node with the widest type as induction. Use the last + // one if there are multiple (no good reason for doing this other + // than it is expedient). + if (!Induction || PhiTy == WidestIndTy) + Induction = Phi; } DEBUG(dbgs() << "LV: Found an induction variable.\n"); -- cgit v1.2.3 From cbe5f4c5d7a04d15dcc89a3f1de0936e5db54da7 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 14 May 2013 06:40:10 +0000 Subject: Removed trailing whitespace. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181760 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 5db4797aeb..878da7d74c 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -269,19 +269,19 @@ static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) { do { const Value *P = Worklist.pop_back_val(); P = GetUnderlyingObjCPtr(P); - + if (isa(P)) return true; - + if (!Visited.insert(P)) continue; - + if (const SelectInst *SI = dyn_cast(P)) { Worklist.push_back(SI->getTrueValue()); Worklist.push_back(SI->getFalseValue()); continue; } - + if (const PHINode *PN = dyn_cast(P)) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) Worklist.push_back(PN->getIncomingValue(i)); -- cgit v1.2.3 From 5150270d071f05c6ea1fd26ef448954ddb5746dc Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Tue, 14 May 2013 21:52:44 +0000 Subject: GlobalOpt: fix an issue where CXAAtExitFn points to a deleted function. CXAAtExitFn was set outside a loop and before optimizations where functions can be deleted. This patch will set CXAAtExitFn inside the loop and after optimizations. Seg fault when running LTO because of accesses to a deleted function. rdar://problem/13838828 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181838 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 0ef900e2b9..4a9cb27b03 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -3323,8 +3323,6 @@ bool GlobalOpt::runOnModule(Module &M) { // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); - Function *CXAAtExitFn = FindCXAAtExit(M, TLI); - bool LocalChange = true; while (LocalChange) { LocalChange = false; @@ -3342,7 +3340,9 @@ bool GlobalOpt::runOnModule(Module &M) { // Resolve aliases, when possible. LocalChange |= OptimizeGlobalAliases(M); - // Try to remove trivial global destructors. + // Try to remove trivial global destructors if they are not removed + // already. + Function *CXAAtExitFn = FindCXAAtExit(M, TLI); if (CXAAtExitFn) LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); -- cgit v1.2.3 From da2ed458b4e7066fc414c403173b882ccc2c8833 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 14 May 2013 23:36:24 +0000 Subject: Fix two typo git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181848 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 87d56214a3..51ca29bc07 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -846,7 +846,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { /// FP value and: /// 1) 1/C is exact, or /// 2) reciprocal is allowed. -/// If the convertion was successful, the simplified expression "X * 1/C" is +/// If the conversion was successful, the simplified expression "X * 1/C" is /// returned; otherwise, NULL is returned. /// static Instruction *CvtFDivConstToReciprocal(Value *Dividend, -- cgit v1.2.3 From 1386692ef64d3151da8986589eadf0c58aba5c50 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 15 May 2013 01:44:30 +0000 Subject: LoopVectorize: Hoist conditional loads if possible InstCombine can be uncooperative to vectorization and sink loads into conditional blocks. This prevents vectorization. Undo this optimization if there are unconditional memory accesses to the same addresses in the loop. radar://13815763 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181860 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 105 ++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0b29445dd0..a8e1f4579c 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -318,6 +318,93 @@ private: ValueMap WidenMap; }; +/// \brief Check if conditionally executed loads are hoistable. +/// +/// This class has two functions. isHoistableLoad and canHoistAllLoads. +/// isHoistableLoad should be called on all load instructions that are executed +/// conditionally. After all conditional loads are processed, the client should +/// call canHoistAllLoads to determine if all of the conditional execute loads +/// have an unconditional memory access in the loop. +class LoadHoisting { + typedef SmallPtrSet MemorySet; + + Loop *TheLoop; + DominatorTree *DT; + MemorySet CondLoadAddrSet; + +public: + LoadHoisting(Loop *L, DominatorTree *D) : TheLoop(L), DT(D) {} + + /// \brief Check if the instruction is a load with a identifiable address. + bool isHoistableLoad(Instruction *L); + + /// \brief Check if all of the conditional loads are hoistable because there + /// exists an unconditional memory access to the same address in the loop. + bool canHoistAllLoads(); +}; + +bool LoadHoisting::isHoistableLoad(Instruction *L) { + LoadInst *LI = dyn_cast(L); + if (!LI) + return false; + + CondLoadAddrSet.insert(LI->getPointerOperand()); + return true; +} + +static void addMemAccesses(BasicBlock *BB, SmallPtrSet &Set) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + Value *Addr = 0; + + // Try a load. + LoadInst *LI = dyn_cast(I); + if (LI) { + Addr = LI->getPointerOperand(); + Set.insert(Addr); + continue; + } + + // Try a store. + StoreInst *SI = dyn_cast(I); + if (!SI) + continue; + + Addr = SI->getPointerOperand(); + Set.insert(Addr); + } +} + +bool LoadHoisting::canHoistAllLoads() { + // No conditional loads. + if (CondLoadAddrSet.empty()) + return true; + + MemorySet UncondMemAccesses; + std::vector &LoopBlocks = TheLoop->getBlocksVector(); + BasicBlock *LoopLatch = TheLoop->getLoopLatch(); + + // Iterate over the unconditional blocks and collect memory access addresses. + for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) { + BasicBlock *BB = LoopBlocks[i]; + + // Ignore conditional blocks. + if (BB != LoopLatch && !DT->dominates(BB, LoopLatch)) + continue; + + addMemAccesses(BB, UncondMemAccesses); + } + + // And make sure there is a matching unconditional access for every + // conditional load. + for (MemorySet::iterator MI = CondLoadAddrSet.begin(), + ME = CondLoadAddrSet.end(); MI != ME; ++MI) + if (!UncondMemAccesses.count(*MI)) + return false; + + return true; +} + /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. /// This class does not look at the profitability of vectorization, only the @@ -337,7 +424,8 @@ public: DominatorTree *DT, TargetTransformInfo* TTI, AliasAnalysis *AA, TargetLibraryInfo *TLI) : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), - Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {} + Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false), + LoadSpeculation(L, DT) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -598,6 +686,9 @@ private: RuntimePointerCheck PtrRtCheck; /// Can we assume the absence of NaNs. bool HasFunNoNaNAttr; + + /// Utility to determine whether loads can be speculated. + LoadHoisting LoadSpeculation; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -3259,8 +3350,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - // We don't predicate loads/stores at the moment. - if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow()) + // We might be able to hoist the load. + if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it)) + return false; + + // We predicate stores at the moment. + if (it->mayWriteToMemory() || it->mayThrow()) return false; // The instructions below can trap. @@ -3274,6 +3369,10 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { } } + // Check that we can actually speculate the hoistable loads. + if (!LoadSpeculation.canHoistAllLoads()) + return false; + return true; } -- cgit v1.2.3 From e3ab7cca3abbc2dc5056695582b7a46abcc245ef Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 15 May 2013 02:02:45 +0000 Subject: LoopVectorize: Fix comments No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181862 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index a8e1f4579c..58a1a74655 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -320,11 +320,11 @@ private: /// \brief Check if conditionally executed loads are hoistable. /// -/// This class has two functions. isHoistableLoad and canHoistAllLoads. +/// This class has two functions: isHoistableLoad and canHoistAllLoads. /// isHoistableLoad should be called on all load instructions that are executed /// conditionally. After all conditional loads are processed, the client should -/// call canHoistAllLoads to determine if all of the conditional execute loads -/// have an unconditional memory access in the loop. +/// call canHoistAllLoads to determine if all of the conditional executed loads +/// have an unconditional memory access to the same memory address in the loop. class LoadHoisting { typedef SmallPtrSet MemorySet; @@ -3354,7 +3354,7 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it)) return false; - // We predicate stores at the moment. + // We don't predicate stores at the moment. if (it->mayWriteToMemory() || it->mayThrow()) return false; -- cgit v1.2.3 From c292e68d43daf9dfb6a5c843ea11a25702d9d8a7 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Wed, 15 May 2013 17:43:03 +0000 Subject: [objc-arc] Fixed a spelling error and made the statistic descriptions be consistent about their usage of periods. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181901 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 878da7d74c..4bf25facc6 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -340,18 +340,18 @@ STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases"); STATISTIC(NumRets, "Number of return value forwarding " - "retain+autoreleaes eliminated"); + "retain+autoreleases eliminated"); STATISTIC(NumRRs, "Number of retain+release paths eliminated"); STATISTIC(NumPeeps, "Number of calls peephole-optimized"); #ifndef NDEBUG STATISTIC(NumRetainsBeforeOpt, - "Number of retains before optimization."); + "Number of retains before optimization"); STATISTIC(NumReleasesBeforeOpt, - "Number of releases before optimization."); + "Number of releases before optimization"); STATISTIC(NumRetainsAfterOpt, - "Number of retains after optimization."); + "Number of retains after optimization"); STATISTIC(NumReleasesAfterOpt, - "Number of releases after optimization."); + "Number of releases after optimization"); #endif namespace { -- cgit v1.2.3