diff options
author | Dan Gohman <gohman@apple.com> | 2010-04-07 22:27:08 +0000 |
---|---|---|
committer | Dan Gohman <gohman@apple.com> | 2010-04-07 22:27:08 +0000 |
commit | 448db1cdef5872713ef77beffacf502ae3450cd7 (patch) | |
tree | d8df55b2c06d8bebee028100f60d2126d376790d /lib | |
parent | b72e59e3615c4f8a8ac272629511814000cde5e0 (diff) | |
download | llvm-448db1cdef5872713ef77beffacf502ae3450cd7.tar.gz llvm-448db1cdef5872713ef77beffacf502ae3450cd7.tar.bz2 llvm-448db1cdef5872713ef77beffacf502ae3450cd7.tar.xz |
Generalize IVUsers to track arbitrary expressions rather than expressions
explicitly split into stride-and-offset pairs. Also, add the
ability to track multiple post-increment loops on the same expression.
This refines the concept of "normalizing" SCEV expressions used for
to post-increment uses, and introduces a dedicated utility routine for
normalizing and denormalizing expressions.
This fixes the expansion of expressions which are post-increment users
of more than one loop at a time. More broadly, this takes LSR another
step closer to being able to reason about more than one loop at a time.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100699 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Analysis/IVUsers.cpp | 242 | ||||
-rw-r--r-- | lib/Analysis/ScalarEvolutionExpander.cpp | 17 | ||||
-rw-r--r-- | lib/Analysis/ScalarEvolutionNormalization.cpp | 150 | ||||
-rw-r--r-- | lib/Transforms/Scalar/IndVarSimplify.cpp | 43 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 118 |
5 files changed, 346 insertions, 224 deletions
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 47b5d4a0f0..467f9dd840 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -62,120 +62,34 @@ static void CollectSubexprs(const SCEV *S, Ops.push_back(S); } -/// getSCEVStartAndStride - Compute the start and stride of this expression, -/// returning false if the expression is not a start/stride pair, or true if it -/// is. The stride must be a loop invariant expression, but the start may be -/// a mix of loop invariant and loop variant expressions. The start cannot, -/// however, contain an AddRec from a different loop, unless that loop is an -/// outer loop of the current loop. -static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, - const SCEV *&Start, const SCEV *&Stride, - ScalarEvolution *SE, DominatorTree *DT) { - const SCEV *TheAddRec = Start; // Initialize to zero. - - // If the outer level is an AddExpr, the operands are all start values except - // for a nested AddRecExpr. - if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) { - for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) - if (const SCEVAddRecExpr *AddRec = - dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) - TheAddRec = SE->getAddExpr(AddRec, TheAddRec); - else - Start = SE->getAddExpr(Start, AE->getOperand(i)); - } else if (isa<SCEVAddRecExpr>(SH)) { - TheAddRec = SH; - } else { - return false; // not analyzable. - } - - // Break down TheAddRec into its component parts. - SmallVector<const SCEV *, 4> Subexprs; - CollectSubexprs(TheAddRec, Subexprs, *SE); - - // Look for an addrec on the current loop among the parts. - const SCEV *AddRecStride = 0; - for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(), - E = Subexprs.end(); I != E; ++I) { - const SCEV *S = *I; - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) - if (AR->getLoop() == L) { - *I = AR->getStart(); - AddRecStride = AR->getStepRecurrence(*SE); - break; - } - } - if (!AddRecStride) - return false; - - // Add up everything else into a start value (which may not be - // loop-invariant). - const SCEV *AddRecStart = SE->getAddExpr(Subexprs); - - // Use getSCEVAtScope to attempt to simplify other loops out of - // the picture. - AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); - - Start = SE->getAddExpr(Start, AddRecStart); - - // If stride is an instruction, make sure it properly dominates the header. - // Otherwise we could end up with a use before def situation. - if (!isa<SCEVConstant>(AddRecStride)) { - BasicBlock *Header = L->getHeader(); - if (!AddRecStride->properlyDominates(Header, DT)) - return false; +/// isInteresting - Test whether the given expression is "interesting" when +/// used by the given expression, within the context of analyzing the +/// given loop. +static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) { + // Anything loop-invariant is interesting. + if (!isa<SCEVUnknown>(S) && S->isLoopInvariant(L)) + return true; - DEBUG(dbgs() << "["; - WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); - dbgs() << "] Variable stride: " << *AddRecStride << "\n"); + // An addrec is interesting if it's affine or if it has an interesting start. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Keep things simple. Don't touch loop-variant strides. + if (AR->getLoop() == L && (AR->isAffine() || !L->contains(I))) + return true; + // Otherwise recurse to see if the start value is interesting. + return isInteresting(AR->getStart(), I, L); } - Stride = AddRecStride; - return true; -} - -/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression -/// and now we need to decide whether the user should use the preinc or post-inc -/// value. If this user should use the post-inc version of the IV, return true. -/// -/// Choosing wrong here can break dominance properties (if we choose to use the -/// post-inc value when we cannot) or it can end up adding extra live-ranges to -/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we -/// should use the post-inc value). -static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, - const Loop *L, DominatorTree *DT) { - // If the user is in the loop, use the preinc value. - if (L->contains(User)) return false; - - BasicBlock *LatchBlock = L->getLoopLatch(); - if (!LatchBlock) + // An add is interesting if any of its operands is. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); + OI != OE; ++OI) + if (isInteresting(*OI, I, L)) + return true; return false; + } - // Ok, the user is outside of the loop. If it is dominated by the latch - // block, use the post-inc value. - if (DT->dominates(LatchBlock, User->getParent())) - return true; - - // There is one case we have to be careful of: PHI nodes. These little guys - // can live in blocks that are not dominated by the latch block, but (since - // their uses occur in the predecessor block, not the block the PHI lives in) - // should still use the post-inc value. Check for this case now. - PHINode *PN = dyn_cast<PHINode>(User); - if (!PN) return false; // not a phi, not dominated by latch block. - - // Look at all of the uses of IV by the PHI node. If any use corresponds to - // a block that is not dominated by the latch block, give up and use the - // preincremented value. - unsigned NumUses = 0; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == IV) { - ++NumUses; - if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) - return false; - } - - // Okay, all uses of IV by PN are in predecessor blocks that really are - // dominated by the latch block. Use the post-incremented value. - return true; + // Nothing else is interesting here. + return false; } /// AddUsersIfInteresting - Inspect the specified instruction. If it is a @@ -196,16 +110,9 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { const SCEV *ISE = SE->getSCEV(I); if (isa<SCEVCouldNotCompute>(ISE)) return false; - // Get the start and stride for this expression. - Loop *UseLoop = LI->getLoopFor(I->getParent()); - const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType()); - const SCEV *Stride = Start; - - if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT)) - return false; // Non-reducible symbolic expression, bail out. - - // Keep things simple. Don't touch loop-variant strides. - if (!Stride->isLoopInvariant(L) && L->contains(I)) + // If we've come to an uninteresting expression, stop the traversal and + // call this a user. + if (!isInteresting(ISE, I, L)) return false; SmallPtrSet<Instruction *, 4> UniqueUsers; @@ -241,27 +148,24 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { } if (AddUserToIVUsers) { - // Okay, we found a user that we cannot reduce. Analyze the instruction - // and decide what to do with it. If we are a use inside of the loop, use - // the value before incrementation, otherwise use it after incrementation. - if (IVUseShouldUsePostIncValue(User, I, L, DT)) { - // The value used will be incremented by the stride more than we are - // expecting, so subtract this off. - const SCEV *NewStart = SE->getMinusSCEV(Start, Stride); - IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I)); - IVUses.back().setIsUseOfPostIncrementedValue(true); - DEBUG(dbgs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); - } else { - IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I)); - } + // Okay, we found a user that we cannot reduce. + IVUses.push_back(new IVStrideUse(this, ISE, User, I)); + IVStrideUse &NewUse = IVUses.back(); + // Transform the expression into a normalized form. + NewUse.Expr = + TransformForPostIncUse(NormalizeAutodetect, NewUse.Expr, + User, I, + NewUse.PostIncLoops, + *SE, *DT); + DEBUG(dbgs() << " NORMALIZED TO: " << *NewUse.Expr << '\n'); } } return true; } -IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset, +IVStrideUse &IVUsers::AddUser(const SCEV *Expr, Instruction *User, Value *Operand) { - IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand)); + IVUses.push_back(new IVStrideUse(this, Expr, User, Operand)); return IVUses.back(); } @@ -295,30 +199,10 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { /// getReplacementExpr - Return a SCEV expression which computes the /// value of the OperandValToReplace of the given IVStrideUse. const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { - // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); - // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); - // Add the offset in a separate step, because it may be loop-variant. - RetVal = SE->getAddExpr(RetVal, U.getOffset()); - // For uses of post-incremented values, add an extra stride to compute - // the actual replacement value. - if (U.isUseOfPostIncrementedValue()) - RetVal = SE->getAddExpr(RetVal, U.getStride()); - return RetVal; -} - -/// getCanonicalExpr - Return a SCEV expression which computes the -/// value of the SCEV of the given IVStrideUse, ignoring the -/// isUseOfPostIncrementedValue flag. -const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const { - // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); - // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); - // Add the offset in a separate step, because it may be loop-variant. - RetVal = SE->getAddExpr(RetVal, U.getOffset()); - return RetVal; + PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(U.PostIncLoops); + return TransformForPostIncUse(Denormalize, U.getExpr(), + U.getUser(), U.getOperandValToReplace(), + Loops, *SE, *DT); } void IVUsers::print(raw_ostream &OS, const Module *M) const { @@ -339,8 +223,13 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { WriteAsOperand(OS, UI->getOperandValToReplace(), false); OS << " = " << *getReplacementExpr(*UI); - if (UI->isUseOfPostIncrementedValue()) - OS << " (post-inc)"; + for (PostIncLoopSet::const_iterator + I = UI->PostIncLoops.begin(), + E = UI->PostIncLoops.end(); I != E; ++I) { + OS << " (post-inc with loop "; + WriteAsOperand(OS, (*I)->getHeader(), false); + OS << ")"; + } OS << " in "; UI->getUser()->print(OS, &Annotator); OS << '\n'; @@ -356,6 +245,39 @@ void IVUsers::releaseMemory() { IVUses.clear(); } +static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + if (AR->getLoop() == L) + return AR; + return findAddRecForLoop(AR->getStart(), L); + } + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) + return AR; + return 0; + } + + return 0; +} + +const SCEV *IVStrideUse::getStride(const Loop *L) const { + if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(), L)) + return AR->getStepRecurrence(*Parent->SE); + return 0; +} + +void IVStrideUse::transformToPostInc(const Loop *L) { + PostIncLoopSet Loops; + Loops.insert(L); + Expr = TransformForPostIncUse(Normalize, Expr, + getUser(), getOperandValToReplace(), + Loops, *Parent->SE, *Parent->DT); + PostIncLoops.insert(L); +} + void IVStrideUse::deleted() { // Remove this user from the list. Parent->IVUses.erase(this); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 2e18ceac52..dd8ab431f3 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -966,9 +966,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Determine a normalized form of this expression, which is the expression // before any post-inc adjustment is made. const SCEVAddRecExpr *Normalized = S; - if (L == PostIncLoop) { - const SCEV *Step = S->getStepRecurrence(SE); - Normalized = cast<SCEVAddRecExpr>(SE.getMinusSCEV(S, Step)); + if (PostIncLoops.count(L)) { + PostIncLoopSet Loops; + Loops.insert(L); + Normalized = + cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0, + Loops, SE, *SE.DT)); } // Strip off any non-loop-dominating component from the addrec start. @@ -1002,7 +1005,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Accommodate post-inc mode, if necessary. Value *Result; - if (L != PostIncLoop) + if (!PostIncLoops.count(L)) Result = PN; else { // In PostInc mode, use the post-incremented value. @@ -1274,7 +1277,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. - if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop) + if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L)) InsertPt = L->getHeader()->getFirstNonPHI(); while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); @@ -1296,7 +1299,7 @@ Value *SCEVExpander::expand(const SCEV *S) { Value *V = visit(S); // Remember the expanded value for this SCEV at this location. - if (!PostIncLoop) + if (PostIncLoops.empty()) InsertedExpressions[std::make_pair(S, InsertPt)] = V; restoreInsertPoint(SaveInsertBB, SaveInsertPt); @@ -1304,7 +1307,7 @@ Value *SCEVExpander::expand(const SCEV *S) { } void SCEVExpander::rememberInstruction(Value *I) { - if (!PostIncLoop) + if (PostIncLoops.empty()) InsertedValues.insert(I); // If we just claimed an existing instruction and that instruction had diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp new file mode 100644 index 0000000000..75c381d5ef --- /dev/null +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -0,0 +1,150 @@ +//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for working with "normalized" expressions. +// See the comments at the top of ScalarEvolutionNormalization.h for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" +using namespace llvm; + +/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression +/// and now we need to decide whether the user should use the preinc or post-inc +/// value. If this user should use the post-inc version of the IV, return true. +/// +/// Choosing wrong here can break dominance properties (if we choose to use the +/// post-inc value when we cannot) or it can end up adding extra live-ranges to +/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we +/// should use the post-inc value). +static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, + const Loop *L, DominatorTree *DT) { + // If the user is in the loop, use the preinc value. + if (L->contains(User)) return false; + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) + return false; + + // Ok, the user is outside of the loop. If it is dominated by the latch + // block, use the post-inc value. + if (DT->dominates(LatchBlock, User->getParent())) + return true; + + // There is one case we have to be careful of: PHI nodes. These little guys + // can live in blocks that are not dominated by the latch block, but (since + // their uses occur in the predecessor block, not the block the PHI lives in) + // should still use the post-inc value. Check for this case now. + PHINode *PN = dyn_cast<PHINode>(User); + if (!PN) return false; // not a phi, not dominated by latch block. + + // Look at all of the uses of IV by the PHI node. If any use corresponds to + // a block that is not dominated by the latch block, give up and use the + // preincremented value. + unsigned NumUses = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == IV) { + ++NumUses; + if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; + } + + // Okay, all uses of IV by PN are in predecessor blocks that really are + // dominated by the latch block. Use the post-incremented value. + return true; +} + +const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, + const SCEV *S, + Instruction *User, + Value *OperandValToReplace, + PostIncLoopSet &Loops, + ScalarEvolution &SE, + DominatorTree &DT) { + if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) + return S; + if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) { + const SCEV *O = X->getOperand(); + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + if (O != N) + switch (S->getSCEVType()) { + case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType()); + case scSignExtend: return SE.getSignExtendExpr(N, S->getType()); + case scTruncate: return SE.getTruncateExpr(N, S->getType()); + default: llvm_unreachable("Unexpected SCEVCastExpr kind!"); + } + return S; + } + if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) { + SmallVector<const SCEV *, 8> Operands; + bool Changed = false; + for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end(); + I != E; ++I) { + const SCEV *O = *I; + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + Changed |= N != O; + Operands.push_back(N); + } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // An addrec. This is the interesting part. + const Loop *L = AR->getLoop(); + const SCEV *Result = SE.getAddRecExpr(Operands, L); + switch (Kind) { + default: llvm_unreachable("Unexpected transform name!"); + case NormalizeAutodetect: + if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace)) + if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) { + Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); + Loops.insert(L); + } + break; + case Normalize: + if (Loops.count(L)) + Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); + break; + case Denormalize: + if (Loops.count(L)) { + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getAddExpr(Result, TransformedStep); + } + break; + } + return Result; + } + if (Changed) + switch (S->getSCEVType()) { + case scAddExpr: return SE.getAddExpr(Operands); + case scMulExpr: return SE.getMulExpr(Operands); + case scSMaxExpr: return SE.getSMaxExpr(Operands); + case scUMaxExpr: return SE.getUMaxExpr(Operands); + default: llvm_unreachable("Unexpected SCEVNAryExpr kind!"); + } + return S; + } + if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) { + const SCEV *LO = X->getLHS(); + const SCEV *RO = X->getRHS(); + const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace, + Loops, SE, DT); + const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace, + Loops, SE, DT); + if (LO != LN || RO != RN) + return SE.getUDivExpr(LN, RN); + return S; + } + llvm_unreachable("Unexpected SCEV kind!"); + return 0; +} diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 6605666e45..1a58b6644f 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -454,6 +454,46 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { return Changed; } +// FIXME: It is an extremely bad idea to indvar substitute anything more +// complex than affine induction variables. Doing so will put expensive +// polynomial evaluations inside of the loop, and the str reduction pass +// currently can only reduce affine polynomials. For now just disable +// indvar subst on anything more complex than an affine addrec, unless +// it can be expanded to a trivial value. +static bool isSafe(const SCEV *S, const Loop *L) { + // Loop-invariant values are safe. + if (S->isLoopInvariant(L)) return true; + + // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how + // to transform them into efficient code. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + return AR->isAffine(); + + // An add is safe it all its operands are safe. + if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { + for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), + E = Commutative->op_end(); I != E; ++I) + if (!isSafe(*I, L)) return false; + return true; + } + + // A cast is safe if its operand is. + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + return isSafe(C->getOperand(), L); + + // A udiv is safe if its operands are. + if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) + return isSafe(UD->getLHS(), L) && + isSafe(UD->getRHS(), L); + + // SCEVUnknown is always safe. + if (isa<SCEVUnknown>(S)) + return true; + + // Nothing else is safe. + return false; +} + void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { SmallVector<WeakVH, 16> DeadInsts; @@ -465,7 +505,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // the need for the code evaluation methods to insert induction variables // of different sizes. for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { - const SCEV *Stride = UI->getStride(); Value *Op = UI->getOperandValToReplace(); const Type *UseTy = Op->getType(); Instruction *User = UI->getUser(); @@ -486,7 +525,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // currently can only reduce affine polynomials. For now just disable // indvar subst on anything more complex than an affine addrec, unless // it can be expanded to a trivial value. - if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) + if (!isSafe(AR, L)) continue; // Determine the insertion point for this user. By default, insert diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 625a75d6cc..631092b326 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -781,10 +781,10 @@ struct LSRFixup { /// will be replaced. Value *OperandValToReplace; - /// PostIncLoop - If this user is to use the post-incremented value of an + /// PostIncLoops - If this user is to use the post-incremented value of an /// induction variable, this variable is non-null and holds the loop /// associated with the induction variable. - const Loop *PostIncLoop; + PostIncLoopSet PostIncLoops; /// LUIdx - The index of the LSRUse describing the expression which /// this fixup needs, minus an offset (below). @@ -795,6 +795,8 @@ struct LSRFixup { /// offsets, for example in an unrolled loop. int64_t Offset; + bool isUseFullyOutsideLoop(const Loop *L) const; + LSRFixup(); void print(raw_ostream &OS) const; @@ -804,9 +806,24 @@ struct LSRFixup { } LSRFixup::LSRFixup() - : UserInst(0), OperandValToReplace(0), PostIncLoop(0), + : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {} +/// isUseFullyOutsideLoop - Test whether this fixup always uses its +/// value outside of the given loop. +bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { + // PHI nodes use their value in their incoming blocks. + if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == OperandValToReplace && + L->contains(PN->getIncomingBlock(i))) + return false; + return true; + } + + return !L->contains(UserInst); +} + void LSRFixup::print(raw_ostream &OS) const { OS << "UserInst="; // Store is common and interesting enough to be worth special-casing. @@ -821,9 +838,10 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", OperandValToReplace="; WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); - if (PostIncLoop) { + for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(), + E = PostIncLoops.end(); I != E; ++I) { OS << ", PostIncLoop="; - WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false); + WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false); } if (LUIdx != ~size_t(0)) @@ -1545,8 +1563,9 @@ LSRInstance::OptimizeLoopTermCond() { !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { // Conservatively assume there may be reuse if the quotient of their // strides could be a legal scale. - const SCEV *A = CondUse->getStride(); - const SCEV *B = UI->getStride(); + const SCEV *A = CondUse->getStride(L); + const SCEV *B = UI->getStride(L); + if (!A || !B) continue; if (SE.getTypeSizeInBits(A->getType()) != SE.getTypeSizeInBits(B->getType())) { if (SE.getTypeSizeInBits(A->getType()) > @@ -1598,7 +1617,7 @@ LSRInstance::OptimizeLoopTermCond() { ExitingBlock->getInstList().insert(TermBr, Cond); // Clone the IVUse, as the old use still exists! - CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(), + CondUse = &IU.AddUser(CondUse->getExpr(), Cond, CondUse->getOperandValToReplace()); TermBr->replaceUsesOfWith(OldCond, Cond); } @@ -1607,9 +1626,7 @@ LSRInstance::OptimizeLoopTermCond() { // If we get to here, we know that we can transform the setcc instruction to // use the post-incremented version of the IV, allowing us to coalesce the // live ranges for the IV correctly. - CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), - CondUse->getStride())); - CondUse->setIsUseOfPostIncrementedValue(true); + CondUse->transformToPostInc(L); Changed = true; PostIncs.insert(Cond); @@ -1717,19 +1734,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() { SmallSetVector<const SCEV *, 4> Strides; // Collect interesting types and strides. + SmallVector<const SCEV *, 4> Worklist; for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { - const SCEV *Stride = UI->getStride(); + const SCEV *Expr = UI->getExpr(); // Collect interesting types. - Types.insert(SE.getEffectiveSCEVType(Stride->getType())); - - // Add the stride for this loop. - Strides.insert(Stride); - - // Add strides for other mentioned loops. - for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset()); - AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart())) - Strides.insert(AR->getStepRecurrence(SE)); + Types.insert(SE.getEffectiveSCEVType(Expr->getType())); + + // Add strides for mentioned loops. + Worklist.push_back(Expr); + do { + const SCEV *S = Worklist.pop_back_val(); + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + Strides.insert(AR->getStepRecurrence(SE)); + Worklist.push_back(AR->getStart()); + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end()); + } + } while (!Worklist.empty()); } // Compute interesting factors from the set of interesting strides. @@ -1776,8 +1798,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LSRFixup &LF = getNewFixup(); LF.UserInst = UI->getUser(); LF.OperandValToReplace = UI->getOperandValToReplace(); - if (UI->isUseOfPostIncrementedValue()) - LF.PostIncLoop = L; + LF.PostIncLoops = UI->getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; const Type *AccessTy = 0; @@ -1786,7 +1807,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { AccessTy = getAccessType(LF.UserInst); } - const SCEV *S = IU.getCanonicalExpr(*UI); + const SCEV *S = UI->getExpr(); // Equality (== and !=) ICmps are special. We can rewrite (i == N) as // (N - i == 0), and this allows (N - i) to be the expression that we work @@ -1824,7 +1845,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; - LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst); + LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); // If this is the first use of this LSRUse, give it a formula. if (LU.Formulae.empty()) { @@ -1936,7 +1957,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; - LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst); + LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); InsertSupplementalFormula(U, LU, LF.LUIdx); CountRegisters(LU.Formulae.back(), Uses.size() - 1); break; @@ -2783,8 +2804,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, SmallVectorImpl<WeakVH> &DeadInsts) const { const LSRUse &LU = Uses[LF.LUIdx]; - // Then, collect some instructions which we will remain dominated by when - // expanding the replacement. These must be dominated by any operands that + // Then, collect some instructions which must be dominated by the + // expanding replacement. These must be dominated by any operands that // will be required in the expansion. SmallVector<Instruction *, 4> Inputs; if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) @@ -2793,8 +2814,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (Instruction *I = dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) Inputs.push_back(I); - if (LF.PostIncLoop) { - if (!L->contains(LF.UserInst)) + if (LF.PostIncLoops.count(L)) { + if (LF.isUseFullyOutsideLoop(L)) Inputs.push_back(L->getLoopLatch()->getTerminator()); else Inputs.push_back(IVIncInsertPos); @@ -2831,7 +2852,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Inform the Rewriter if we have a post-increment use, so that it can // perform an advantageous expansion. - Rewriter.setPostInc(LF.PostIncLoop); + Rewriter.setPostInc(LF.PostIncLoops); // This is the type that the user actually needs. const Type *OpTy = LF.OperandValToReplace->getType(); @@ -2855,24 +2876,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, const SCEV *Reg = *I; assert(!Reg->isZero() && "Zero allocated in a base register!"); - // If we're expanding for a post-inc user for the add-rec's loop, make the - // post-inc adjustment. - const SCEV *Start = Reg; - while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) { - if (AR->getLoop() == LF.PostIncLoop) { - Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE)); - // If the user is inside the loop, insert the code after the increment - // so that it is dominated by its operand. If the original insert point - // was already dominated by the increment, keep it, because there may - // be loop-variant operands that need to be respected also. - if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) { - IP = IVIncInsertPos; - while (isa<DbgInfoIntrinsic>(IP)) ++IP; - } - break; - } - Start = AR->getStart(); - } + // If we're expanding for a post-inc user, make the post-inc adjustment. + PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); + Reg = TransformForPostIncUse(Denormalize, Reg, + LF.UserInst, LF.OperandValToReplace, + Loops, SE, DT); Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); } @@ -2889,11 +2897,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (F.AM.Scale != 0) { const SCEV *ScaledS = F.ScaledReg; - // If we're expanding for a post-inc user for the add-rec's loop, make the - // post-inc adjustment. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS)) - if (AR->getLoop() == LF.PostIncLoop) - ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE)); + // If we're expanding for a post-inc user, make the post-inc adjustment. + PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); + ScaledS = TransformForPostIncUse(Denormalize, ScaledS, + LF.UserInst, LF.OperandValToReplace, + Loops, SE, DT); if (LU.Kind == LSRUse::ICmpZero) { // An interesting way of "folding" with an icmp is to use a negated @@ -2954,7 +2962,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); // We're done expanding now, so reset the rewriter. - Rewriter.setPostInc(0); + Rewriter.clearPostInc(); // An ICmpZero Formula represents an ICmp which we're handling as a // comparison against zero. Now that we've expanded an expression for that |