 include/llvm/Analysis/ScalarEvolutionExpander.h                        |   4
 lib/Analysis/ScalarEvolutionExpander.cpp                               | 161
 test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll |  50
 3 files changed, 189 insertions(+), 26 deletions(-)
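
A note on the core idea, before the diff: instead of materializing a fresh induction variable for a recurrence like {R,+,-1} (start at R, step by -1), the expander can now reuse an existing, possibly wider IV such as {0,+,1} by truncating it and inverting the step, since {R,+,-1} == R - {0,+,1}. Below is a minimal standalone sketch of that identity in plain C++ (illustrative only, not LLVM code; evalAddRec is a made-up helper):

#include <cassert>
#include <cstdint>

// Evaluate the affine recurrence {Start,+,Step} at iteration i.
static int64_t evalAddRec(int64_t Start, int64_t Step, int64_t i) {
  return Start + Step * i;
}

int main() {
  // Existing IV: {0,+,1} as i64. Requested: {R,+,-1} as i32.
  const int32_t R = 1000;
  for (int64_t i = 0; i < 8; ++i) {
    int64_t Phi = evalAddRec(/*Start=*/0, /*Step=*/1, i);
    // Truncate the wide IV to the requested width (TruncTy in the patch)...
    int32_t Trunc = static_cast<int32_t>(Phi);
    // ...then invert the step (InvertStep): {R,+,-1} == R - {0,+,1}.
    int32_t Reused = R - Trunc;
    assert(Reused == static_cast<int32_t>(evalAddRec(R, -1, i)));
  }
  return 0;
}

This is exactly the check that canBeCheaplyTransformed performs on the SCEV side, and what the new TruncTy/InvertStep plumbing applies when expanding the result.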
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 4433be000d..408e9152b8 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -260,7 +260,9 @@ namespace llvm {
     PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                        const Loop *L,
                                        Type *ExpandTy,
-                                       Type *IntTy);
+                                       Type *IntTy,
+                                       Type *&TruncTy,
+                                       bool &InvertStep);
     Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
                        Type *ExpandTy, Type *IntTy, bool useSubtract);
   };
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index ea9de2f5a5..8c54058963 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1017,6 +1017,54 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
   return IncV;
 }
 
+/// \brief Hoist the addrec instruction chain rooted in the loop phi above the
+/// position. This routine assumes that this is possible (has been checked).
+static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
+                           Instruction *Pos, PHINode *LoopPhi) {
+  do {
+    if (DT->dominates(InstToHoist, Pos))
+      break;
+    // Make sure the increment is where we want it. But don't move it
+    // down past a potential existing post-inc user.
+    InstToHoist->moveBefore(Pos);
+    Pos = InstToHoist;
+    InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
+  } while (InstToHoist != LoopPhi);
+}
+
+/// \brief Check whether we can cheaply express the requested SCEV in terms of
+/// the available PHI SCEV by truncation and/or inversion of the step.
+static bool canBeCheaplyTransformed(ScalarEvolution &SE,
+                                    const SCEVAddRecExpr *Phi,
+                                    const SCEVAddRecExpr *Requested,
+                                    bool &InvertStep) {
+  Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
+  Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
+
+  if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
+    return false;
+
+  // Try to truncate it if necessary.
+  Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
+  if (!Phi)
+    return false;
+
+  // Check whether truncation will help.
+  if (Phi == Requested) {
+    InvertStep = false;
+    return true;
+  }
+
+  // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
+  if (SE.getAddExpr(Requested->getStart(),
+                    SE.getNegativeSCEV(Requested)) == Phi) {
+    InvertStep = true;
+    return true;
+  }
+
+  return false;
+}
+
 /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
 /// the base addrec, which is the addrec without any non-loop-dominating
 /// values, and return the PHI.
@@ -1024,49 +1072,87 @@ PHINode *
 SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                         const Loop *L,
                                         Type *ExpandTy,
-                                        Type *IntTy) {
+                                        Type *IntTy,
+                                        Type *&TruncTy,
+                                        bool &InvertStep) {
   assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
 
   // Reuse a previously-inserted PHI, if present.
   BasicBlock *LatchBlock = L->getLoopLatch();
   if (LatchBlock) {
+    PHINode *AddRecPhiMatch = 0;
+    Instruction *IncV = 0;
+    TruncTy = 0;
+    InvertStep = false;
+
+    // Only try partially matching SCEVs that need truncation and/or
+    // step-inversion if we know this loop is outside the current loop.
+    bool TryNonMatchingSCEV = IVIncInsertLoop &&
+      SE.DT->properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
+
     for (BasicBlock::iterator I = L->getHeader()->begin();
          PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-      if (!SE.isSCEVable(PN->getType()) ||
-          (SE.getEffectiveSCEVType(PN->getType()) !=
-           SE.getEffectiveSCEVType(Normalized->getType())) ||
-          SE.getSCEV(PN) != Normalized)
+      if (!SE.isSCEVable(PN->getType()))
         continue;
 
-      Instruction *IncV =
-        cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
+      if (!PhiSCEV)
+        continue;
 
+      bool IsMatchingSCEV = PhiSCEV == Normalized;
+      // We only handle truncation and inversion of phi recurrences for the
+      // expanded expression if the expanded expression's loop dominates the
+      // loop we insert to. Check now, so we can bail out early.
+      if (!IsMatchingSCEV && !TryNonMatchingSCEV)
+        continue;
+
+      Instruction *TempIncV =
+          cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+      // Check whether we can reuse this PHI node.
       if (LSRMode) {
-        if (!isExpandedAddRecExprPHI(PN, IncV, L))
+        if (!isExpandedAddRecExprPHI(PN, TempIncV, L))
           continue;
-        if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos))
+        if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
           continue;
-      }
-      else {
-        if (!isNormalAddRecExprPHI(PN, IncV, L))
+      } else {
+        if (!isNormalAddRecExprPHI(PN, TempIncV, L))
           continue;
-        if (L == IVIncInsertLoop)
-          do {
-            if (SE.DT->dominates(IncV, IVIncInsertPos))
-              break;
-            // Make sure the increment is where we want it. But don't move it
-            // down past a potential existing post-inc user.
-            IncV->moveBefore(IVIncInsertPos);
-            IVIncInsertPos = IncV;
-            IncV = cast<Instruction>(IncV->getOperand(0));
-          } while (IncV != PN);
       }
+
+      // Stop if we have found an exact match SCEV.
+      if (IsMatchingSCEV) {
+        IncV = TempIncV;
+        TruncTy = 0;
+        InvertStep = false;
+        AddRecPhiMatch = PN;
+        break;
+      }
+
+      // Try whether the phi can be translated into the requested form
+      // (truncated and/or offset by a constant).
+      if ((!TruncTy || InvertStep) &&
+          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
+        // Record the phi node. But don't stop, we might find an exact match
+        // later.
+        AddRecPhiMatch = PN;
+        IncV = TempIncV;
+        TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
+      }
+    }
+
+    if (AddRecPhiMatch) {
+      // Potentially, move the increment. We have made sure in
+      // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
+      if (L == IVIncInsertLoop)
+        hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
+
       // Ok, the add recurrence looks usable.
       // Remember this PHI, even in post-inc mode.
-      InsertedValues.insert(PN);
+      InsertedValues.insert(AddRecPhiMatch);
       // Remember the increment.
       rememberInstruction(IncV);
-      return PN;
+      return AddRecPhiMatch;
     }
   }
 
@@ -1191,7 +1277,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
   // Expand the core addrec. If we need post-loop scaling, force it to
   // expand to an integer type to avoid the need for additional casting.
   Type *ExpandTy = PostLoopScale ? IntTy : STy;
-  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
+  // In some cases, we decide to reuse an existing phi node but need to
+  // truncate it and/or invert the step.
+  Type *TruncTy = 0;
+  bool InvertStep = false;
+  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
+                                          TruncTy, InvertStep);
 
   // Accommodate post-inc mode, if necessary.
   Value *Result;
@@ -1232,6 +1323,26 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
     }
   }
 
+  // We have decided to reuse an induction variable of a dominating loop. Apply
+  // truncation and/or inversion of the step.
+  if (TruncTy) {
+    Type *ResTy = Result->getType();
+    // Normalize the result type.
+    if (ResTy != SE.getEffectiveSCEVType(ResTy))
+      Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
+    // Truncate the result.
+    if (TruncTy != Result->getType()) {
+      Result = Builder.CreateTrunc(Result, TruncTy);
+      rememberInstruction(Result);
+    }
+    // Invert the result.
+    if (InvertStep) {
+      Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
+                                 Result);
+      rememberInstruction(Result);
+    }
+  }
+
   // Re-apply any non-loop-dominating scale.
   if (PostLoopScale) {
     assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
diff --git a/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll b/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll
new file mode 100644
index 0000000000..5506994724
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll
@@ -0,0 +1,50 @@
+; RUN: opt -S -loop-reduce -mcpu=corei7-avx -mtriple=x86_64-apple-macosx < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @indvar_expansion(i8* nocapture readonly %rowsptr) {
+entry:
+  br label %for.cond
+
+; SCEVExpander used to create induction variables in the loop %for.cond while
+; expanding the recurrence start value of loop strength reduced values from
+; %vector.body.
+
+; CHECK-LABEL: indvar_expansion
+; CHECK: for.cond:
+; CHECK-NOT: phi i3
+; CHECK: br i1 {{.+}}, label %for.cond
+
+for.cond:
+  %indvars.iv44 = phi i64 [ %indvars.iv.next45, %for.cond ], [ 0, %entry ]
+  %cmp = icmp eq i8 undef, 0
+  %indvars.iv.next45 = add nuw nsw i64 %indvars.iv44, 1
+  br i1 %cmp, label %for.cond, label %for.cond2
+
+for.cond2:
+  br i1 undef, label %for.cond2, label %for.body14.lr.ph
+
+for.body14.lr.ph:
+  %sext = shl i64 %indvars.iv44, 32
+  %0 = ashr exact i64 %sext, 32
+  %1 = sub i64 undef, %indvars.iv44
+  %2 = and i64 %1, 4294967295
+  %3 = add i64 %2, 1
+  %fold = add i64 %1, 1
+  %n.mod.vf = and i64 %fold, 7
+  %n.vec = sub i64 %3, %n.mod.vf
+  %end.idx.rnd.down = add i64 %n.vec, %0
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body14.lr.ph ]
+  %4 = getelementptr inbounds i8* %rowsptr, i64 %index
+  %5 = bitcast i8* %4 to <4 x i8>*
+  %wide.load = load <4 x i8>* %5, align 1
+  %index.next = add i64 %index, 8
+  %6 = icmp eq i64 %index.next, %end.idx.rnd.down
+  br i1 %6, label %for.end24, label %vector.body
+
+for.end24:
+  ret void
+}
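
For reference, here is a toy model of the chain walk performed by the new hoistBeforePos helper, using a std::list in place of a basic block and program order in place of dominance (plain C++14, illustrative only; none of these types or helpers are LLVM's API):

#include <cassert>
#include <list>
#include <string>

// Toy stand-ins for Instruction/BasicBlock. Op0 links each increment to the
// instruction it was computed from, ending at the loop phi.
struct Inst {
  std::string Name;
  Inst *Op0 = nullptr;
};
typedef std::list<Inst *> Block;

// Within a single block, "dominates" reduces to program order.
static bool precedes(const Block &BB, const Inst *A, const Inst *B) {
  for (const Inst *I : BB) {
    if (I == A) return true;
    if (I == B) return false;
  }
  return false;
}

static void moveBefore(Block &BB, Inst *I, Inst *Pos) {
  BB.remove(I);
  for (Block::iterator It = BB.begin(); It != BB.end(); ++It)
    if (*It == Pos) { BB.insert(It, I); return; }
}

// The hoistBeforePos walk: move each link of the increment chain above the
// insert position, following first operands until the loop phi is reached.
static void hoistBeforePos(Block &BB, Inst *InstToHoist, Inst *Pos,
                           Inst *LoopPhi) {
  do {
    if (precedes(BB, InstToHoist, Pos))
      break;
    moveBefore(BB, InstToHoist, Pos);
    Pos = InstToHoist;
    InstToHoist = InstToHoist->Op0;
  } while (InstToHoist != LoopPhi);
}

int main() {
  Inst Phi{"phi"}, Use{"use"}, Add1{"add1"}, Add2{"add2"};
  Add1.Op0 = &Phi;  // add1 = phi + 1
  Add2.Op0 = &Add1; // add2 = add1 + 1
  Block BB = {&Phi, &Use, &Add1, &Add2};

  // Hoist the chain rooted at add2 above use.
  hoistBeforePos(BB, &Add2, &Use, &Phi);

  // Resulting order: phi, add1, add2, use.
  assert(precedes(BB, &Add1, &Add2) && precedes(BB, &Add2, &Use));
  return 0;
}

As in the real routine, the walk stops early once a link of the chain already sits above the position, so instructions are never moved down past a potential post-increment user.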