author     Dan Gohman <gohman@apple.com>   2010-01-21 22:42:49 +0000
committer  Dan Gohman <gohman@apple.com>   2010-01-21 22:42:49 +0000
commit     8b0ade3eb8281f9b332d5764cfe5c4ed3b2b32d8 (patch)
tree       bb2fd6686c3f2ba2b08dac5c10775f7936ed43b4 /lib/Transforms/Scalar
parent     80ffc965f513bed2252316af8530c14c36c2e295 (diff)
Prune the search for candidate formulae if the number of register operands
exceeds the number of registers used in the initial solution, as that
wouldn't lead to a profitable solution anyway.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94107 91177308-0d34-0410-b5e6-96231b3b80d8
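In essence, the change computes the register count of the initial solution up front and rejects any candidate formula that already needs more registers than that. A minimal standalone sketch of the idea follows (it is not part of the patch; the Candidate struct and WorthExploring helper are hypothetical names used only for illustration, while MaxNumRegs mirrors the field the patch adds to LSRInstance):

// Illustration only: hypothetical stand-in for an LSR candidate formula.
struct Candidate {
  unsigned NumRegs; // registers this candidate would need on its own
};

// The initial "one register per use" solution needs MaxNumRegs registers.
// A candidate that by itself exceeds that bound cannot be part of a cheaper
// solution, so the search can stop recursing from it immediately.
static bool WorthExploring(const Candidate &C, unsigned MaxNumRegs) {
  return C.NumRegs <= MaxNumRegs;
}

The patch implements this bound in LSRInstance::InsertFormula, shown in the diff below.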
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 99
1 file changed, 67 insertions(+), 32 deletions(-)
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b7a733c91f..a5b4fa4bcb 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1120,6 +1120,8 @@ public:
const SmallVector<LSRUse, 16> &Uses, const Loop *L,
ScalarEvolution &SE);
+ unsigned getNumRegs() const { return NumRegs; }
+
bool operator<(const Score &Other) const;
void print_details(raw_ostream &OS, const SCEV *Reg,
@@ -1383,6 +1385,11 @@ class LSRInstance {
/// arbitrary index value to each register as a sort tie breaker.
unsigned CurrentArbitraryRegIndex;
+ /// MaxNumRegs - To help prune the search for solutions, identify the number
+ /// of registers needed by the initial solution. No formula should require
+ /// more than this.
+ unsigned MaxNumRegs;
+
/// Factors - Interesting factors between use strides.
SmallSetVector<int64_t, 4> Factors;
@@ -1412,6 +1419,8 @@ class LSRInstance {
void CountRegister(const SCEV *Reg, uint32_t Complexity, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
+ bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+
void GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
Formula Base);
void GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
@@ -1913,6 +1922,23 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
CountRegister(*I, Complexity, LUIdx);
}
+/// InsertFormula - If the given formula has not yet been inserted, add it
+/// to the list, and return true. Return false otherwise.
+bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ // If a formula by itself would require more registers than the base solution,
+ // discard it and stop searching from it, as it won't be profitable. This is
+ // actually more permissive than it could be, because it doesn't include
+ // registers used by non-constant strides in F.
+ if (F.getNumRegs() > MaxNumRegs)
+ return false;
+
+ if (!LU.InsertFormula(F))
+ return false;
+
+ CountRegisters(LU.Formulae.back(), LUIdx);
+ return true;
+}
+
/// GenerateSymbolicOffsetReuse - Generate reuse formulae using symbolic
/// offsets.
void LSRInstance::GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
@@ -1930,8 +1956,7 @@ void LSRInstance::GenerateSymbolicOffsetReuse(LSRUse &LU, unsigned LUIdx,
if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI))
continue;
F.BaseRegs[i] = G;
- if (LU.InsertFormula(F))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, F);
}
}
@@ -1981,8 +2006,7 @@ void LSRInstance::GenerateICmpZeroScaledReuse(LSRUse &LU, unsigned LUIdx,
}
// If we make it here and it's legal, add it.
- if (LU.InsertFormula(F))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, F);
next:;
}
}
@@ -2006,11 +2030,9 @@ LSRInstance::GenerateFormulaeFromReplacedBaseReg(
E = AddOps.end(); I != E; ++I)
F.BaseRegs.push_back(*I);
F.AM.HasBaseReg = !F.BaseRegs.empty();
- if (LU.InsertFormula(F)) {
- CountRegisters(LU.Formulae.back(), LUIdx);
- // Recurse.
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like it.
GenerateReassociationReuse(LU, LUIdx, LU.Formulae.back());
- }
}
/// GenerateReassociationReuse - Split out subexpressions from adds and
@@ -2095,8 +2117,7 @@ void LSRInstance::GenerateCombinationReuse(LSRUse &LU, unsigned LUIdx,
}
if (Ops.size() > 1) {
F.BaseRegs.push_back(SE.getAddExpr(Ops));
- if (LU.InsertFormula(F))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, F);
}
}
@@ -2144,8 +2165,7 @@ void LSRInstance::GenerateScaledReuse(LSRUse &LU, unsigned LUIdx,
std::swap(NewF.BaseRegs[i], NewF.BaseRegs.back());
NewF.BaseRegs.pop_back();
NewF.AM.HasBaseReg = !NewF.BaseRegs.empty();
- if (LU.InsertFormula(NewF))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, NewF);
}
}
}
@@ -2174,8 +2194,7 @@ void LSRInstance::GenerateTruncateReuse(LSRUse &LU, unsigned LUIdx,
for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J)
*J = SE.getAnyExtendExpr(*J, SrcTy);
- if (LU.InsertFormula(F))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, F);
}
}
}
@@ -2300,8 +2319,7 @@ void LSRInstance::GenerateConstantOffsetReuse() {
const SCEV *Diff = SE.getAddExpr(NegImmS, NewF.ScaledReg);
if (Diff->isZero()) continue;
NewF.ScaledReg = Diff;
- if (LU.InsertFormula(NewF))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, NewF);
}
// Use the immediate in a base register.
for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
@@ -2320,8 +2338,7 @@ void LSRInstance::GenerateConstantOffsetReuse() {
-(uint64_t)NewF.AM.BaseOffs)))
continue;
NewF.BaseRegs[N] = Diff;
- if (LU.InsertFormula(NewF))
- CountRegisters(LU.Formulae.back(), LUIdx);
+ (void)InsertFormula(LU, LUIdx, NewF);
}
}
}
@@ -2358,8 +2375,19 @@ LSRInstance::GenerateAllReuseFormulae() {
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::GenerateLoopInvariantRegisterUses() {
- for (size_t i = 0, e = RegSequence.size(); i != e; ++i)
- if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(RegSequence[i])) {
+ SmallVector<const SCEV *, 8> Worklist(RegSequence.begin(), RegSequence.end());
+
+ while (!Worklist.empty()) {
+ const SCEV *S = Worklist.pop_back_val();
+
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
+ Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
+ else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ Worklist.push_back(C->getOperand());
+ else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ Worklist.push_back(D->getLHS());
+ Worklist.push_back(D->getRHS());
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
const Value *V = U->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V))
if (L->contains(Inst)) continue;
@@ -2400,6 +2428,7 @@ LSRInstance::GenerateLoopInvariantRegisterUses() {
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
}
}
+ }
}
#ifndef NDEBUG
@@ -2424,7 +2453,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
: IU(P->getAnalysis<IVUsers>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()),
- TLI(tli), L(l), Changed(false), CurrentArbitraryRegIndex(0) {
+ TLI(tli), L(l), Changed(false), CurrentArbitraryRegIndex(0), MaxNumRegs(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm()) return;
@@ -2538,15 +2567,24 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
if (Types.size() == 1)
Types.clear();
- // Now use the reuse data to generate a bunch of interesting ways
- // to formulate the values needed for the uses.
- GenerateAllReuseFormulae();
-
// If there are any uses of registers that we're tracking that have escaped
// IVUsers' attention, add trivial uses for them, so that the register
// voting process takes the into consideration.
GenerateLoopInvariantRegisterUses();
+ // Start by assuming we'll assign each use its own register. This is
+ // sometimes called "full" strength reduction, or "superhero" mode.
+ // Sometimes this is the best solution, but if there are opportunities for
+ // reuse we may find a better solution.
+ Score CurScore;
+ CurScore.RateInitial(Uses, L, SE);
+
+ MaxNumRegs = CurScore.getNumRegs();
+
+ // Now use the reuse data to generate a bunch of interesting ways
+ // to formulate the values needed for the uses.
+ GenerateAllReuseFormulae();
+
// Sort the formulae. TODO: This is redundantly sorted below.
for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(), E = Uses.end();
I != E; ++I) {
@@ -2559,13 +2597,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
// next step is to start looking at register reuse possibilities.
DEBUG(print(dbgs()); dbgs() << '\n');
- // Start by assuming we'll assign each use its own register. This is
- // sometimes called "full" strength reduction, or "superhero" mode.
- // Sometimes this is the best solution, but if there are opportunities for
- // reuse we may find a better solution.
- Score CurScore;
- CurScore.RateInitial(Uses, L, SE);
-
// Create a sorted list of registers with those with the most uses appearing
// earlier in the list. We'll visit them first, as they're the most likely
// to represent profitable reuse opportunities.
@@ -2669,6 +2700,10 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
}
void LSRInstance::print(raw_ostream &OS) const {
+ if (MaxNumRegs != 0)
+ OS << "LSR is considering " << MaxNumRegs << " to be the maximum "
+ "number of registers needed.\n";
+
OS << "LSR has identified the following interesting factors and types: ";
bool First = true;