From 5db4289e404d76664f8aabe2675a4cc2d7b0e98e Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 12 Apr 2011 21:30:53 +0000 Subject: SparseBitVector is SLOW. Use a Bitvector instead, we didn't need the smaller memory footprint anyway. This makes the greedy register allocator 10% faster. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129390 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegAllocGreedy.cpp | 103 ++++++++++++++++++++++------------------- lib/CodeGen/SplitKit.h | 3 ++ 2 files changed, 58 insertions(+), 48 deletions(-) diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 8b5c1b0aa8..0e4e6eb30b 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -22,7 +22,6 @@ #include "SpillPlacement.h" #include "SplitKit.h" #include "VirtRegMap.h" -#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" @@ -126,13 +125,17 @@ class RAGreedy : public MachineFunctionPass, /// All basic blocks where the current register has uses. SmallVector SplitConstraints; - /// Live-through blocks that have already been added to SpillPlacer. - SparseBitVector<> ActiveThroughBlocks; - /// Global live range splitting candidate info. struct GlobalSplitCandidate { unsigned PhysReg; BitVector LiveBundles; + SmallVector ActiveBlocks; + + void reset(unsigned Reg) { + PhysReg = Reg; + LiveBundles.clear(); + ActiveBlocks.clear(); + } }; /// Candidate info for for each PhysReg in AllocationOrder. @@ -174,9 +177,9 @@ private: bool addSplitConstraints(InterferenceCache::Cursor, float&); void addThroughConstraints(InterferenceCache::Cursor, ArrayRef); - void growRegion(InterferenceCache::Cursor); - float calcGlobalSplitCost(unsigned, const BitVector&); - void splitAroundRegion(LiveInterval&, unsigned, const BitVector&, + void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor); + float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor); + void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&, SmallVectorImpl&); void calcGapWeights(unsigned, SmallVectorImpl&); SlotIndex getPrevMappedIndex(const MachineInstr*); @@ -288,6 +291,7 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { void RAGreedy::releaseMemory() { SpillerInstance.reset(0); LRStage.clear(); + GlobalCand.clear(); RegAllocBase::releaseMemory(); } @@ -526,13 +530,16 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, SpillPlacer->addLinks(ArrayRef(TBS, T)); } -void RAGreedy::growRegion(InterferenceCache::Cursor Intf) { - // Keep track of through blocks that have already been added to SpillPlacer. - SparseBitVector<> Added; - SmallVector ThroughBlocks; +void RAGreedy::growRegion(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { + // Keep track of through blocks that have not been added to SpillPlacer. + BitVector Todo = SA->getThroughBlocks(); + SmallVectorImpl &ActiveBlocks = Cand.ActiveBlocks; + unsigned AddedTo = 0; #ifndef NDEBUG unsigned Visited = 0; #endif + for (;;) { ArrayRef NewBundles = SpillPlacer->getRecentPositive(); if (NewBundles.empty()) @@ -545,26 +552,26 @@ void RAGreedy::growRegion(InterferenceCache::Cursor Intf) { for (ArrayRef::iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) { unsigned Block = *I; - if (!SA->isThroughBlock(Block) || !Added.test_and_set(Block)) + if (!Todo.test(Block)) continue; + Todo.reset(Block); // This is a new through block. Add it to SpillPlacer later. - ThroughBlocks.push_back(Block); + ActiveBlocks.push_back(Block); #ifndef NDEBUG ++Visited; #endif } } // Any new blocks to add? - if (!ThroughBlocks.empty()) { - addThroughConstraints(Intf, ThroughBlocks); - ThroughBlocks.clear(); + if (ActiveBlocks.size() > AddedTo) { + ArrayRef Add(&ActiveBlocks[AddedTo], + ActiveBlocks.size() - AddedTo); + addThroughConstraints(Intf, Add); + AddedTo = ActiveBlocks.size(); } // Perhaps iterating can enable more bundles? SpillPlacer->iterate(); } - - // Rememeber the relevant set of through blocks for splitAroundRegion(). - ActiveThroughBlocks |= Added; DEBUG(dbgs() << ", v=" << Visited); } @@ -572,9 +579,10 @@ void RAGreedy::growRegion(InterferenceCache::Cursor Intf) { /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, - const BitVector &LiveBundles) { +float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { float GlobalCost = 0; + const BitVector &LiveBundles = Cand.LiveBundles; ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -591,10 +599,8 @@ float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); } - InterferenceCache::Cursor Intf(IntfCache, PhysReg); - for (SparseBitVector<>::iterator I = ActiveThroughBlocks.begin(), - E = ActiveThroughBlocks.end(); I != E; ++I) { - unsigned Number = *I; + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; if (!RegIn && !RegOut) @@ -619,18 +625,20 @@ float RAGreedy::calcGlobalSplitCost(unsigned PhysReg, /// avoiding interference. The 'stack' interval is the complement constructed by /// SplitEditor. It will contain the rest. /// -void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, - const BitVector &LiveBundles, +void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, + GlobalSplitCandidate &Cand, SmallVectorImpl &NewVRegs) { + const BitVector &LiveBundles = Cand.LiveBundles; + DEBUG({ - dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI) + dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI) << " with bundles"; for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); - InterferenceCache::Cursor Intf(IntfCache, PhysReg); + InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg); LiveRangeEdit LREdit(VirtReg, NewVRegs, this); SE->reset(LREdit); @@ -815,9 +823,8 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, } // Handle live-through blocks. - for (SparseBitVector<>::iterator I = ActiveThroughBlocks.begin(), - E = ActiveThroughBlocks.end(); I != E; ++I) { - unsigned Number = *I; + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; DEBUG(dbgs() << "Live through BB#" << Number << '\n'); @@ -850,18 +857,17 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { - BitVector LiveBundles, BestBundles; float BestCost = 0; - unsigned BestReg = 0; - ActiveThroughBlocks.clear(); + const unsigned NoCand = ~0u; + unsigned BestCand = NoCand; Order.rewind(); for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) { if (GlobalCand.size() <= Cand) GlobalCand.resize(Cand+1); - GlobalCand[Cand].PhysReg = PhysReg; + GlobalCand[Cand].reset(PhysReg); - SpillPlacer->prepare(LiveBundles); + SpillPlacer->prepare(GlobalCand[Cand].LiveBundles); float Cost; InterferenceCache::Cursor Intf(IntfCache, PhysReg); if (!addSplitConstraints(Intf, Cost)) { @@ -869,38 +875,39 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, continue; } DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost); - if (BestReg && Cost >= BestCost) { - DEBUG(dbgs() << " worse than " << PrintReg(BestReg, TRI) << '\n'); + if (BestCand != NoCand && Cost >= BestCost) { + DEBUG(dbgs() << " worse than " + << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'); continue; } - growRegion(Intf); + growRegion(GlobalCand[Cand], Intf); SpillPlacer->finish(); // No live bundles, defer to splitSingleBlocks(). - if (!LiveBundles.any()) { + if (!GlobalCand[Cand].LiveBundles.any()) { DEBUG(dbgs() << " no bundles.\n"); continue; } - Cost += calcGlobalSplitCost(PhysReg, LiveBundles); + Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf); DEBUG({ dbgs() << ", total = " << Cost << " with bundles"; - for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) + for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0; + i = GlobalCand[Cand].LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); - if (!BestReg || Cost < BestCost) { - BestReg = PhysReg; + if (BestCand == NoCand || Cost < BestCost) { + BestCand = Cand; BestCost = 0.98f * Cost; // Prevent rounding effects. - BestBundles.swap(LiveBundles); } } - if (!BestReg) + if (BestCand == NoCand) return 0; - splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs); + splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global); return 0; } diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index a387896a5d..688e4a5fcf 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -144,6 +144,9 @@ public: /// isThroughBlock - Return true if CurLI is live through MBB without uses. bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); } + /// getThroughBlocks - Return the set of through blocks. + const BitVector &getThroughBlocks() const { return ThroughBlocks; } + typedef SmallPtrSet BlockPtrSet; /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from -- cgit v1.2.3