diff options
author | Dan Gohman <gohman@apple.com> | 2008-12-16 03:35:01 +0000 |
---|---|---|
committer | Dan Gohman <gohman@apple.com> | 2008-12-16 03:35:01 +0000 |
commit | 8749b61178228ba1fb2668034d79da1b247173d7 (patch) | |
tree | 636b5ac610b13cb87c049c0be682018b5a610d2f /lib/CodeGen | |
parent | 9a65d6afc203fb8e44a807f84e3d370f16b08a5a (diff) | |
download | llvm-8749b61178228ba1fb2668034d79da1b247173d7.tar.gz llvm-8749b61178228ba1fb2668034d79da1b247173d7.tar.bz2 llvm-8749b61178228ba1fb2668034d79da1b247173d7.tar.xz |
Add initial support for back-scheduling address computations,
especially in the case of addresses computed from loop induction
variables.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61075 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- | lib/CodeGen/LatencyPriorityQueue.cpp | 8 | ||||
-rw-r--r-- | lib/CodeGen/ScheduleDAGInstrs.cpp | 133 |
2 files changed, 140 insertions, 1 deletions
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 7ad9ac9b51..2e7b89c494 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -19,6 +19,14 @@ using namespace llvm; bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. + if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + unsigned LHSNum = LHS->NodeNum; unsigned RHSNum = RHS->NodeNum; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index cfa639e940..c3209006b9 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "sched-instrs" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -29,6 +30,59 @@ #include <map> using namespace llvm; +namespace { + class VISIBILITY_HIDDEN LoopDependencies { + const MachineLoopInfo &MLI; + const MachineDominatorTree &MDT; + + public: + typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> > + LoopDeps; + LoopDeps Deps; + + LoopDependencies(const MachineLoopInfo &mli, + const MachineDominatorTree &mdt) : + MLI(mli), MDT(mdt) {} + + void VisitLoop(const MachineLoop *Loop) { + Deps.clear(); + MachineBasicBlock *Header = Loop->getHeader(); + SmallSet<unsigned, 8> LoopLiveIns; + for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), + LE = Header->livein_end(); LI != LE; ++LI) + LoopLiveIns.insert(*LI); + + VisitRegion(MDT.getNode(Header), Loop, LoopLiveIns); + } + + 
private: + void VisitRegion(const MachineDomTreeNode *Node, + const MachineLoop *Loop, + const SmallSet<unsigned, 8> &LoopLiveIns) { + MachineBasicBlock *MBB = Node->getBlock(); + if (!Loop->contains(MBB)) return; + + unsigned Count = 0; + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I, ++Count) { + const MachineInstr *MI = I; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (LoopLiveIns.count(MOReg)) + Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); + } + } + + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + for (unsigned I = 0, E = Children.size(); I != E; ++I) + VisitRegion(Children[I], Loop, LoopLiveIns); + } + }; +} + ScheduleDAGInstrs::ScheduleDAGInstrs(MachineBasicBlock *bb, const TargetMachine &tm, const MachineLoopInfo &mli, @@ -65,9 +119,27 @@ void ScheduleDAGInstrs::BuildSchedUnits() { // all the work of the block is done before the terminator. SUnit *Terminator = 0; + LoopDependencies LoopRegs(MLI, MDT); + + // Track which regs are live into a loop, to help guide back-edge-aware + // scheduling. + SmallSet<unsigned, 8> LoopLiveInRegs; + if (MachineLoop *ML = MLI.getLoopFor(BB)) + if (BB == ML->getLoopLatch()) { + MachineBasicBlock *Header = ML->getHeader(); + for (MachineBasicBlock::livein_iterator I = Header->livein_begin(), + E = Header->livein_end(); I != E; ++I) + LoopLiveInRegs.insert(*I); + LoopRegs.VisitLoop(ML); + } + // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); + // Ask the target if address-backscheduling is desirable, and if so how much. 
+ unsigned SpecialAddressLatency = + TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency(); + for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin(); MII != MIE; --MII) { MachineInstr *MI = prior(MII); @@ -118,7 +190,21 @@ void ScheduleDAGInstrs::BuildSchedUnits() { for (unsigned i = 0, e = UseList.size(); i != e; ++i) { SUnit *UseSU = UseList[i]; if (UseSU != SU) { - UseSU->addPred(SDep(SU, SDep::Data, DataLatency, Reg)); + unsigned LDataLatency = DataLatency; + // Optionally add in a special extra latency for nodes that + // feed addresses. + // TODO: Do this for register aliases too. + if (SpecialAddressLatency != 0 && !UnitLatencies) { + MachineInstr *UseMI = UseSU->getInstr(); + const TargetInstrDesc &UseTID = UseMI->getDesc(); + int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); + assert(RegUseIndex >= 0 && "UseMI doesn't use register!"); + if ((UseTID.mayLoad() || UseTID.mayStore()) && + (unsigned)RegUseIndex < UseTID.getNumOperands() && + UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) + LDataLatency += SpecialAddressLatency; + } + UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg)); } } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { @@ -130,6 +216,51 @@ } } + // If a def is going to wrap back around to the top of the loop, + // backschedule it. + // TODO: Blocks in loops without terminators can benefit too. 
+ if (!UnitLatencies && Terminator && DefList.empty()) { + LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg); + if (I != LoopRegs.Deps.end()) { + const MachineOperand *UseMO = I->second.first; + unsigned Count = I->second.second; + const MachineInstr *UseMI = UseMO->getParent(); + unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); + const TargetInstrDesc &UseTID = UseMI->getDesc(); + // TODO: If we knew the total depth of the region here, we could + // handle the case where the whole loop is inside the region but + // is large enough that the isScheduleHigh trick isn't needed. + if (UseMOIdx < UseTID.getNumOperands()) { + // Currently, we only support scheduling regions consisting of + // single basic blocks. Check to see if the instruction is in + // the same region by checking to see if it has the same parent. + if (UseMI->getParent() != MI->getParent()) { + unsigned Latency = SU->Latency; + if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) + Latency += SpecialAddressLatency; + // This is a wild guess as to the portion of the latency which + // will be overlapped by work done outside the current + // scheduling region. + Latency -= std::min(Latency, Count); + // Add the artificial edge. + Terminator->addPred(SDep(SU, SDep::Order, Latency, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + } else if (SpecialAddressLatency > 0 && + UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { + // The entire loop body is within the current scheduling region + // and the latency of this operation is assumed to be greater + // than the latency of the loop. + // TODO: Recursively mark data-edge predecessors as + // isScheduleHigh too. + SU->isScheduleHigh = true; + } + } + LoopRegs.Deps.erase(I); + } + } + UseList.clear(); if (!MO.isDead()) DefList.clear(); |