author    Dan Gohman <gohman@apple.com>  2008-12-16 03:35:01 +0000
committer Dan Gohman <gohman@apple.com>  2008-12-16 03:35:01 +0000
commit    8749b61178228ba1fb2668034d79da1b247173d7 (patch)
tree      636b5ac610b13cb87c049c0be682018b5a610d2f /lib/CodeGen
parent    9a65d6afc203fb8e44a807f84e3d370f16b08a5a (diff)
Add initial support for back-scheduling address computations,
especially in the case of addresses computed from loop induction variables.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61075 91177308-0d34-0410-b5e6-96231b3b80d8
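For illustration only (not part of the commit): a hypothetical loop of the kind this change targets. The load's address is derived from the induction variable, so the address arithmetic for the next iteration can be started well before the load that consumes it.

    // Hypothetical example, not from the patch: a[i]'s address is computed
    // from the induction variable i, so the address computation for the
    // next iteration can be back-scheduled across this iteration's work.
    float sum_array(const float *a, int n) {
      float sum = 0.0f;
      for (int i = 0; i < n; ++i)
        sum += a[i];   // load address = a + i*sizeof(float), fed by i
      return sum;
    }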
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp |   8
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp    | 133
2 files changed, 140 insertions(+), 1 deletion(-)
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 7ad9ac9b51..2e7b89c494 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -19,6 +19,14 @@
using namespace llvm;
bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
unsigned LHSNum = LHS->NodeNum;
unsigned RHSNum = RHS->NodeNum;
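As a standalone sketch (simplified stand-in types, not LLVM's), here is how that override behaves when the comparator drives a std::priority_queue, as LatencyPriorityQueue does: a node flagged isScheduleHigh pops before everything else, regardless of the usual latency-based ordering.

    #include <cstdio>
    #include <queue>
    #include <vector>

    struct ToySUnit {
      unsigned NodeNum;
      bool isScheduleHigh;
    };

    // Mirrors latency_sort's contract: returning true ranks LHS *below*
    // RHS, so an isScheduleHigh node is never ranked below a normal one.
    struct ToyLatencySort {
      bool operator()(const ToySUnit *LHS, const ToySUnit *RHS) const {
        if (LHS->isScheduleHigh && !RHS->isScheduleHigh) return false;
        if (!LHS->isScheduleHigh && RHS->isScheduleHigh) return true;
        return LHS->NodeNum > RHS->NodeNum; // stand-in for the latency compare
      }
    };

    int main() {
      ToySUnit A = {0, false}, B = {1, true}, C = {2, false};
      std::priority_queue<ToySUnit*, std::vector<ToySUnit*>,
                          ToyLatencySort> Q;
      Q.push(&A); Q.push(&B); Q.push(&C);
      while (!Q.empty()) {              // prints SU(1), SU(0), SU(2)
        std::printf("SU(%u)\n", Q.top()->NodeNum);
        Q.pop();
      }
      return 0;
    }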
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index cfa639e940..c3209006b9 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "sched-instrs"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
@@ -29,6 +30,59 @@
#include <map>
using namespace llvm;
+namespace {
+ class VISIBILITY_HIDDEN LoopDependencies {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+
+ public:
+ typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
+ LoopDeps;
+ LoopDeps Deps;
+
+ LoopDependencies(const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt) :
+ MLI(mli), MDT(mdt) {}
+
+ void VisitLoop(const MachineLoop *Loop) {
+ Deps.clear();
+ MachineBasicBlock *Header = Loop->getHeader();
+ SmallSet<unsigned, 8> LoopLiveIns;
+ for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
+ LE = Header->livein_end(); LI != LE; ++LI)
+ LoopLiveIns.insert(*LI);
+
+ VisitRegion(MDT.getNode(Header), Loop, LoopLiveIns);
+ }
+
+ private:
+ void VisitRegion(const MachineDomTreeNode *Node,
+ const MachineLoop *Loop,
+ const SmallSet<unsigned, 8> &LoopLiveIns) {
+ MachineBasicBlock *MBB = Node->getBlock();
+ if (!Loop->contains(MBB)) return;
+
+ unsigned Count = 0;
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I, ++Count) {
+ const MachineInstr *MI = I;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (LoopLiveIns.count(MOReg))
+ Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
+ }
+ }
+
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ for (unsigned I = 0, E = Children.size(); I != E; ++I)
+ VisitRegion(Children[I], Loop, LoopLiveIns);
+ }
+ };
+}
+
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineBasicBlock *bb,
const TargetMachine &tm,
const MachineLoopInfo &mli,
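A minimal standalone model (invented Toy* types, not LLVM's) of what VisitLoop leaves in Deps: for each register live into the loop header, the first use found in dominator-tree order and the number of instructions preceding it. Note that std::map::insert is a no-op on an existing key, so the earliest use wins, exactly as in the Deps.insert call above.

    #include <map>
    #include <set>
    #include <utility>
    #include <vector>

    struct ToyOperand { unsigned Reg; bool IsUse; };
    struct ToyInstr   { std::vector<ToyOperand> Ops; };

    typedef std::map<unsigned, std::pair<const ToyOperand*, unsigned> >
      ToyLoopDeps;

    // Walk a flattened loop body, recording per live-in register the
    // first use and its instruction index -- the same shape as LoopDeps.
    ToyLoopDeps collectLoopDeps(const std::vector<ToyInstr> &Body,
                                const std::set<unsigned> &LoopLiveIns) {
      ToyLoopDeps Deps;
      unsigned Count = 0;
      for (std::vector<ToyInstr>::const_iterator I = Body.begin(),
           E = Body.end(); I != E; ++I, ++Count)
        for (unsigned i = 0, e = I->Ops.size(); i != e; ++i) {
          const ToyOperand &MO = I->Ops[i];
          if (!MO.IsUse || !LoopLiveIns.count(MO.Reg))
            continue;
          // insert() keeps an existing entry, so only the first use
          // of each register is recorded.
          Deps.insert(std::make_pair(MO.Reg, std::make_pair(&MO, Count)));
        }
      return Deps;
    }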
@@ -65,9 +119,27 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
// all the work of the block is done before the terminator.
SUnit *Terminator = 0;
+ LoopDependencies LoopRegs(MLI, MDT);
+
+ // Track which regs are live into a loop, to help guide back-edge-aware
+ // scheduling.
+ SmallSet<unsigned, 8> LoopLiveInRegs;
+ if (MachineLoop *ML = MLI.getLoopFor(BB))
+ if (BB == ML->getLoopLatch()) {
+ MachineBasicBlock *Header = ML->getHeader();
+ for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
+ E = Header->livein_end(); I != E; ++I)
+ LoopLiveInRegs.insert(*I);
+ LoopRegs.VisitLoop(ML);
+ }
+
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ unsigned SpecialAddressLatency =
+ TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency();
+
for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin();
MII != MIE; --MII) {
MachineInstr *MI = prior(MII);
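An aside on the walk above: it visits the block bottom-up by decrementing a forward iterator from end() and reading the element just before it via prior(). A minimal standalone equivalent, with std::prev playing the role of LLVM's prior():

    #include <cstdio>
    #include <iterator>
    #include <list>

    int main() {
      std::list<int> BB;
      BB.push_back(1); BB.push_back(2); BB.push_back(3);
      // Same shape as the loop above: test against begin(), read the
      // element before the iterator, then step the iterator backward.
      for (std::list<int>::iterator MII = BB.end(), MIE = BB.begin();
           MII != MIE; --MII) {
        int MI = *std::prev(MII);
        std::printf("%d\n", MI);   // prints 3 2 1: a bottom-up visit
      }
      return 0;
    }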
@@ -118,7 +190,21 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
SUnit *UseSU = UseList[i];
if (UseSU != SU) {
- UseSU->addPred(SDep(SU, SDep::Data, DataLatency, Reg));
+ unsigned LDataLatency = DataLatency;
+ // Optionally add in a special extra latency for nodes that
+ // feed addresses.
+ // TODO: Do this for register aliases too.
+ if (SpecialAddressLatency != 0 && !UnitLatencies) {
+ MachineInstr *UseMI = UseSU->getInstr();
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+ assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+ if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+ (unsigned)RegUseIndex < UseTID.getNumOperands() &&
+ UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ LDataLatency += SpecialAddressLatency;
+ }
+ UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg));
}
}
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
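The net effect of that branch, as a standalone sketch with assumed numbers (helper name invented): if the subtarget reports a SpecialAddressLatency of 6 and the def's normal DataLatency is 1, an edge feeding a load or store's address operand gets latency 7, so a latency-driven top-down scheduler pulls the address computation about six slots ahead of the memory operation it feeds.

    // UseIsMemOp stands for (UseTID.mayLoad() || UseTID.mayStore());
    // UseIsAddressOperand stands for
    // OpInfo[RegUseIndex].isLookupPtrRegClass().
    unsigned adjustedDataLatency(unsigned DataLatency, bool UnitLatencies,
                                 bool UseIsMemOp, bool UseIsAddressOperand,
                                 unsigned SpecialAddressLatency) {
      unsigned LDataLatency = DataLatency;
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseIsMemOp && UseIsAddressOperand)
        LDataLatency += SpecialAddressLatency;
      return LDataLatency;
    }
    // e.g. adjustedDataLatency(1, false, true, true, 6) == 7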
@@ -130,6 +216,51 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
}
}
+ // If a def is going to wrap back around to the top of the loop,
+ // backschedule it.
+ // TODO: Blocks in loops without terminators can benefit too.
+ if (!UnitLatencies && Terminator && DefList.empty()) {
+ LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
+ if (I != LoopRegs.Deps.end()) {
+ const MachineOperand *UseMO = I->second.first;
+ unsigned Count = I->second.second;
+ const MachineInstr *UseMI = UseMO->getParent();
+ unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ // TODO: If we knew the total depth of the region here, we could
+ // handle the case where the whole loop is inside the region but
+ // is large enough that the isScheduleHigh trick isn't needed.
+ if (UseMOIdx < UseTID.getNumOperands()) {
+ // Currently, we only support scheduling regions consisting of
+ // single basic blocks. Check to see if the instruction is in
+ // the same region by checking to see if it has the same parent.
+ if (UseMI->getParent() != MI->getParent()) {
+ unsigned Latency = SU->Latency;
+ if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ Latency += SpecialAddressLatency;
+ // This is a wild guess as to the portion of the latency which
+ // will be overlapped by work done outside the current
+ // scheduling region.
+ Latency -= std::min(Latency, Count);
+ // Add the artificial edge.
+ Terminator->addPred(SDep(SU, SDep::Order, Latency,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (SpecialAddressLatency > 0 &&
+ UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ // The entire loop body is within the current scheduling region
+ // and the latency of this operation is assumed to be greater
+ // than the latency of the loop.
+ // TODO: Recursively mark data-edge predecessors as
+ // isScheduleHigh too.
+ SU->isScheduleHigh = true;
+ }
+ }
+ LoopRegs.Deps.erase(I);
+ }
+ }
+
UseList.clear();
if (!MO.isDead())
DefList.clear();
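To make the "wild guess" concrete, a standalone sketch of the arithmetic with assumed numbers (helper name invented): with SU->Latency == 3, a use that is an address operand (SpecialAddressLatency == 6), and Count == 4 instructions ahead of the use in the loop, the artificial edge to the terminator gets latency (3 + 6) - min(9, 4) = 5; the work preceding the use is guessed to hide that much of the def's latency.

    #include <algorithm>

    // Count is how many instructions precede the wrap-around use in the
    // loop, as recorded by LoopDependencies.
    unsigned backEdgeLatency(unsigned SULatency, bool UseIsAddressOperand,
                             unsigned SpecialAddressLatency, unsigned Count) {
      unsigned Latency = SULatency;
      if (UseIsAddressOperand)
        Latency += SpecialAddressLatency;
      Latency -= std::min(Latency, Count);   // overlap guess
      return Latency;                        // (3+6) - min(9,4) == 5
    }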