summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/CodeGen/MachineScheduler.h5
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h25
-rw-r--r--lib/CodeGen/MachineScheduler.cpp73
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp4
-rw-r--r--test/CodeGen/X86/misched-copy.ll48
5 files changed, 141 insertions, 14 deletions
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 57febe7746..99cbd870ec 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -297,6 +297,10 @@ public:
/// reorderable instructions.
virtual void schedule();
+ /// Change the position of an instruction within the basic block and update
+ /// live ranges and region boundary iterators.
+ void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
+
/// Get current register pressure for the top scheduled instructions.
const IntervalPressure &getTopPressure() const { return TopPressure; }
const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
@@ -362,7 +366,6 @@ protected:
void updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure);
- void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
bool checkSchedLimit();
void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 8c959da696..e13636c250 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -302,6 +302,7 @@ namespace llvm {
bool isCallOp : 1; // Is a function call operand.
bool isTwoAddress : 1; // Is a two-address instruction.
bool isCommutable : 1; // Is a commutable instruction.
+ bool hasPhysRegUses : 1; // Has physreg uses.
bool hasPhysRegDefs : 1; // Has physreg defs that are being used.
bool hasPhysRegClobbers : 1; // Has any physreg defs, used or not.
bool isPending : 1; // True once pending.
@@ -331,10 +332,10 @@ namespace llvm {
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
- isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
- hasPhysRegClobbers(false), isPending(false), isAvailable(false),
- isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
- isCloned(false), SchedulingPref(Sched::None),
+ isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
+ hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
+ isAvailable(false), isScheduled(false), isScheduleHigh(false),
+ isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
@@ -345,10 +346,10 @@ namespace llvm {
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
- isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
- hasPhysRegClobbers(false), isPending(false), isAvailable(false),
- isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
- isCloned(false), SchedulingPref(Sched::None),
+ isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
+ hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
+ isAvailable(false), isScheduled(false), isScheduleHigh(false),
+ isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
@@ -358,10 +359,10 @@ namespace llvm {
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
- isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
- hasPhysRegClobbers(false), isPending(false), isAvailable(false),
- isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
- isCloned(false), SchedulingPref(Sched::None),
+ isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
+ hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
+ isAvailable(false), isScheduled(false), isScheduleHigh(false),
+ isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 5bd2349b50..736f6deae9 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -404,6 +404,8 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
}
}
+/// This is normally called from the main scheduler loop but may also be invoked
+/// by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
MachineBasicBlock::iterator InsertPos) {
// Advance RegionBegin if the first instruction moves down.
@@ -916,7 +918,7 @@ public:
/// Represent the type of SchedCandidate found within a single queue.
/// pickNodeBidirectional depends on these listed by decreasing priority.
enum CandReason {
- NoCand, SingleExcess, SingleCritical, Cluster,
+ NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster,
ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
NodeOrder};
@@ -1191,6 +1193,8 @@ protected:
const RegPressureTracker &RPTracker,
SchedCandidate &Candidate);
+ void reschedulePhysRegCopies(SUnit *SU, bool isTop);
+
#ifndef NDEBUG
void traceCandidate(const SchedCandidate &Cand);
#endif
@@ -1696,6 +1700,34 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}
+/// Minimize physical register live ranges. Regalloc wants them adjacent to
+/// their physreg def/use.
+///
+/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
+/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
+/// with the operation that produces or consumes the physreg. We'll do this when
+/// regalloc has support for parallel copies.
+static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+ const MachineInstr *MI = SU->getInstr();
+ if (!MI->isCopy())
+ return 0;
+
+ unsigned ScheduledOper = isTop ? 1 : 0;
+ unsigned UnscheduledOper = isTop ? 0 : 1;
+ // If we have already scheduled the physreg produce/consumer, immediately
+ // schedule the copy.
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(ScheduledOper).getReg()))
+ return 1;
+ // If the physreg is at the boundary, defer it. Otherwise schedule it
+ // immediately to free the dependent. We can hoist the copy later.
+ bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(UnscheduledOper).getReg()))
+ return AtBoundary ? -1 : 1;
+ return 0;
+}
+
/// Apply a set of heursitics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
@@ -1723,6 +1755,12 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
TryCand.Reason = NodeOrder;
return;
}
+
+ if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
+ biasPhysRegCopy(Cand.SU, Zone.isTop()),
+ TryCand, Cand, PhysRegCopy))
+ return;
+
// Avoid exceeding the target's limit.
if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
@@ -1851,6 +1889,7 @@ const char *ConvergingScheduler::getReasonStr(
ConvergingScheduler::CandReason Reason) {
switch (Reason) {
case NoCand: return "NOCAND ";
+ case PhysRegCopy: return "PREG-COPY";
case SingleExcess: return "REG-EXCESS";
case SingleCritical: return "REG-CRIT ";
case Cluster: return "CLUSTER ";
@@ -2069,17 +2108,49 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
return SU;
}
+void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+
+ MachineBasicBlock::iterator InsertPos = SU->getInstr();
+ if (!isTop)
+ ++InsertPos;
+ SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
+
+ // Find already scheduled copies with a single physreg dependence and move
+ // them just above the scheduled instruction.
+ for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
+ I != E; ++I) {
+ if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+ continue;
+ SUnit *DepSU = I->getSUnit();
+ if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
+ continue;
+ MachineInstr *Copy = DepSU->getInstr();
+ if (!Copy->isCopy())
+ continue;
+ DEBUG(dbgs() << " Rescheduling physreg copy ";
+ I->getSUnit()->dump(DAG));
+ DAG->moveInstruction(Copy, InsertPos);
+ }
+}
+
/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// it's state based on the current cycle before MachineSchedStrategy does.
+///
+/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
+/// them here. See comments in biasPhysRegCopy.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = Top.CurrCycle;
Top.bumpNode(SU);
+ if (SU->hasPhysRegUses)
+ reschedulePhysRegCopies(SU, true);
}
else {
SU->BotReadyCycle = Bot.CurrCycle;
Bot.bumpNode(SU);
+ if (SU->hasPhysRegDefs)
+ reschedulePhysRegCopies(SU, false);
}
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 71e7a21ef2..e4da6a41ee 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -262,6 +262,9 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
if (UseOp < 0)
Dep = SDep(SU, SDep::Artificial);
else {
+ // Set the hasPhysRegDefs only for physreg defs that have a use within
+ // the scheduling region.
+ SU->hasPhysRegDefs = true;
Dep = SDep(SU, SDep::Data, *Alias);
RegUse = UseSU->getInstr();
Dep.setMinLatency(
@@ -318,6 +321,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
}
if (!MO.isDef()) {
+ SU->hasPhysRegUses = true;
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
// retrieve the existing SUnits list for this register's uses.
// Push this SUnit on the use list.
diff --git a/test/CodeGen/X86/misched-copy.ll b/test/CodeGen/X86/misched-copy.ll
new file mode 100644
index 0000000000..d04df999b9
--- /dev/null
+++ b/test/CodeGen/X86/misched-copy.ll
@@ -0,0 +1,48 @@
+; RUN: llc %s -march=x86 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched 2>&1 | FileCheck %s
+;
+; Test scheduling of copy instructions.
+;
+; Argument copies should be hoisted to the top of the block.
+; Return copies should be sunk to the end.
+; MUL_HiLo PhysReg use copies should be just above the mul.
+; MUL_HiLo PhysReg def copies should be just below the mul.
+;
+; CHECK: *** Final schedule for BB#1 ***
+; CHECK-NEXT: %EAX<def> = COPY
+; CHECK: MUL32r %vreg{{[0-6]+}}, %EAX<imp-def>, %EDX<imp-def>, %EFLAGS<imp-def,dead>, %EAX<imp-use>;
+; CHECK-NEXT: COPY %EAX;
+; CHECK-NEXT: COPY %EDX;
+; CHECK: DIVSSrm
+define i64 @mulhoist(i32 %a, i32 %b) #0 {
+entry:
+ br label %body
+
+body:
+ %convb = sitofp i32 %b to float
+ ; Generates an iMUL64r to legalize types.
+ %aa = zext i32 %a to i64
+ %mul = mul i64 %aa, 74383
+ ; Do some dependent long latency stuff.
+ %trunc = trunc i64 %mul to i32
+ %convm = sitofp i32 %trunc to float
+ %divm = fdiv float %convm, 0.75
+ ;%addmb = fadd float %divm, %convb
+ ;%divmb = fdiv float %addmb, 0.125
+ ; Do some independent long latency stuff.
+ %conva = sitofp i32 %a to float
+ %diva = fdiv float %conva, 0.75
+ %addab = fadd float %diva, %convb
+ %divab = fdiv float %addab, 0.125
+ br label %end
+
+end:
+ %val = fptosi float %divab to i64
+ %add = add i64 %mul, %val
+ ret i64 %add
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}