summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h2
-rw-r--r--lib/CodeGen/ScheduleDAGEmit.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp46
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp58
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp30
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp11
-rw-r--r--test/CodeGen/X86/pr3244.ll26
7 files changed, 114 insertions, 66 deletions
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 03d11e2d82..765c26a594 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -485,7 +485,7 @@ namespace llvm {
protected:
void AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO);
- void EmitCrossRCCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
/// ForceUnitLatencies - Return true if all scheduling edges should be given a
/// latency value of one. The default is to return false; schedulers may
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
index d10d670d34..1f40771e3b 100644
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -36,7 +36,7 @@ void ScheduleDAG::EmitNoop() {
TII->insertNoop(*BB, BB->end());
}
-void ScheduleDAG::EmitCrossRCCopy(SUnit *SU,
+void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
DenseMap<SUnit*, unsigned> &VRBaseMap) {
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
@@ -49,12 +49,11 @@ void ScheduleDAG::EmitCrossRCCopy(SUnit *SU,
unsigned Reg = 0;
for (SUnit::const_succ_iterator II = SU->Succs.begin(),
EE = SU->Succs.end(); II != EE; ++II) {
- if (I->getReg()) {
- Reg = I->getReg();
+ if (II->getReg()) {
+ Reg = II->getReg();
break;
}
}
- assert(I->getReg() && "Unknown physical register!");
TII->copyRegToReg(*BB, BB->end(), Reg, VRI->second,
SU->CopyDstRC, SU->CopySrcRC);
} else {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 113dfb1751..b86492992c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes");
-STATISTIC(NumCCCopies, "Number of cross class copies");
+STATISTIC(NumPRCopies, "Number of physical copies");
static RegisterScheduler
fastDAGScheduler("fast", "Fast suboptimal list scheduling",
@@ -93,10 +93,10 @@ private:
void ReleasePred(SUnit *SU, SDep *PredEdge);
void ScheduleNodeBottomUp(SUnit*, unsigned);
SUnit *CopyAndMoveSuccessors(SUnit*);
- void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
- const TargetRegisterClass*,
- const TargetRegisterClass*,
- SmallVector<SUnit*, 2>&);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleBottomUp();
@@ -361,17 +361,16 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
DelDeps.push_back(std::make_pair(SuccSU, D));
}
}
- for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second);
- }
++NumDups;
return NewSU;
}
-/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
-/// and move all scheduled successors of the given SUnit to the last copy.
-void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC,
SmallVector<SUnit*, 2> &Copies) {
@@ -408,7 +407,7 @@ void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
Copies.push_back(CopyFromSU);
Copies.push_back(CopyToSU);
- ++NumCCCopies;
+ ++NumPRCopies;
}
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
@@ -524,19 +523,22 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg];
- SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If cross copy register class is null, then it must be possible copy
+ // the value directly. Do not try duplicate the def.
+ SUnit *NewDef = 0;
+ if (DestRC)
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ else
+ DestRC = RC;
if (!NewDef) {
- // Issue expensive cross register class copies.
- MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
- const TargetRegisterClass *RC =
- TRI->getPhysicalRegisterRegClass(Reg, VT);
- const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
- if (!DestRC) {
- assert(false && "Don't know how to copy this physical register!");
- abort();
- }
+ // Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
- InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
DOUT << "Adding an edge from SU # " << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n";
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 65de7a5c8f..bc5443eaba 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes");
-STATISTIC(NumCCCopies, "Number of cross class copies");
+STATISTIC(NumPRCopies, "Number of physical register copies");
static RegisterScheduler
burrListDAGScheduler("list-burr",
@@ -121,10 +121,10 @@ private:
void UnscheduleNodeBottomUp(SUnit*);
void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
SUnit *CopyAndMoveSuccessors(SUnit*);
- void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
- const TargetRegisterClass*,
- const TargetRegisterClass*,
- SmallVector<SUnit*, 2>&);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleTopDown();
void ListScheduleBottomUp();
@@ -517,11 +517,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
return NewSU;
}
-/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
-/// and move all scheduled successors of the given SUnit to the last copy.
-void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
SmallVector<SUnit*, 2> &Copies) {
SUnit *CopyFromSU = CreateNewSUnit(NULL);
CopyFromSU->CopySrcRC = SrcRC;
@@ -546,9 +546,8 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
DelDeps.push_back(std::make_pair(SuccSU, *I));
}
}
- for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second);
- }
AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
@@ -559,7 +558,7 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
Copies.push_back(CopyFromSU);
Copies.push_back(CopyToSU);
- ++NumCCCopies;
+ ++NumPRCopies;
}
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
@@ -705,27 +704,32 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
}
if (!CurSU) {
- // Can't backtrack. Try duplicating the nodes that produces these
- // "expensive to copy" values to break the dependency. In case even
- // that doesn't work, insert cross class copies.
+ // Can't backtrack. If it's too expensive to copy the value, then try
+ // duplicate the nodes that produces these "too expensive to copy"
+ // values to break the dependency. In case even that doesn't work,
+ // insert cross class copies.
+ // If it's not too expensive, i.e. cost != -1, issue copies.
SUnit *TrySU = NotReady[0];
SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg];
- SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If cross copy register class is null, then it must be possible copy
+ // the value directly. Do not try duplicate the def.
+ SUnit *NewDef = 0;
+ if (DestRC)
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ else
+ DestRC = RC;
if (!NewDef) {
- // Issue expensive cross register class copies.
- MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
- const TargetRegisterClass *RC =
- TRI->getPhysicalRegisterRegClass(Reg, VT);
- const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
- if (!DestRC) {
- assert(false && "Don't know how to copy this physical register!");
- abort();
- }
+ // Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
- InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
DOUT << "Adding an edge from SU #" << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n";
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index a8d904c2f6..c755086a8d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -39,11 +39,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
/// CheckForPhysRegDependency - Check if the dependency between def and use of
/// a specified operand is a physical register dependency. If so, returns the
-/// register.
+/// register and the cost of copying the register.
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
- unsigned &PhysReg) {
+ unsigned &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
return;
@@ -55,8 +55,12 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
if (Def->isMachineOpcode()) {
const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
if (ResNo >= II.getNumDefs() &&
- II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
PhysReg = Reg;
+ const TargetRegisterClass *RC =
+ TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+ Cost = RC->getCopyCost();
+ }
}
}
@@ -179,10 +183,18 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
bool isChain = OpVT == MVT::Other;
unsigned PhysReg = 0;
+ int Cost = 1;
// Determine if this is a physical register dependency.
- CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg);
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
assert((PhysReg == 0 || !isChain) &&
"Chain dependence via physreg data?");
+ // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
+ // emits a copy from the physical register to a virtual register unless
+ // it requires a cross class copy (cost < 0). That means we are only
+ // treating "expensive to copy" register dependency as physical register
+ // dependency. This may change in the future though.
+ if (Cost >= 0)
+ PhysReg = 0;
SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpSU->Latency, PhysReg));
}
@@ -252,10 +264,12 @@ unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) {
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
- if (SU->getNode())
- SU->getNode()->dump(DAG);
- else
- cerr << "CROSS RC COPY ";
+ if (!SU->getNode()) {
+ cerr << "PHYS REG COPY\n";
+ return;
+ }
+
+ SU->getNode()->dump(DAG);
cerr << "\n";
SmallVector<SDNode *, 4> FlaggedNodes;
for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
index ebe084d811..d617965158 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
@@ -629,6 +629,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// For pre-regalloc scheduling, create instructions corresponding to the
// SDNode and any flagged SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap);
+ continue;
+ }
+
SmallVector<SDNode *, 4> FlaggedNodes;
for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
FlaggedNodes.push_back(N);
@@ -636,10 +642,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, VRBaseMap);
FlaggedNodes.pop_back();
}
- if (!SU->getNode())
- EmitCrossRCCopy(SU, CopyVRBaseMap);
- else
- EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
+ EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
}
return BB;
diff --git a/test/CodeGen/X86/pr3244.ll b/test/CodeGen/X86/pr3244.ll
new file mode 100644
index 0000000000..0765f86405
--- /dev/null
+++ b/test/CodeGen/X86/pr3244.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-as < %s | llc -march=x86
+; PR3244
+
+@g_62 = external global i16 ; <i16*> [#uses=1]
+@g_487 = external global i32 ; <i32*> [#uses=1]
+
+define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind {
+entry:
+ %0 = load i16* @g_62, align 2 ; <i16> [#uses=1]
+ %1 = load i32* @g_487, align 4 ; <i32> [#uses=1]
+ %2 = trunc i16 %0 to i8 ; <i8> [#uses=1]
+ %3 = trunc i32 %1 to i8 ; <i8> [#uses=1]
+ %4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1)
+nounwind ; <i32> [#uses=1]
+ %5 = trunc i32 %4 to i8 ; <i8> [#uses=1]
+ %6 = mul i8 %3, %2 ; <i8> [#uses=1]
+ %7 = mul i8 %6, %5 ; <i8> [#uses=1]
+ %8 = sext i8 %7 to i16 ; <i16> [#uses=1]
+ %9 = tail call i32 @func_85(i16 signext %8, i32 1, i32 1) nounwind
+ ; <i32> [#uses=0]
+ ret i32 undef
+}
+
+declare i32 @func_7(...)
+
+declare i32 @func_85(i16 signext, i32, i32)