From 9cae2d2225ba58a70ef8ff057feab6873f4af520 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 8 Nov 2011 21:29:06 +0000 Subject: Add a hack to the scheduler to disable pseudo-two-address dependencies in basic blocks containing calls. This works around a problem in which these artificial dependencies can get tied up in calling sequence scheduling in a way that makes the graph unschedulable with the current approach of using artificial physical register dependencies for calling sequences. This fixes PR11314. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144124 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 16 +++++++++++++--- test/CodeGen/X86/fold-pcmpeqd-0.ll | 8 +------- .../X86/multiple-libcalls-and-twoaddr-deps-scheduling.ll | 16 ++++++++++++++++ 3 files changed, 30 insertions(+), 10 deletions(-) create mode 100644 test/CodeGen/X86/multiple-libcalls-and-twoaddr-deps-scheduling.ll diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index cab303dd5c..f965a5e8ab 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1666,7 +1666,7 @@ public: protected: bool canClobber(const SUnit *SU, const SUnit *Op); - void AddPseudoTwoAddrDeps(); + void AddPseudoTwoAddrDeps(const TargetInstrInfo *TII); void PrescheduleNodesWithMultipleUses(); void CalculateSethiUllmanNumbers(); }; @@ -2628,7 +2628,7 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { void RegReductionPQBase::initNodes(std::vector &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); + AddPseudoTwoAddrDeps(TII); // Reroute edges to nodes with multiple uses. 
if (!TracksRegPressure) PrescheduleNodesWithMultipleUses(); @@ -2855,7 +2855,17 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { /// one that has a CopyToReg use (more likely to be a loop induction update). /// If both are two-address, but one is commutable while the other is not /// commutable, favor the one that's not commutable. -void RegReductionPQBase::AddPseudoTwoAddrDeps() { +void RegReductionPQBase::AddPseudoTwoAddrDeps(const TargetInstrInfo *TII) { + // If the graph contains any calls, disable this optimization. + // FIXME: This is a kludge to work around the fact that the artificial edges + // can combine with the way call sequences use physical register dependencies + // to model their resource usage to create unschedulable graphs. + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + for (SDNode *Node = (*SUnits)[i].getNode(); Node; Node = Node->getGluedNode()) + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) + return; + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { SUnit *SU = &(*SUnits)[i]; if (!SU->isTwoAddress) diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll index 647bbdb7f0..6095a9cd20 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-0.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll @@ -3,15 +3,9 @@ ; This testcase shouldn't need to spill the -1 value, ; so it should just use pcmpeqd to materialize an all-ones vector. -; For i386, cp load of -1 are folded. -; With -regalloc=greedy, the live range is split before spilling, so the first -; pcmpeq doesn't get folded as a constant pool load. 
- -; I386-NOT: pcmpeqd -; I386: orps LCPI0_2, %xmm +; I386: pcmpeqd ; I386-NOT: pcmpeqd -; I386: orps LCPI0_2, %xmm ; X86-64: pcmpeqd ; X86-64-NOT: pcmpeqd diff --git a/test/CodeGen/X86/multiple-libcalls-and-twoaddr-deps-scheduling.ll b/test/CodeGen/X86/multiple-libcalls-and-twoaddr-deps-scheduling.ll new file mode 100644 index 0000000000..b114a57966 --- /dev/null +++ b/test/CodeGen/X86/multiple-libcalls-and-twoaddr-deps-scheduling.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=x86 -mcpu=pentium4 -mtriple=i686-none-linux < %s +; PR11314 + +; Make sure the scheduler's hack to insert artificial dependencies to optimize +; two-address instruction scheduling doesn't interfere with the scheduler's +; hack to model call sequences as artificial physical registers. + +define inreg { i64, i64 } @sscanf(i32 inreg %base.1.i) nounwind { +entry: + %conv38.i92.i = sext i32 %base.1.i to i64 + %rem.i93.i = urem i64 10, %conv38.i92.i + %div.i94.i = udiv i64 10, %conv38.i92.i + %a = insertvalue { i64, i64 } undef, i64 %rem.i93.i, 0 + %b = insertvalue { i64, i64 } %a, i64 %div.i94.i, 1 + ret { i64, i64 } %b +} -- cgit v1.2.3