Improve instruction scheduling for the PPC POWER7

Aside from a few minor latency corrections, the major change here is a new hazard recognizer which focuses on better dispatch-group formation on the POWER7. As with the PPC970's hazard recognizer, the most important thing it does is avoid load-after-store hazards within the same dispatch group. It uses the POWER7's special dispatch-group-terminating nop instruction (instead of inserting multiple regular nop instructions). This new hazard recognizer makes use of the scheduling dependency graph itself, built using alias-analysis (AA) information, to robustly detect the possibility of load-after-store hazards.

Significant test-suite performance changes (the error bars are 99.5% confidence intervals based on 5 test-suite runs both with and without the change -- speedups are negative):

speedups:
MultiSource/Benchmarks/FreeBench/pcompress2/pcompress2 -0.55171% +/- 0.333168%
MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CrossingThresholds-dbl -17.5576% +/- 14.598%
MultiSource/Benchmarks/TSVC/Reductions-dbl/Reductions-dbl -29.5708% +/- 7.09058%
MultiSource/Benchmarks/TSVC/Reductions-flt/Reductions-flt -34.9471% +/- 11.4391%
SingleSource/Benchmarks/BenchmarkGame/puzzle -25.1347% +/- 11.0104%
SingleSource/Benchmarks/Misc/flops-8 -17.7297% +/- 9.79061%
SingleSource/Benchmarks/Shootout-C++/ary3 -35.5018% +/- 23.9458%
SingleSource/Regression/C/uint64_to_float -56.3165% +/- 25.4234%
SingleSource/UnitTests/Vectorizer/gcc-loops -18.5309% +/- 6.8496%

regressions:
MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000 18.351% +/- 12.156%
SingleSource/Benchmarks/Shootout-C++/methcall 27.3086% +/- 14.4733%

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197099 91177308-0d34-0410-b5e6-96231b3b80d8
author: Hal Finkel <hfinkel@anl.gov> 2013-12-12 00:19:11 +0000
committer: Hal Finkel <hfinkel@anl.gov> 2013-12-12 00:19:11 +0000
commit: f0c1388dd1b5cd6c297a9555aab7d0ade0f33827 (patch)
tree: d899364ac151a9ae9e1d3876be100be929661863 /lib/Target/PowerPC/PPCInstrInfo.cpp
parent: f15758b1d3b81f90a8c2b18c0487056d049d9bd9 (diff)
download: llvm-f0c1388dd1b5cd6c297a9555aab7d0ade0f33827.tar.gz
llvm-f0c1388dd1b5cd6c297a9555aab7d0ade0f33827.tar.bz2
llvm-f0c1388dd1b5cd6c297a9555aab7d0ade0f33827.tar.xz
1 files changed, 64 insertions, 2 deletions
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index c4582c540b..bd3b4924cc 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -74,6 +74,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
   const ScheduleDAG *DAG) const {
   unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
 
+  if (Directive == PPC::DIR_PWR7)
+    return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
+
   // Most subtargets use a PPC970 recognizer.
   if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
       Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
@@ -85,6 +88,56 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
   return new ScoreboardHazardRecognizer(II, DAG);
 }
 
+
+int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+                                    const MachineInstr *DefMI, unsigned DefIdx,
+                                    const MachineInstr *UseMI,
+                                    unsigned UseIdx) const {
+  int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
+                                                   UseMI, UseIdx);
+
+  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+  unsigned Reg = DefMO.getReg();
+
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  bool IsRegCR;
+  if (TRI->isVirtualRegister(Reg)) {
+    const MachineRegisterInfo *MRI =
+      &DefMI->getParent()->getParent()->getRegInfo();
+    IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
+              MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
+  } else {
+    IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
+              PPC::CRBITRCRegClass.contains(Reg);
+  }
+
+  if (UseMI->isBranch() && IsRegCR) {
+    if (Latency < 0)
+      Latency = getInstrLatency(ItinData, DefMI);
+
+    // On some cores, there is an additional delay between writing to a condition
+    // register, and using it from a branch.
+    unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+    switch (Directive) {
+    default: break;
+    case PPC::DIR_7400:
+    case PPC::DIR_750:
+    case PPC::DIR_970:
+    case PPC::DIR_E5500:
+    case PPC::DIR_PWR4:
+    case PPC::DIR_PWR5:
+    case PPC::DIR_PWR5X:
+    case PPC::DIR_PWR6:
+    case PPC::DIR_PWR6X:
+    case PPC::DIR_PWR7:
+      Latency += 2;
+      break;
+    }
+  }
+
+  return Latency;
+}
+
 // Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
 bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                          unsigned &SrcReg, unsigned &DstReg,
@@ -218,11 +271,20 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
 
 void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI) const {
+  // This function is used for scheduling, and the nop wanted here is the type
+  // that terminates dispatch groups on the POWER cores.
+  unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+  unsigned Opcode;
+  switch (Directive) {
+  default:            Opcode = PPC::NOP; break;
+  case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
+  case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
+  }
+
   DebugLoc DL;
-  BuildMI(MBB, MI, DL, get(PPC::NOP));
+  BuildMI(MBB, MI, DL, get(Opcode));
 }
 
-
 // Branch analysis.
 // Note: If the condition register is set to CTR or CTR8 then this is a
 // BDNZ (imm == 1) or BDZ (imm == 0) branch.
author	Hal Finkel <hfinkel@anl.gov>	2013-12-12 00:19:11 +0000
committer	Hal Finkel <hfinkel@anl.gov>	2013-12-12 00:19:11 +0000
commit	f0c1388dd1b5cd6c297a9555aab7d0ade0f33827 (patch)
tree	d899364ac151a9ae9e1d3876be100be929661863 /lib/Target/PowerPC/PPCInstrInfo.cpp
parent	f15758b1d3b81f90a8c2b18c0487056d049d9bd9 (diff)
download	llvm-f0c1388dd1b5cd6c297a9555aab7d0ade0f33827.tar.gz llvm-f0c1388dd1b5cd6c297a9555aab7d0ade0f33827.tar.bz2 llvm-f0c1388dd1b5cd6c297a9555aab7d0ade0f33827.tar.xz