summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorJakob Stoklund Olesen <stoklund@2pi.dk>2011-11-15 01:15:30 +0000
committerJakob Stoklund Olesen <stoklund@2pi.dk>2011-11-15 01:15:30 +0000
commitc2ecf3efbf375fc82bb1cea6afd7448498f9ae75 (patch)
tree4cb44beba42b77f41d955701edd3e69df3ec1b68 /lib
parent2947f730a96fc602ea008bba1929ae4f0638850a (diff)
downloadllvm-c2ecf3efbf375fc82bb1cea6afd7448498f9ae75.tar.gz
llvm-c2ecf3efbf375fc82bb1cea6afd7448498f9ae75.tar.bz2
llvm-c2ecf3efbf375fc82bb1cea6afd7448498f9ae75.tar.xz
Break false dependencies before partial register updates.
Two new TargetInstrInfo hooks lets the target tell ExecutionDepsFix about instructions with partial register updates causing false unwanted dependencies. The ExecutionDepsFix pass will break the false dependencies if the updated register was written in the previoius N instructions. The small loop added to sse-domains.ll runs twice as fast with dependency-breaking instructions inserted. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144602 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/CodeGen/ExecutionDepsFix.cpp27
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp52
-rw-r--r--lib/Target/X86/X86InstrInfo.h5
3 files changed, 84 insertions, 0 deletions
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index d094411116..050edce2ec 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -471,11 +471,34 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
<< '\t' << *MI);
+ // How many instructions since rx was last written?
+ unsigned Clearance = CurInstr - LiveRegs[rx].Def;
LiveRegs[rx].Def = CurInstr;
// Kill off domains redefined by generic instructions.
if (Kill)
kill(rx);
+
+ // Verify clearance before partial register updates.
+ unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
+ if (!Pref)
+ continue;
+ DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+ if (Pref > Clearance) {
+ DEBUG(dbgs() << ": Break dependency.\n");
+ TII->breakPartialRegDependency(MI, i, TRI);
+ continue;
+ }
+
+ // The current clearance seems OK, but we may be ignoring a def from a
+ // back-edge.
+ if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
+ DEBUG(dbgs() << ": OK.\n");
+ continue;
+ }
+
+ // A def from an unprocessed back-edge may make us break this dependency.
+ DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
}
++CurInstr;
@@ -663,6 +686,10 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
MachineBasicBlock *MBB = Loops[i];
enterBasicBlock(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I)
+ if (!I->isDebugValue())
+ processDefs(I, false);
leaveBasicBlock(MBB);
}
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index d9ffd8161f..9428fffae8 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2761,6 +2761,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
///
static bool hasPartialRegUpdate(unsigned Opcode) {
switch (Opcode) {
+ case X86::CVTSI2SSrr:
+ case X86::CVTSI2SS64rr:
+ case X86::CVTSI2SDrr:
+ case X86::CVTSI2SD64rr:
case X86::CVTSD2SSrr:
case X86::Int_CVTSD2SSrr:
case X86::CVTSS2SDrr:
@@ -2789,6 +2793,54 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
return false;
}
+/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle
+/// instructions we would like before a partial register update.
+unsigned X86InstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (OpNum != 0 || !hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
+
+ // If MI is marked as reading Reg, the partial register update is wanted.
+ const MachineOperand &MO = MI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (MO.readsReg() || MI->readsVirtualRegister(Reg))
+ return 0;
+ } else {
+ if (MI->readsRegister(Reg, TRI))
+ return 0;
+ }
+
+ // If any of the preceding 16 instructions are reading Reg, insert a
+ // dependency breaking instruction. The magic number is based on a few
+ // Nehalem experiments.
+ return 16;
+}
+
+void X86InstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (X86::VR128RegClass.contains(Reg)) {
+ // These instructions are all floating point domain, so xorps is the best
+ // choice.
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
+ .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+ } else if (X86::VR256RegClass.contains(Reg)) {
+ // Use vxorps to clear the full ymm register.
+ // It wants to read and write the xmm sub-register.
+ unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg)
+ .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef)
+ .addReg(Reg, RegState::ImplicitDefine);
+ } else
+ return;
+ MI->addRegisterKilled(Reg, TRI, true);
+}
+
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 97009dbdbe..ee488d8f01 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -345,6 +345,11 @@ public:
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+ unsigned getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const;
+ void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const;
+
MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
unsigned OpNum,