diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/ExecutionDepsFix.cpp | 27 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 52 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.h | 5 |
3 files changed, 84 insertions, 0 deletions
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index d094411116..050edce2ec 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -471,11 +471,34 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr << '\t' << *MI); + // How many instructions since rx was last written? + unsigned Clearance = CurInstr - LiveRegs[rx].Def; LiveRegs[rx].Def = CurInstr; // Kill off domains redefined by generic instructions. if (Kill) kill(rx); + + // Verify clearance before partial register updates. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (!Pref) + continue; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + TII->breakPartialRegDependency(MI, i, TRI); + continue; + } + + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK.\n"); + continue; + } + + // A def from an unprocessed back-edge may make us break this dependency. 
+ DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); } ++CurInstr; @@ -663,6 +686,10 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = Loops.size(); i != e; ++i) { MachineBasicBlock *MBB = Loops[i]; enterBasicBlock(MBB); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) + if (!I->isDebugValue()) + processDefs(I, false); leaveBasicBlock(MBB); } diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index d9ffd8161f..9428fffae8 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2761,6 +2761,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, /// static bool hasPartialRegUpdate(unsigned Opcode) { switch (Opcode) { + case X86::CVTSI2SSrr: + case X86::CVTSI2SS64rr: + case X86::CVTSI2SDrr: + case X86::CVTSI2SD64rr: case X86::CVTSD2SSrr: case X86::Int_CVTSD2SSrr: case X86::CVTSS2SDrr: @@ -2789,6 +2793,54 @@ static bool hasPartialRegUpdate(unsigned Opcode) { return false; } +/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle +/// instructions we would like before a partial register update. +unsigned X86InstrInfo:: +getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + if (OpNum != 0 || !hasPartialRegUpdate(MI->getOpcode())) + return 0; + + // If MI is marked as reading Reg, the partial register update is wanted. + const MachineOperand &MO = MI->getOperand(0); + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (MO.readsReg() || MI->readsVirtualRegister(Reg)) + return 0; + } else { + if (MI->readsRegister(Reg, TRI)) + return 0; + } + + // If Reg was last written fewer than 16 instructions ago, ExeDepsFix + // inserts a dependency breaking instruction. The magic number is based on + // a few Nehalem experiments. 
+ return 16; +} + +void X86InstrInfo:: +breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + unsigned Reg = MI->getOperand(OpNum).getReg(); + if (X86::VR128RegClass.contains(Reg)) { + // These instructions are all floating point domain, so xorps is the best + // choice. + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); + unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr; + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg) + .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); + } else if (X86::VR256RegClass.contains(Reg)) { + // Use vxorps to clear the full ymm register. + // It wants to read and write the xmm sub-register. + unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg) + .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef) + .addReg(Reg, RegState::ImplicitDefine); + } else + return; + MI->addRegisterKilled(Reg, TRI, true); +} + MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 97009dbdbe..ee488d8f01 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -345,6 +345,11 @@ public: void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; + unsigned getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const; + void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const; + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, unsigned OpNum, |