summaryrefslogtreecommitdiff
path: root/lib/Target
diff options
context:
space:
mode:
authorPreston Gurd <preston.gurd@intel.com>2013-04-25 20:29:37 +0000
committerPreston Gurd <preston.gurd@intel.com>2013-04-25 20:29:37 +0000
commitd6ac8e9a03d8fa7115079d86192bc4529e8281aa (patch)
tree9553fbaac5e6badb3c220a49e83147e96b44c70f /lib/Target
parent975b1ddf60387139357c8cbbaeb613de5a39294f (diff)
downloadllvm-d6ac8e9a03d8fa7115079d86192bc4529e8281aa.tar.gz
llvm-d6ac8e9a03d8fa7115079d86192bc4529e8281aa.tar.bz2
llvm-d6ac8e9a03d8fa7115079d86192bc4529e8281aa.tar.xz
This patch adds the X86FixupLEAs pass, which will reduce instruction
latency for certain models of the Intel Atom family, by converting instructions into their equivalent LEA instructions, when it is both useful and possible to do so. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180573 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/X86/CMakeLists.txt1
-rw-r--r--lib/Target/X86/X86.h5
-rw-r--r--lib/Target/X86/X86.td3
-rw-r--r--lib/Target/X86/X86FixupLEAs.cpp251
-rw-r--r--lib/Target/X86/X86Subtarget.cpp1
-rw-r--r--lib/Target/X86/X86Subtarget.h4
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp5
7 files changed, 270 insertions, 0 deletions
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index d14899d28a..7cb71f066c 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -33,6 +33,7 @@ set(sources
X86TargetObjectFile.cpp
X86TargetTransformInfo.cpp
X86VZeroUpper.cpp
+ X86FixupLEAs.cpp
)
if( CMAKE_CL_64 )
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 1f9919f159..947002fd14 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -69,6 +69,11 @@ ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM);
/// createX86PadShortFunctions - Return a pass that pads short functions
/// with NOOPs. This will prevent a stall when returning on the Atom.
FunctionPass *createX86PadShortFunctions();
+/// createX86FixupLEAs - Return a pass that selectively replaces
+/// certain instructions (like add, sub, inc, dec, some shifts,
+/// and some multiplies) by equivalent LEA instructions, in order
+/// to eliminate execution delays in some Atom processors.
+FunctionPass *createX86FixupLEAs();
} // End llvm namespace
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 306e3ac1af..87bb68d857 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -139,6 +139,8 @@ def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
"CallRegIndirect", "true",
"Call register indirect">;
+def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
+ "LEA instruction needs inputs at AG stage">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -188,6 +190,7 @@ def : ProcessorModel<"atom", AtomModel,
FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
FeatureSlowDivide,
FeatureCallRegIndirect,
+ FeatureLEAUsesAG,
FeaturePadShortFunctions]>;
// "Arrandale" along with corei3 and corei5
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
new file mode 100644
index 0000000000..82e6de4221
--- /dev/null
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -0,0 +1,251 @@
+//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which will find instructions which
+// can be re-written as LEA instructions in order to reduce pipeline
+// delays for some models of the Intel Atom family.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-fixup-LEAs"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumLEAs, "Number of LEA instructions created");
+
+namespace {
+  /// FixupLEAPass - Machine function pass that walks every basic block and,
+  /// for each instruction that has a memory operand, tries to rewrite the
+  /// nearby instruction defining its base or index register as an
+  /// equivalent LEA instruction.
+  class FixupLEAPass : public MachineFunctionPass {
+    /// RegUsageState - Result of usesRegister(): the instruction does not
+    /// mention the register, defines it, or only reads it.
+    enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
+    static char ID;
+
+    /// processBasicBlock - Run processInstruction over every instruction
+    /// of the given block.
+    bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
+
+    virtual const char *getPassName() const { return "X86 Atom LEA Fixup";}
+
+    /// seekLEAFixup - Find the instruction that writes register operand p
+    /// shortly before I and, if possible, replace it by an LEA.
+    void seekLEAFixup(MachineOperand& p, MachineBasicBlock::iterator& I,
+                      MachineFunction::iterator MFI);
+
+    /// processInstruction - If I has a memory operand, call seekLEAFixup
+    /// for its base and index registers.
+    void processInstruction(MachineBasicBlock::iterator& I,
+                            MachineFunction::iterator MFI);
+
+    /// usesRegister - Report whether instruction I writes, reads, or does
+    /// not reference the register held by operand p.
+    RegUsageState usesRegister(MachineOperand& p,
+                               MachineBasicBlock::iterator I);
+
+    /// searchBackwards - Walk backwards from I looking for an instruction
+    /// that writes the register held by operand p.
+    MachineBasicBlock::iterator searchBackwards(MachineOperand& p,
+                                                MachineBasicBlock::iterator& I,
+                                                MachineFunction::iterator MFI);
+
+    /// postRAConvertToLEA - Build and insert an LEA equivalent to the
+    /// instruction at MBBI, or return zero if that is not possible.
+    MachineInstr* postRAConvertToLEA(MachineFunction::iterator &MFI,
+                                     MachineBasicBlock::iterator &MBBI,
+                                     LiveVariables *LV) const;
+
+  public:
+    // The per-function state below is only assigned in
+    // runOnMachineFunction; zero it here so that the members never hold
+    // indeterminate values before the pass runs.
+    FixupLEAPass() : MachineFunctionPass(ID), MF(0), TM(0), TII(0), LV(0) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  private:
+    MachineFunction *MF;        // Function currently being processed.
+    const TargetMachine *TM;    // Target of the current function.
+    const TargetInstrInfo *TII; // Machine instruction info.
+    LiveVariables *LV;          // From getAnalysisIfAvailable<LiveVariables>().
+
+  };
+  char FixupLEAPass::ID = 0;
+}
+
+/// postRAConvertToLEA - Try to produce an LEA equivalent to the instruction
+/// at MBBI and insert it into the block MFI in front of MBBI.
+///
+/// Register-to-register moves (MOV32rr / MOV64rr) are rewritten here as
+/// LEA32r / LEA64r whose address operands are the source register followed
+/// by 1, no register, 0, no register. The listed ADD-immediate opcodes are
+/// rejected when operand 2 is not an immediate. Every other case is handed
+/// to TargetInstrInfo::convertToThreeAddress.
+///
+/// Returns the new instruction, or zero when no LEA was created. The
+/// original instruction is left in place; removing it is up to the caller.
+MachineInstr *
+FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
+                                 MachineBasicBlock::iterator &MBBI,
+                                 LiveVariables *LV) const {
+  MachineInstr *MI = MBBI;
+  const unsigned Opcode = MI->getOpcode();
+
+  switch (Opcode) {
+  case X86::MOV32rr:
+  case X86::MOV64rr: {
+    const unsigned LEAOpcode =
+      (Opcode == X86::MOV32rr) ? X86::LEA32r : X86::LEA64r;
+    const MachineOperand &Dest = MI->getOperand(0);
+    const MachineOperand &Src = MI->getOperand(1);
+    MachineInstr *LEAInstr =
+      BuildMI(*MF, MI->getDebugLoc(), TII->get(LEAOpcode))
+        .addOperand(Dest)
+        .addOperand(Src).addImm(1).addReg(0).addImm(0).addReg(0);
+    MFI->insert(MBBI, LEAInstr); // Place the LEA just before the move.
+    return LEAInstr;
+  }
+  case X86::ADD64ri32:
+  case X86::ADD64ri8:
+  case X86::ADD64ri32_DB:
+  case X86::ADD64ri8_DB:
+  case X86::ADD32ri:
+  case X86::ADD32ri8:
+  case X86::ADD32ri_DB:
+  case X86::ADD32ri8_DB:
+  case X86::ADD16ri:
+  case X86::ADD16ri8:
+  case X86::ADD16ri_DB:
+  case X86::ADD16ri8_DB:
+    // convertToThreeAddress will call getImm() on operand 2, which
+    // requires isImm() to be true, so bail out early otherwise.
+    if (!MI->getOperand(2).isImm())
+      return 0;
+    break;
+  default:
+    break;
+  }
+
+  return TII->convertToThreeAddress(MFI, MBBI, LV);
+}
+
+/// createX86FixupLEAs - Allocate a new instance of the LEA fixup pass and
+/// return it to the caller.
+FunctionPass *llvm::createX86FixupLEAs() {
+  FunctionPass *Pass = new FixupLEAPass();
+  return Pass;
+}
+
+/// runOnMachineFunction - Cache the per-function state (function, target,
+/// instruction info and, if available, live variable analysis) and then
+/// visit every basic block of Func via processBasicBlock.
+///
+/// The return value is always true: the result of processBasicBlock is
+/// not consulted, so the function is reported as modified unconditionally.
+bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
+  MF = &Func;
+  TM = &Func.getTarget();
+  TII = TM->getInstrInfo();
+  LV = getAnalysisIfAvailable<LiveVariables>();
+
+  DEBUG(dbgs() << "Start X86FixupLEAs\n";);
+
+  // Visit the basic blocks in function order.
+  MachineFunction::iterator Block = Func.begin();
+  const MachineFunction::iterator BlockEnd = Func.end();
+  while (Block != BlockEnd) {
+    processBasicBlock(Func, Block);
+    ++Block;
+  }
+
+  DEBUG(dbgs() << "End X86FixupLEAs\n";);
+
+  return true;
+}
+
+/// usesRegister - Classify how instruction I refers to the register held
+/// by operand p. Returns RU_Write as soon as a defining operand for that
+/// exact register is found, RU_Read if the register only appears in
+/// non-defining operands, and RU_NotUsed if no operand names it.
+/// Only operands whose register number equals p's are considered.
+FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p,
+                                MachineBasicBlock::iterator I) {
+  MachineInstr *MI = I;
+  RegUsageState Result = RU_NotUsed;
+
+  for (unsigned Idx = 0, NumOps = MI->getNumOperands(); Idx != NumOps; ++Idx) {
+    MachineOperand &Op = MI->getOperand(Idx);
+    if (!Op.isReg() || Op.getReg() != p.getReg())
+      continue;
+    // A definition takes precedence over any read of the same register.
+    if (Op.isDef())
+      return RU_Write;
+    Result = RU_Read;
+  }
+  return Result;
+}
+
+/// getPreviousInstr - Move I to the instruction preceding it in block MFI.
+/// If I is already the first instruction, wrap around to the last
+/// instruction of the block, but only when the block is one of its own
+/// predecessors (i.e. it branches to itself).
+/// Returns true if I was moved, false if there is no previous instruction
+/// to step to (in which case I is left unchanged).
+static inline bool getPreviousInstr(MachineBasicBlock::iterator& I,
+                                    MachineFunction::iterator MFI) {
+  // Common case: simply step back within the block.
+  if (I != MFI->begin()) {
+    --I;
+    return true;
+  }
+
+  // At the top of the block: only a self-loop lets us continue.
+  if (!MFI->isPredecessor(MFI))
+    return false;
+
+  I = --MFI->end();
+  return true;
+}
+
+/// searchBackwards - Starting just before I, step backwards through block
+/// MFI (wrapping around if the block branches to itself) looking for the
+/// instruction that writes the register held by operand p.
+///
+/// The walk gives up when it meets a call or inline asm, when it arrives
+/// back at I, when there is no previous instruction, or when the running
+/// distance exceeds INSTR_DISTANCE_THRESHOLD. The distance starts at 1 and
+/// grows by the latency reported by TII for each instruction stepped over.
+///
+/// Returns an iterator to the writing instruction, or a null iterator
+/// (constructed from 0) if none was found.
+MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p,
+                                   MachineBasicBlock::iterator& I,
+                                   MachineFunction::iterator MFI) {
+  static const int INSTR_DISTANCE_THRESHOLD = 5;
+
+  int Distance = 1;
+  MachineBasicBlock::iterator Cur = I;
+
+  for (bool Stepped = getPreviousInstr(Cur, MFI); Stepped && Cur != I;
+       Stepped = getPreviousInstr(Cur, MFI)) {
+    if (Cur->isCall() || Cur->isInlineAsm())
+      break;
+    if (Distance > INSTR_DISTANCE_THRESHOLD)
+      break; // too far back to make a difference
+    if (usesRegister(p, Cur) == RU_Write)
+      return Cur;
+    Distance += TII->getInstrLatency(TM->getInstrItineraryData(), Cur);
+  }
+  return 0;
+}
+
+/// processInstruction - If the instruction at I has a memory operand
+/// (as reported by X86II::getMemoryOperandNo), look at the base and index
+/// registers of its address and, for each one that is a register other
+/// than ESP, call seekLEAFixup to try to turn the instruction which sets
+/// that register into an equivalent LEA. Instructions without a memory
+/// operand are ignored.
+void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I,
+                                      MachineFunction::iterator MFI) {
+  MachineInstr *MI = I;
+  const MCInstrDesc &Desc = MI->getDesc();
+
+  int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI->getOpcode());
+  if (MemOpNo < 0)
+    return; // No memory operand: nothing to do.
+  MemOpNo += X86II::getOperandBias(Desc);
+
+  // NOTE(review): only ESP is excluded here; confirm whether RSP should be
+  // treated the same way for 64-bit code.
+  MachineOperand &BaseOp = MI->getOperand(MemOpNo + X86::AddrBaseReg);
+  if (BaseOp.isReg() && BaseOp.getReg() != X86::ESP)
+    seekLEAFixup(BaseOp, I, MFI);
+
+  MachineOperand &IndexOp = MI->getOperand(MemOpNo + X86::AddrIndexReg);
+  if (IndexOp.isReg() && IndexOp.getReg() != X86::ESP)
+    seekLEAFixup(IndexOp, I, MFI);
+}
+
+/// seekLEAFixup - Given a register operand p used by the instruction at I,
+/// use searchBackwards to find the instruction which writes that register.
+/// If one is found and postRAConvertToLEA can build an equivalent LEA,
+/// the original writer is erased from the block, the NumLEAs statistic is
+/// bumped, and the newly created instruction is itself run through
+/// processInstruction.
+void FixupLEAPass::seekLEAFixup(MachineOperand& p,
+                                MachineBasicBlock::iterator& I,
+                                MachineFunction::iterator MFI) {
+  MachineBasicBlock::iterator Writer = searchBackwards(p, I, MFI);
+  if (!Writer)
+    return; // No nearby instruction writes the register.
+
+  MachineInstr *LEAInstr = postRAConvertToLEA(MFI, Writer, LV);
+  if (!LEAInstr)
+    return; // The writer has no LEA equivalent.
+
+  ++NumLEAs;
+  DEBUG(dbgs() << "Candidate to replace:"; Writer->dump(););
+  // now to replace with an equivalent LEA...
+  DEBUG(dbgs() << "Replaced by: "; LEAInstr->dump(););
+  MFI->erase(Writer);
+
+  // The new LEA may itself benefit from a fixup of its inputs.
+  MachineBasicBlock::iterator NewPos(LEAInstr);
+  processInstruction(NewPos, MFI);
+}
+
+/// processBasicBlock - Call processInstruction on each instruction of the
+/// basic block MFI, from first to last. Always returns false; the MF
+/// parameter is not used.
+bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
+                                     MachineFunction::iterator MFI) {
+  MachineBasicBlock::iterator Cur = MFI->begin();
+  // The end iterator is re-read on every step, as in a plain for loop,
+  // because processInstruction may insert and erase instructions.
+  while (Cur != MFI->end()) {
+    processInstruction(Cur, MFI);
+    ++Cur;
+  }
+  return false;
+}
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 14619b6392..448d2e6054 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -467,6 +467,7 @@ void X86Subtarget::initializeEnvironment() {
PostRAScheduler = false;
PadShortFunctions = false;
CallRegIndirect = false;
+ LEAUsesAG = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 6fbdb1d5f0..66832b989b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -165,6 +165,9 @@ protected:
/// CallRegIndirect - True if the Calls with memory reference should be converted
/// to a register-based indirect call.
bool CallRegIndirect;
+ /// LEAUsesAG - True if the LEA instruction inputs have to be ready at
+ /// address generation (AG) time.
+ bool LEAUsesAG;
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
@@ -278,6 +281,7 @@ public:
bool hasSlowDivide() const { return HasSlowDivide; }
bool padShortFunctions() const { return PadShortFunctions; }
bool callRegIndirect() const { return CallRegIndirect; }
+ bool LEAusesAG() const { return LEAUsesAG; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 8aa58a2042..00fa47f80b 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -215,6 +215,11 @@ bool X86PassConfig::addPreEmitPass() {
addPass(createX86PadShortFunctions());
ShouldPrint = true;
}
+ if (getOptLevel() != CodeGenOpt::None &&
+ getX86Subtarget().LEAusesAG()){
+ addPass(createX86FixupLEAs());
+ ShouldPrint = true;
+ }
return ShouldPrint;
}