summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Volkov <avolkov.intel@gmail.com>2014-05-20 08:55:50 +0000
committerAlexey Volkov <avolkov.intel@gmail.com>2014-05-20 08:55:50 +0000
commit0d0bab5168239afddcc6a4fbcc73b101b570f563 (patch)
tree2b1c12e5abd648345f55aa87864a772354782836
parent6f242c93ec57db744194ae3052fa59db71716ffe (diff)
downloadllvm-0d0bab5168239afddcc6a4fbcc73b101b570f563.tar.gz
llvm-0d0bab5168239afddcc6a4fbcc73b101b570f563.tar.bz2
llvm-0d0bab5168239afddcc6a4fbcc73b101b570f563.tar.xz
[X86] Tune LEA usage for Silvermont
According to Intel Software Optimization Manual on Silvermont in some cases LEA is better to be replaced with ADD instructions: "The rule of thumb for ADDs and LEAs is that it is justified to use LEA with a valid index and/or displacement for non-destructive destination purposes (especially useful for stack offset cases), or to use a SCALE. Otherwise, ADD(s) are preferable." Differential Revision: http://reviews.llvm.org/D3826 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209198 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86.td3
-rw-r--r--lib/Target/X86/X86FixupLEAs.cpp89
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp8
-rw-r--r--lib/Target/X86/X86InstrInfo.h7
-rw-r--r--lib/Target/X86/X86Subtarget.cpp1
-rw-r--r--lib/Target/X86/X86Subtarget.h5
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp3
7 files changed, 102 insertions, 14 deletions
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index d01e462161..6912b579b1 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -166,6 +166,8 @@ def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
"Call register indirect">;
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
+def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
+ "LEA instruction with certain arguments is slow">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -226,6 +228,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
FeaturePCLMUL, FeatureAES,
FeatureCallRegIndirect,
FeaturePRFCHW,
+ FeatureSlowLEA,
FeatureSlowBTMem, FeatureFastUAMem]>;
// "Arrandale" along with corei3 and corei5
def : ProcessorModel<"corei7", SandyBridgeModel,
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 6886a65e4b..576f39d3c3 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -57,6 +57,11 @@ namespace {
void processInstruction(MachineBasicBlock::iterator& I,
MachineFunction::iterator MFI);
+ /// \brief Given a LEA instruction which is unprofitable
+ /// on Silvermont try to replace it with an equivalent ADD instruction
+ void processInstructionForSLM(MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI);
+
/// \brief Determine if an instruction references a machine register
/// and, if so, whether it reads or writes the register.
RegUsageState usesRegister(MachineOperand& p,
@@ -86,7 +91,7 @@ namespace {
private:
MachineFunction *MF;
const TargetMachine *TM;
- const TargetInstrInfo *TII; // Machine instruction info.
+ const X86InstrInfo *TII; // Machine instruction info.
};
char FixupLEAPass::ID = 0;
@@ -98,7 +103,7 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
MachineInstr* MI = MBBI;
MachineInstr* NewMI;
switch (MI->getOpcode()) {
- case X86::MOV32rr:
+ case X86::MOV32rr:
case X86::MOV64rr: {
const MachineOperand& Src = MI->getOperand(1);
const MachineOperand& Dest = MI->getOperand(0);
@@ -146,7 +151,7 @@ FunctionPass *llvm::createX86FixupLEAs() {
bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
TM = &MF->getTarget();
- TII = TM->getInstrInfo();
+ TII = static_cast<const X86InstrInfo*>(TM->getInstrInfo());
DEBUG(dbgs() << "Start X86FixupLEAs\n";);
// Process all basic blocks.
@@ -243,9 +248,9 @@ void FixupLEAPass::seekLEAFixup(MachineOperand& p,
MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI);
if (NewMI) {
++NumLEAs;
- DEBUG(dbgs() << "Candidate to replace:"; MBI->dump(););
+ DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
// now to replace with an equivalent LEA...
- DEBUG(dbgs() << "Replaced by: "; NewMI->dump(););
+ DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
MFI->erase(MBI);
MachineBasicBlock::iterator J =
static_cast<MachineBasicBlock::iterator> (NewMI);
@@ -254,10 +259,80 @@ void FixupLEAPass::seekLEAFixup(MachineOperand& p,
}
}
+void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI) {
+ MachineInstr *MI = I;
+ const int opcode = MI->getOpcode();
+ if (opcode != X86::LEA16r && opcode != X86::LEA32r && opcode != X86::LEA64r &&
+ opcode != X86::LEA64_32r)
+ return;
+ if (MI->getOperand(5).getReg() != 0 || !MI->getOperand(4).isImm() ||
+ !TII->isSafeToClobberEFLAGS(*MFI, I))
+ return;
+ const unsigned DstR = MI->getOperand(0).getReg();
+ const unsigned SrcR1 = MI->getOperand(1).getReg();
+ const unsigned SrcR2 = MI->getOperand(3).getReg();
+ if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
+ return;
+ if (MI->getOperand(2).getImm() > 1)
+ return;
+ int addrr_opcode, addri_opcode;
+ switch (opcode) {
+ case X86::LEA16r:
+ addrr_opcode = X86::ADD16rr;
+ addri_opcode = X86::ADD16ri;
+ break;
+ case X86::LEA32r:
+ addrr_opcode = X86::ADD32rr;
+ addri_opcode = X86::ADD32ri;
+ break;
+ case X86::LEA64_32r:
+ case X86::LEA64r:
+ addrr_opcode = X86::ADD64rr;
+ addri_opcode = X86::ADD64ri32;
+ break;
+ default:
+ assert(false && "Unexpected LEA instruction");
+ }
+ DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
+ DEBUG(dbgs() << "FixLEA: Replaced by: ";);
+ MachineInstr *NewMI = 0;
+ const MachineOperand &Dst = MI->getOperand(0);
+ // Make ADD instruction for two registers writing to LEA's destination
+ if (SrcR1 != 0 && SrcR2 != 0) {
+ const MachineOperand &Src1 = MI->getOperand(SrcR1 == DstR ? 1 : 3);
+ const MachineOperand &Src2 = MI->getOperand(SrcR1 == DstR ? 3 : 1);
+ NewMI = BuildMI(*MF, MI->getDebugLoc(), TII->get(addrr_opcode))
+ .addOperand(Dst)
+ .addOperand(Src1)
+ .addOperand(Src2);
+ MFI->insert(I, NewMI);
+ DEBUG(NewMI->dump(););
+ }
+ // Make ADD instruction for immediate
+ if (MI->getOperand(4).getImm() != 0) {
+ const MachineOperand &SrcR = MI->getOperand(SrcR1 == DstR ? 1 : 3);
+ NewMI = BuildMI(*MF, MI->getDebugLoc(), TII->get(addri_opcode))
+ .addOperand(Dst)
+ .addOperand(SrcR)
+ .addImm(MI->getOperand(4).getImm());
+ MFI->insert(I, NewMI);
+ DEBUG(NewMI->dump(););
+ }
+ if (NewMI) {
+ MFI->erase(I);
+ I = static_cast<MachineBasicBlock::iterator>(NewMI);
+ }
+}
+
bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
MachineFunction::iterator MFI) {
- for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
- processInstruction(I, MFI);
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
+ if (TM->getSubtarget<X86Subtarget>().isSLM())
+ processInstructionForSLM(I, MFI);
+ else
+ processInstruction(I, MFI);
+ }
return false;
}
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 6a2854790d..efb5c70c81 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1728,12 +1728,8 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
return true;
}
-/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that
-/// would clobber the EFLAGS condition register. Note the result may be
-/// conservative. If it cannot definitely determine the safety after visiting
-/// a few instructions in each direction it assumes it's not safe.
-static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
+bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
MachineBasicBlock::iterator E = MBB.end();
// For compile time consideration, if we are not able to determine the
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index cf8a9fa66a..5f3491536d 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -359,6 +359,13 @@ public:
/// instruction that defines the specified register class.
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
+ /// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction that
+ /// would clobber the EFLAGS condition register. Note the result may be
+ /// conservative. If it cannot definitely determine the safety after visiting
+ /// a few instructions in each direction it assumes it's not safe.
+ bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
static bool isX86_64ExtendedReg(const MachineOperand &MO) {
if (!MO.isReg()) return false;
return X86II::isX86_64ExtendedReg(MO.getReg());
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 31c85a7835..b94bd712ff 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -282,6 +282,7 @@ void X86Subtarget::initializeEnvironment() {
PadShortFunctions = false;
CallRegIndirect = false;
LEAUsesAG = false;
+ SlowLEA = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 5d2dcefb07..8ec680efff 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -178,6 +178,9 @@ protected:
/// address generation (AG) time.
bool LEAUsesAG;
+ /// SlowLEA - True if the LEA instruction with certain arguments is slow
+ bool SlowLEA;
+
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
@@ -315,11 +318,13 @@ public:
bool padShortFunctions() const { return PadShortFunctions; }
bool callRegIndirect() const { return CallRegIndirect; }
bool LEAusesAG() const { return LEAUsesAG; }
+ bool slowLEA() const { return SlowLEA; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
+ bool isSLM() const { return X86ProcFamily == IntelSLM; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index cb5ab0dd50..d0449f4232 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -226,7 +226,8 @@ bool X86PassConfig::addPreEmitPass() {
ShouldPrint = true;
}
if (getOptLevel() != CodeGenOpt::None &&
- getX86Subtarget().LEAusesAG()){
+ (getX86Subtarget().LEAusesAG() ||
+ getX86Subtarget().slowLEA())){
addPass(createX86FixupLEAs());
ShouldPrint = true;
}