author    Alexey Volkov <avolkov.intel@gmail.com>  2014-06-09 11:40:41 +0000
committer Alexey Volkov <avolkov.intel@gmail.com>  2014-06-09 11:40:41 +0000
commit    a2bc6951a028412e7af5e68aa369daa4611147b3
tree      e36b5b05d696ec346157589ad1e2ecb702e02123
parent    a8d18fe94687b65f136b7222c2cc998c2148e39c
[X86] Use ADD/SUB instead of INC/DEC for Silvermont
According to the Intel Software Optimization Manual, INC and DEC instructions on Silvermont require an additional uop to merge the flags. As a result, a branch instruction that depends on an INC or a DEC incurs a 1-cycle penalty.

Differential Revision: http://reviews.llvm.org/D3990

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210466 91177308-0d34-0410-b5e6-96231b3b80d8
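To illustrate the penalty this patch sidesteps, consider a countdown loop whose back-edge branch consumes the flags produced by the counter update. The C function and the two instruction sequences below are an illustrative sketch only (register choices, labels, and exact codegen will vary by compiler version); the cpu name "slm" and the "slow-incdec" attribute are the ones introduced or used by this patch.

    /* Hypothetical example: the loop-exit branch depends on the flags
       written by the update of n. */
    void zero(char *p, unsigned n) {
      while (n != 0) {   /* 'jne' reads the flags set when n was updated      */
        *p++ = 0;
        --n;             /* may be selected as DEC ... or as ADD $-1, ...      */
      }
    }

    Possible selection without the feature:    With slow-incdec (e.g. -mcpu=slm):
        decl %esi                                  addl $-1, %esi
        jne  .LBB0_1                               jne  .LBB0_1

DEC (like INC) leaves CF untouched, so on Silvermont an extra uop is needed to merge the partially updated EFLAGS before the dependent JNE, costing roughly one cycle per iteration; the full flag write of ADD/SUB avoids that merge.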
-rw-r--r--  lib/Target/X86/X86.td              |  4
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp |  4
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 38
-rw-r--r--  lib/Target/X86/X86InstrInfo.td     |  1
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp    |  1
-rw-r--r--  lib/Target/X86/X86Subtarget.h      |  4
6 files changed, 37 insertions(+), 15 deletions(-)
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 6912b579b1..0ac801a9b0 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -168,6 +168,8 @@ def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
+def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
+ "INC and DEC instructions are slower than ADD and SUB">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -228,7 +230,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
FeaturePCLMUL, FeatureAES,
FeatureCallRegIndirect,
FeaturePRFCHW,
- FeatureSlowLEA,
+ FeatureSlowLEA, FeatureSlowIncDec,
FeatureSlowBTMem, FeatureFastUAMem]>;
// "Arrandale" along with corei3 and corei5
def : ProcessorModel<"corei7", SandyBridgeModel,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cbedf1f5f0..e11522db79 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10139,14 +10139,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
- if (C->getAPIntValue() == 1) {
+ if (C->getAPIntValue() == 1 && !Subtarget->slowIncDec()) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}
// An add of negative one (subtract of one) will be selected as a DEC.
- if (C->getAPIntValue().isAllOnesValue()) {
+ if (C->getAPIntValue().isAllOnesValue() && !Subtarget->slowIncDec()) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 34d8fb9e18..9b3dce52f7 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1696,20 +1696,34 @@ def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
// Increment reg.
-def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>;
-def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+// Do not make INC if it is slow
+def : Pat<(add GR8:$src, 1),
+ (INC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
+def : Pat<(add GR16:$src, 1),
+ (INC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR16:$src, 1),
+ (INC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR32:$src, 1),
+ (INC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR32:$src, 1),
+ (INC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR64:$src, 1),
+ (INC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
// Decrement reg.
-def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>;
-def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+// Do not make DEC if it is slow
+def : Pat<(add GR8:$src, -1),
+ (DEC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
+def : Pat<(add GR16:$src, -1),
+ (DEC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR16:$src, -1),
+ (DEC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR32:$src, -1),
+ (DEC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR32:$src, -1),
+ (DEC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR64:$src, -1),
+ (DEC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
// or reg/reg.
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0d97669b22..5d34c326ac 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -795,6 +795,7 @@ def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
+def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 989e0d61b6..0df16c56ee 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -291,6 +291,7 @@ void X86Subtarget::initializeEnvironment() {
CallRegIndirect = false;
LEAUsesAG = false;
SlowLEA = false;
+ SlowIncDec = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 703559a476..f89a2378ee 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -181,6 +181,9 @@ protected:
/// SlowLEA - True if the LEA instruction with certain arguments is slow
bool SlowLEA;
+ /// SlowIncDec - True if INC and DEC instructions are slow when writing to flags
+ bool SlowIncDec;
+
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
@@ -319,6 +322,7 @@ public:
bool callRegIndirect() const { return CallRegIndirect; }
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
+ bool slowIncDec() const { return SlowIncDec; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }