summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2009-01-02 05:35:45 +0000
committerEvan Cheng <evan.cheng@apple.com>2009-01-02 05:35:45 +0000
commitccb6976a69a6e146db049fff8e6338e31c91b6f8 (patch)
treeb17dc585023e3b05a5f8b7a243aa0c68cc76ce46 /lib
parent018b7ee91a1dc8e1f87ea8c9133b5e44d9e57972 (diff)
downloadllvm-ccb6976a69a6e146db049fff8e6338e31c91b6f8.tar.gz
llvm-ccb6976a69a6e146db049fff8e6338e31c91b6f8.tar.bz2
llvm-ccb6976a69a6e146db049fff8e6338e31c91b6f8.tar.xz
Do not isel load folding bt instructions for pentium m, core, core2, and AMD processors. These are significantly slower than a load followed by a bt of a register.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61557 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/X86/X86.td34
-rw-r--r--lib/Target/X86/X86InstrInfo.td5
-rw-r--r--lib/Target/X86/X86Subtarget.cpp36
-rw-r--r--lib/Target/X86/X86Subtarget.h5
4 files changed, 51 insertions, 29 deletions
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 6d08b36be4..8867298abb 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -48,6 +48,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions",
[FeatureSSE2]>;
+def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
+ "Bit testing of memory is slow">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -66,27 +68,27 @@ def : Proc<"i686", []>;
def : Proc<"pentiumpro", []>;
def : Proc<"pentium2", [FeatureMMX]>;
def : Proc<"pentium3", [FeatureSSE1]>;
-def : Proc<"pentium-m", [FeatureSSE2]>;
+def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
-def : Proc<"x86-64", [Feature64Bit]>;
-def : Proc<"yonah", [FeatureSSE3]>;
-def : Proc<"prescott", [FeatureSSE3]>;
-def : Proc<"nocona", [FeatureSSE3, Feature64Bit]>;
-def : Proc<"core2", [FeatureSSSE3, Feature64Bit]>;
-def : Proc<"penryn", [FeatureSSE41, Feature64Bit]>;
+def : Proc<"x86-64", [Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
-def : Proc<"athlon", [FeatureMMX, Feature3DNowA]>;
-def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA]>;
-def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA]>;
-def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA]>;
-def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA]>;
-def : Proc<"k8", [Feature3DNowA, Feature64Bit]>;
-def : Proc<"opteron", [Feature3DNowA, Feature64Bit]>;
-def : Proc<"athlon64", [Feature3DNowA, Feature64Bit]>;
-def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit]>;
+def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k8", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"opteron", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"athlon64", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index be36cba5e4..b00ca64754 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -222,6 +222,7 @@ def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
def OptForSpeed : Predicate<"!OptForSize">;
+def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -2666,11 +2667,11 @@ def BT32rr : I<0xA3, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(X86bt (loadi16 addr:$src1), GR16:$src2),
- (implicit EFLAGS)]>, OpSize, TB;
+ (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>;
def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(X86bt (loadi32 addr:$src1), GR32:$src2),
- (implicit EFLAGS)]>, TB;
+ (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>;
} // Defs = [EFLAGS]
// Sign/Zero extenders
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 106ce46525..33a7b45346 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -149,6 +149,18 @@ bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
return true;
}
+static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+
void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
union {
@@ -169,8 +181,15 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 19) & 0x1) X86SSELevel = SSE41;
if ((ECX >> 20) & 0x1) X86SSELevel = SSE42;
- if (memcmp(text.c, "GenuineIntel", 12) == 0 ||
- memcmp(text.c, "AuthenticAMD", 12) == 0) {
+ bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
+ bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+ if (IsIntel || IsAMD) {
+ // Determine if bit test memory instructions are slow.
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectFamilyModel(EAX, Family, Model);
+ IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
}
@@ -180,15 +199,9 @@ static const char *GetCurrentX86CPU() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
return "generic";
- unsigned Family = (EAX >> 8) & 0xf; // Bits 8 - 11
- unsigned Model = (EAX >> 4) & 0xf; // Bits 4 - 7
- if (Family == 6 || Family == 0xf) {
- if (Family == 0xf)
- // Examine extended family ID if family ID is F.
- Family += (EAX >> 20) & 0xff; // Bits 20 - 27
- // Examine extended model ID if family ID is 6 or F.
- Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
- }
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectFamilyModel(EAX, Family, Model);
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
@@ -285,6 +298,7 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
, HasX86_64(false)
+ , IsBTMemSlow(false)
, DarwinVers(0)
, IsLinux(false)
, stackAlignment(8)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index f405ac798b..646a953370 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -64,6 +64,9 @@ protected:
/// HasX86_64 - True if the processor supports X86-64 instructions.
///
bool HasX86_64;
+
+ /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+ bool IsBTMemSlow;
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
@@ -127,6 +130,8 @@ public:
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool isBTMemSlow() const { return IsBTMemSlow; }
+
unsigned getAsmFlavor() const {
return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
}