author     Preston Gurd <preston.gurd@intel.com>  2013-03-04 18:13:57 +0000
committer  Preston Gurd <preston.gurd@intel.com>  2013-03-04 18:13:57 +0000
commit     9a2cfffdb6340c54ff553c1b81364d0f17fa8f45 (patch)
tree       999ad911d34f3f83d1140500e731228c5f07df84
parent     ff4faabd2d98776372f45f7592482c9239cc40f2 (diff)
Bypass Slow Divides
* Only apply the divide bypass optimization when not optimizing for size.
* Fixed a bug caused by the 0 constant being created with type Int32; the dividend's type is now used to generate the constant instead.
* For Atom x86-64, apply the divide bypass so that 16-bit divides are used instead of 64-bit divides when the operand values are small enough.
* Added lit tests for the 64-bit divide bypass.

Patch by Tyler Nowicki!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176442 91177308-0d34-0410-b5e6-96231b3b80d8
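For illustration only, not part of the patch: the bypass replaces one wide divide with a cheap operand-range check that falls back to a narrow divide when both operands fit in the low bits. A minimal C++ sketch of the equivalent control flow for the new 64-to-16-bit case on Atom x86-64 (function name and structure are hypothetical):

  #include <cstdint>

  // Rough equivalent of the fast/slow paths the pass emits for a 64-bit
  // sdiv bypassed to a 16-bit divide; illustrative only.
  static int64_t bypassed_sdiv64(int64_t a, int64_t b) {
    // No bits set above the low 16 in either operand (this also rejects
    // negative values), so a narrow unsigned divide gives the same result.
    if (((a | b) & ~int64_t(0xFFFF)) == 0)
      return int64_t(uint16_t(a) / uint16_t(b)); // fast path: divw
    return a / b;                                // slow path: idivq
  }

  int main() {
    // Small operands take the fast path; large or negative ones do not.
    return int(bypassed_sdiv64(1000, 10) - 100); // exits with 0
  }

This mirrors the generated code the new lit test checks for: or the operands together, test against -65536 (~0xFFFF as a 64-bit value), and branch to either divw or idivq.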
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp                 7
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp           2
-rw-r--r--  lib/Transforms/Utils/BypassSlowDivision.cpp        4
-rw-r--r--  test/CodeGen/X86/atom-bypass-slow-division-64.ll  46
-rw-r--r--  test/CodeGen/X86/atom-bypass-slow-division.ll      2
5 files changed, 55 insertions, 6 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 451acca5f3..5d12b0a392 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -181,9 +181,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
- // Bypass i32 with i8 on Atom when compiling with O2
- if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
+ // Bypass expensive divides on Atom when compiling with O2
+ if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
addBypassSlowDiv(32, 8);
+ if (Subtarget->is64Bit())
+ addBypassSlowDiv(64, 16);
+ }
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
// Setup Windows compiler runtime calls.
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index d71dd5dec6..015fd2e6e6 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -154,7 +154,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
- if (TLI && TLI->isSlowDivBypassed()) {
+ if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
for (Function::iterator I = F.begin(); I != F.end(); I++)
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 00cda8e034..1f517d038d 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F,
Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
// Compare operand values and branch
- Value *ZeroV = MainBuilder.getInt32(0);
+ Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
@@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F,
// Get bitwidth of div/rem instruction
IntegerType *T = cast<IntegerType>(J->getType());
- int bitwidth = T->getBitWidth();
+ unsigned int bitwidth = T->getBitWidth();
// Continue if bitwidth is not bypassed
DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
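As an aside on the ZeroV change above: every operand of the icmp must have the dividend's own width, so with i64 dividends a hard-coded i32 zero no longer type-checks; deriving the constant from Dividend->getType() keeps the guard correct for any bypassed width. A hedged, width-parametric C++ sketch of that guard (plain C++, not the LLVM API; names are hypothetical):

  #include <cstdint>
  #include <type_traits>

  // The operand-range guard, parameterized over the dividend's type so the
  // mask and the zero it is compared against always share that width
  // (the old code effectively compared against a fixed 32-bit zero).
  template <typename DividendT>
  static bool OperandsFitInShortDiv(DividendT A, DividendT B, unsigned ShortWidth) {
    static_assert(std::is_integral<DividendT>::value, "integer dividends only");
    using U = typename std::make_unsigned<DividendT>::type;
    U Mask = ~((U(1) << ShortWidth) - 1); // bits above the short width
    U Zero = U(0);                        // zero of the dividend's width
    return ((U(A) | U(B)) & Mask) == Zero;
  }

For the 64-to-16 case this yields the testq $-65536 pattern checked in the new test; for the existing 32-to-8 case it reduces to the old behavior.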
diff --git a/test/CodeGen/X86/atom-bypass-slow-division-64.ll b/test/CodeGen/X86/atom-bypass-slow-division-64.ll
new file mode 100644
index 0000000000..a3bbea3c99
--- /dev/null
+++ b/test/CodeGen/X86/atom-bypass-slow-division-64.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux -march=x86-64 | FileCheck %s
+
+; Additional tests for 64-bit divide bypass
+
+define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_quotient:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: ret
+; CHECK: divw
+; CHECK: ret
+ %result = sdiv i64 %a, %b
+ ret i64 %result
+}
+
+define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_remainder:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: ret
+; CHECK: divw
+; CHECK: ret
+ %result = srem i64 %a, %b
+ ret i64 %result
+}
+
+define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_quotient_and_remainder:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: divw
+; CHECK: addq
+; CHECK: ret
+; CHECK-NOT: idivq
+; CHECK-NOT: divw
+ %resultdiv = sdiv i64 %a, %b
+ %resultrem = srem i64 %a, %b
+ %result = add i64 %resultdiv, %resultrem
+ ret i64 %result
+}
diff --git a/test/CodeGen/X86/atom-bypass-slow-division.ll b/test/CodeGen/X86/atom-bypass-slow-division.ll
index 453e72672b..4612940445 100644
--- a/test/CodeGen/X86/atom-bypass-slow-division.ll
+++ b/test/CodeGen/X86/atom-bypass-slow-division.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind {
; CHECK: Test_get_quotient: