summary | refs | log | tree | commit | diff
path: root/test/CodeGen/X86
diff options
context:
space:
mode:
author: Chris Lattner <sabre@nondot.org> 2010-01-01 03:32:16 +0000
committer: Chris Lattner <sabre@nondot.org> 2010-01-01 03:32:16 +0000
commit: f031e8ad011e9ad95d7c965936da07e3a9c42add (patch)
tree: f548e6e940434f362f095323d22b87f189df7433 /test/CodeGen/X86
parent: 0fba8cf9ffbb4397d3ddbb51469c8819d0faf3cc (diff)
download: llvm-f031e8ad011e9ad95d7c965936da07e3a9c42add.tar.gz
llvm-f031e8ad011e9ad95d7c965936da07e3a9c42add.tar.bz2
llvm-f031e8ad011e9ad95d7c965936da07e3a9c42add.tar.xz
Teach codegen to lower llvm.powi to an efficient (but not optimal)
multiply sequence when the power is a constant integer. Before, our codegen for std::pow(.., int) always turned into a libcall, which was really inefficient. This should also make many gfortran programs happier, I'd imagine.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92388 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r-- test/CodeGen/X86/2007-09-27-LDIntrinsics.ll 53
-rw-r--r-- test/CodeGen/X86/powi.ll 11
2 files changed, 29 insertions, 35 deletions
diff --git a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
index 4a56ee446a..4d6971586c 100644
--- a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
+++ b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
@@ -1,47 +1,30 @@
-; RUN: llc < %s | grep powixf2
-; RUN: llc < %s | grep fsqrt
-; ModuleID = 'yyy.c'
+; RUN: llc < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
-define x86_fp80 @foo(x86_fp80 %x) {
+define x86_fp80 @foo(x86_fp80 %x) nounwind{
entry:
- %x_addr = alloca x86_fp80 ; <x86_fp80*> [#uses=2]
- %retval = alloca x86_fp80 ; <x86_fp80*> [#uses=2]
- %tmp = alloca x86_fp80 ; <x86_fp80*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store x86_fp80 %x, x86_fp80* %x_addr
- %tmp1 = load x86_fp80* %x_addr, align 16 ; <x86_fp80> [#uses=1]
- %tmp2 = call x86_fp80 @llvm.sqrt.f80( x86_fp80 %tmp1 ) ; <x86_fp80> [#uses=1]
- store x86_fp80 %tmp2, x86_fp80* %tmp, align 16
- %tmp3 = load x86_fp80* %tmp, align 16 ; <x86_fp80> [#uses=1]
- store x86_fp80 %tmp3, x86_fp80* %retval, align 16
- br label %return
-
-return: ; preds = %entry
- %retval4 = load x86_fp80* %retval ; <x86_fp80> [#uses=1]
- ret x86_fp80 %retval4
+ %tmp2 = call x86_fp80 @llvm.sqrt.f80( x86_fp80 %x )
+ ret x86_fp80 %tmp2
+
+; CHECK: foo:
+; CHECK: fldt 4(%esp)
+; CHECK-NEXT: fsqrt
+; CHECK-NEXT: ret
}
declare x86_fp80 @llvm.sqrt.f80(x86_fp80)
-define x86_fp80 @bar(x86_fp80 %x) {
+define x86_fp80 @bar(x86_fp80 %x) nounwind {
entry:
- %x_addr = alloca x86_fp80 ; <x86_fp80*> [#uses=2]
- %retval = alloca x86_fp80 ; <x86_fp80*> [#uses=2]
- %tmp = alloca x86_fp80 ; <x86_fp80*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store x86_fp80 %x, x86_fp80* %x_addr
- %tmp1 = load x86_fp80* %x_addr, align 16 ; <x86_fp80> [#uses=1]
- %tmp2 = call x86_fp80 @llvm.powi.f80( x86_fp80 %tmp1, i32 3 ) ; <x86_fp80> [#uses=1]
- store x86_fp80 %tmp2, x86_fp80* %tmp, align 16
- %tmp3 = load x86_fp80* %tmp, align 16 ; <x86_fp80> [#uses=1]
- store x86_fp80 %tmp3, x86_fp80* %retval, align 16
- br label %return
-
-return: ; preds = %entry
- %retval4 = load x86_fp80* %retval ; <x86_fp80> [#uses=1]
- ret x86_fp80 %retval4
+ %tmp2 = call x86_fp80 @llvm.powi.f80( x86_fp80 %x, i32 3 )
+ ret x86_fp80 %tmp2
+; CHECK: bar:
+; CHECK: fldt 4(%esp)
+; CHECK-NEXT: fld %st(0)
+; CHECK-NEXT: fmul %st(1)
+; CHECK-NEXT: fmulp %st(1)
+; CHECK-NEXT: ret
}
declare x86_fp80 @llvm.powi.f80(x86_fp80, i32)
diff --git a/test/CodeGen/X86/powi.ll b/test/CodeGen/X86/powi.ll
new file mode 100644
index 0000000000..c3d68312ce
--- /dev/null
+++ b/test/CodeGen/X86/powi.ll
@@ -0,0 +1,11 @@
+; RUN: llc %s -march=x86 -mcpu=yonah -o - | grep mulsd | count 6
+; Ideally this would compile to 5 multiplies.
+
+define double @_Z3f10d(double %a) nounwind readonly ssp noredzone {
+entry:
+ %0 = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
+ ret double %0
+}
+
+declare double @llvm.powi.f64(double, i32) nounwind readonly
+