Cost model support for lowered math builtins.

We make the cost of calling libm functions extremely high, because emitting the calls is expensive and causes spills (on x86), so performance suffers. We still vectorize important calls such as ceilf and friends on SSE4.1, as well as fabs.

Differential Revision: http://llvm-reviews.chandlerc.com/D466

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176287 91177308-0d34-0410-b5e6-96231b3b80d8
author: Benjamin Kramer <benny.kra@googlemail.com> 2013-02-28 19:09:33 +0000
committer: Benjamin Kramer <benny.kra@googlemail.com> 2013-02-28 19:09:33 +0000
commit: 8611d4449a77ca05e808823bc966573a85da00cb (patch)
tree: 948b8110ba823e8880a6445c17aaae4dd370d151 /test/Analysis
parent: 279706e90e12e9418d4e8f9415d5f3ed33a99bdb (diff)
download: llvm-8611d4449a77ca05e808823bc966573a85da00cb.tar.gz
llvm-8611d4449a77ca05e808823bc966573a85da00cb.tar.bz2
llvm-8611d4449a77ca05e808823bc966573a85da00cb.tar.xz
1 file changed, 32 insertions, 0 deletions
diff --git a/test/Analysis/CostModel/X86/intrinsic-cost.ll b/test/Analysis/CostModel/X86/intrinsic-cost.ll
new file mode 100644
index 0000000000..e235a36222
--- /dev/null
+++ b/test/Analysis/CostModel/X86/intrinsic-cost.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck %s -check-prefix=CORE2
+; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=corei7 -cost-model -analyze < %s | FileCheck %s -check-prefix=COREI7
+
+; If SSE4.1 roundps instruction is available it is cheap to lower, otherwise
+; it'll be scalarized into calls which are expensive.
+define void @test1(float* nocapture %f) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds float* %f, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  %wide.load = load <4 x float>* %1, align 4
+  %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+  store <4 x float> %2, <4 x float>* %1, align 4
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; CORE2: Printing analysis 'Cost Model Analysis' for function 'test1':
+; CORE2: Cost Model: Found an estimated cost of 400 for instruction:   %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+
+; COREI7: Printing analysis 'Cost Model Analysis' for function 'test1':
+; COREI7: Cost Model: Found an estimated cost of 1 for instruction:   %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)  nounwind readnone
author	Benjamin Kramer <benny.kra@googlemail.com>	2013-02-28 19:09:33 +0000
committer	Benjamin Kramer <benny.kra@googlemail.com>	2013-02-28 19:09:33 +0000
commit	8611d4449a77ca05e808823bc966573a85da00cb (patch)
tree	948b8110ba823e8880a6445c17aaae4dd370d151 /test/Analysis
parent	279706e90e12e9418d4e8f9415d5f3ed33a99bdb (diff)
download	llvm-8611d4449a77ca05e808823bc966573a85da00cb.tar.gz llvm-8611d4449a77ca05e808823bc966573a85da00cb.tar.bz2 llvm-8611d4449a77ca05e808823bc966573a85da00cb.tar.xz