summary | refs | log | tree | commit | diff
path: root/test/Analysis
diff options
context:
space:
mode:
author: Raul E. Silvera <rsilvera@google.com> 2014-03-10 22:59:13 +0000
committer: Raul E. Silvera <rsilvera@google.com> 2014-03-10 22:59:13 +0000
commit: 6df2b690989f499965640305b2045142192eca74 (patch)
tree: c20a09914280991ec6209559cd9f04f946b30d27 /test/Analysis
parent: 87393cfd6b4e85e1b9baf804784e69dde30e019a (diff)
download: llvm-6df2b690989f499965640305b2045142192eca74.tar.gz
llvm-6df2b690989f499965640305b2045142192eca74.tar.bz2
llvm-6df2b690989f499965640305b2045142192eca74.tar.xz
When analyzing vectors whose element type requires legalization,
the legalization cost must be included to get an accurate estimate of the total cost of the scalarized vector. The inaccurate cost triggered unprofitable SLP vectorization on 32-bit X86.

Summary: Include legalization overhead when computing scalarization cost

Reviewers: hfinkel, nadav

CC: chandlerc, rnk, llvm-commits

Differential Revision: http://llvm-reviews.chandlerc.com/D2992

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203509 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Analysis')
-rw-r--r-- test/Analysis/CostModel/ARM/cast.ll 32
-rw-r--r-- test/Analysis/CostModel/X86/scalarize.ll 41
2 files changed, 57 insertions, 16 deletions
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
index a6ed798b95..662110f272 100644
--- a/test/Analysis/CostModel/ARM/cast.ll
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -221,9 +221,9 @@ define i32 @casts() {
%r96 = fptoui <2 x float> undef to <2 x i32>
; CHECK: cost of 1 {{.*}} fptosi
%r97 = fptosi <2 x float> undef to <2 x i32>
- ; CHECK: cost of 24 {{.*}} fptoui
+ ; CHECK: cost of 28 {{.*}} fptoui
%r98 = fptoui <2 x float> undef to <2 x i64>
- ; CHECK: cost of 24 {{.*}} fptosi
+ ; CHECK: cost of 28 {{.*}} fptosi
%r99 = fptosi <2 x float> undef to <2 x i64>
; CHECK: cost of 8 {{.*}} fptoui
@@ -242,9 +242,9 @@ define i32 @casts() {
%r106 = fptoui <2 x double> undef to <2 x i32>
; CHECK: cost of 2 {{.*}} fptosi
%r107 = fptosi <2 x double> undef to <2 x i32>
- ; CHECK: cost of 24 {{.*}} fptoui
+ ; CHECK: cost of 28 {{.*}} fptoui
%r108 = fptoui <2 x double> undef to <2 x i64>
- ; CHECK: cost of 24 {{.*}} fptosi
+ ; CHECK: cost of 28 {{.*}} fptosi
%r109 = fptosi <2 x double> undef to <2 x i64>
; CHECK: cost of 16 {{.*}} fptoui
@@ -263,9 +263,9 @@ define i32 @casts() {
%r116 = fptoui <4 x float> undef to <4 x i32>
; CHECK: cost of 1 {{.*}} fptosi
%r117 = fptosi <4 x float> undef to <4 x i32>
- ; CHECK: cost of 48 {{.*}} fptoui
+ ; CHECK: cost of 56 {{.*}} fptoui
%r118 = fptoui <4 x float> undef to <4 x i64>
- ; CHECK: cost of 48 {{.*}} fptosi
+ ; CHECK: cost of 56 {{.*}} fptosi
%r119 = fptosi <4 x float> undef to <4 x i64>
; CHECK: cost of 16 {{.*}} fptoui
@@ -284,9 +284,9 @@ define i32 @casts() {
%r126 = fptoui <4 x double> undef to <4 x i32>
; CHECK: cost of 16 {{.*}} fptosi
%r127 = fptosi <4 x double> undef to <4 x i32>
- ; CHECK: cost of 48 {{.*}} fptoui
+ ; CHECK: cost of 56 {{.*}} fptoui
%r128 = fptoui <4 x double> undef to <4 x i64>
- ; CHECK: cost of 48 {{.*}} fptosi
+ ; CHECK: cost of 56 {{.*}} fptosi
%r129 = fptosi <4 x double> undef to <4 x i64>
; CHECK: cost of 32 {{.*}} fptoui
@@ -305,9 +305,9 @@ define i32 @casts() {
%r136 = fptoui <8 x float> undef to <8 x i32>
; CHECK: cost of 2 {{.*}} fptosi
%r137 = fptosi <8 x float> undef to <8 x i32>
- ; CHECK: cost of 96 {{.*}} fptoui
+ ; CHECK: cost of 112 {{.*}} fptoui
%r138 = fptoui <8 x float> undef to <8 x i64>
- ; CHECK: cost of 96 {{.*}} fptosi
+ ; CHECK: cost of 112 {{.*}} fptosi
%r139 = fptosi <8 x float> undef to <8 x i64>
; CHECK: cost of 32 {{.*}} fptoui
@@ -326,9 +326,9 @@ define i32 @casts() {
%r146 = fptoui <8 x double> undef to <8 x i32>
; CHECK: cost of 32 {{.*}} fptosi
%r147 = fptosi <8 x double> undef to <8 x i32>
- ; CHECK: cost of 96 {{.*}} fptoui
+ ; CHECK: cost of 112 {{.*}} fptoui
%r148 = fptoui <8 x double> undef to <8 x i64>
- ; CHECK: cost of 96 {{.*}} fptosi
+ ; CHECK: cost of 112 {{.*}} fptosi
%r149 = fptosi <8 x double> undef to <8 x i64>
; CHECK: cost of 64 {{.*}} fptoui
@@ -347,9 +347,9 @@ define i32 @casts() {
%r156 = fptoui <16 x float> undef to <16 x i32>
; CHECK: cost of 4 {{.*}} fptosi
%r157 = fptosi <16 x float> undef to <16 x i32>
- ; CHECK: cost of 192 {{.*}} fptoui
+ ; CHECK: cost of 224 {{.*}} fptoui
%r158 = fptoui <16 x float> undef to <16 x i64>
- ; CHECK: cost of 192 {{.*}} fptosi
+ ; CHECK: cost of 224 {{.*}} fptosi
%r159 = fptosi <16 x float> undef to <16 x i64>
; CHECK: cost of 64 {{.*}} fptoui
@@ -368,9 +368,9 @@ define i32 @casts() {
%r166 = fptoui <16 x double> undef to <16 x i32>
; CHECK: cost of 64 {{.*}} fptosi
%r167 = fptosi <16 x double> undef to <16 x i32>
- ; CHECK: cost of 192 {{.*}} fptoui
+ ; CHECK: cost of 224 {{.*}} fptoui
%r168 = fptoui <16 x double> undef to <16 x i64>
- ; CHECK: cost of 192 {{.*}} fptosi
+ ; CHECK: cost of 224 {{.*}} fptosi
%r169 = fptosi <16 x double> undef to <16 x i64>
; CHECK: cost of 8 {{.*}} uitofp
diff --git a/test/Analysis/CostModel/X86/scalarize.ll b/test/Analysis/CostModel/X86/scalarize.ll
new file mode 100644
index 0000000000..fc25fcbc56
--- /dev/null
+++ b/test/Analysis/CostModel/X86/scalarize.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=i386 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK32
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK64
+
+; Test vector scalarization costs.
+; RUN: llc < %s -march=x86 -mcpu=i386
+; RUN: llc < %s -march=x86 -mcpu=yonah
+
+%i4 = type <4 x i32>
+%i8 = type <2 x i64>
+
+;;; TEST HANDLING OF VARIOUS VECTOR SIZES
+
+declare %i4 @llvm.bswap.v4i32(%i4)
+declare %i8 @llvm.bswap.v2i64(%i8)
+
+declare %i4 @llvm.ctpop.v4i32(%i4)
+declare %i8 @llvm.ctpop.v2i64(%i8)
+
+; CHECK32-LABEL: test_scalarized_intrinsics
+; CHECK64-LABEL: test_scalarized_intrinsics
+define void @test_scalarized_intrinsics() {
+ %r1 = add %i8 undef, undef
+
+; CHECK32: cost of 12 {{.*}}bswap.v4i32
+; CHECK64: cost of 12 {{.*}}bswap.v4i32
+ %r2 = call %i4 @llvm.bswap.v4i32(%i4 undef)
+; CHECK32: cost of 10 {{.*}}bswap.v2i64
+; CHECK64: cost of 6 {{.*}}bswap.v2i64
+ %r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
+
+; CHECK32: cost of 12 {{.*}}ctpop.v4i32
+; CHECK64: cost of 12 {{.*}}ctpop.v4i32
+ %r4 = call %i4 @llvm.ctpop.v4i32(%i4 undef)
+; CHECK32: cost of 10 {{.*}}ctpop.v2i64
+; CHECK64: cost of 6 {{.*}}ctpop.v2i64
+ %r5 = call %i8 @llvm.ctpop.v2i64(%i8 undef)
+
+; CHECK32: ret
+; CHECK64: ret
+ ret void
+}