diff options
author | Hal Finkel <hfinkel@anl.gov> | 2012-11-01 21:50:12 +0000 |
---|---|---|
committer | Hal Finkel <hfinkel@anl.gov> | 2012-11-01 21:50:12 +0000 |
commit | 78fd353d5e5daedc47ecc31b6193ca48793c249c (patch) | |
tree | 281e57ed39b5d3e3e57985640866d89fa075fa39 /test | |
parent | 5fc8c7cb8571f99b69264aeba48c45eed1c69f6a (diff) | |
download | llvm-78fd353d5e5daedc47ecc31b6193ca48793c249c.tar.gz llvm-78fd353d5e5daedc47ecc31b6193ca48793c249c.tar.bz2 llvm-78fd353d5e5daedc47ecc31b6193ca48793c249c.tar.xz |
BBVectorize: Use target costs for incoming and outgoing values instead of the depth heuristic.
When target cost information is available, compute explicit costs of inserting and
extracting values from vectors. At this point, all costs are estimated using the
target information, and the chain-depth heuristic is no longer needed. As a result, that heuristic
is now disabled by default when target costs are in use.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167256 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/Transforms/BBVectorize/X86/simple-ldstr.ll | 29 | ||||
-rw-r--r-- | test/Transforms/BBVectorize/X86/simple.ll | 81 |
2 files changed, 99 insertions, 11 deletions
diff --git a/test/Transforms/BBVectorize/X86/simple-ldstr.ll b/test/Transforms/BBVectorize/X86/simple-ldstr.ll new file mode 100644 index 0000000000..0124399bad --- /dev/null +++ b/test/Transforms/BBVectorize/X86/simple-ldstr.ll @@ -0,0 +1,29 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s + +; Simple 3-pair chain with loads and stores +define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly { +entry: + %i0 = load double* %a, align 8 + %i1 = load double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double* %a, i64 1 + %i3 = load double* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double* %c, align 8 + %arrayidx5 = getelementptr inbounds double* %c, i64 1 + store double %mul5, double* %arrayidx5, align 8 + ret void +; CHECK: @test1 +; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>* +; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>* +; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8 +; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8 +; CHECK: %mul = fmul <2 x double> %i0, %i1 +; CHECK: %0 = bitcast double* %c to <2 x double>* +; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8 +; CHECK: ret void +} + diff --git a/test/Transforms/BBVectorize/X86/simple.ll b/test/Transforms/BBVectorize/X86/simple.ll index d11c9b92f0..0113e38bb1 100644 --- a/test/Transforms/BBVectorize/X86/simple.ll +++ b/test/Transforms/BBVectorize/X86/simple.ll @@ -3,25 +3,84 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; Basic depth-3 chain define double @test1(double %A1, double %A2, double 
%B1, double %B2) { + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R ; CHECK: @test1 +; CHECK-NOT: fmul <2 x double> +; CHECK: ret double %R +} + +; Basic chain +define double @test1a(double %A1, double %A2, double %B1, double %B2) { + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %W1 = fadd double %Y1, %Z1 + %W2 = fadd double %Y2, %Z2 + %V1 = fadd double %W1, %Z1 + %V2 = fadd double %W2, %Z2 + %Q1 = fadd double %W1, %V1 + %Q2 = fadd double %W2, %V2 + %S1 = fadd double %W1, %Q1 + %S2 = fadd double %W2, %Q2 + %R = fmul double %S1, %S2 + ret double %R +; CHECK: @test1a ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 +; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 +; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 +; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 +; CHECK: %W1 = fadd <2 x double> %Y1, %Z1 +; CHECK: %V1 = fadd <2 x double> %W1, %Z1 +; CHECK: %Q1 = fadd <2 x double> %W1, %V1 +; CHECK: %S1 = fadd <2 x double> %W1, %Q1 +; CHECK: %S1.v.r1 = extractelement <2 x double> %S1, i32 0 +; CHECK: %S1.v.r2 = extractelement <2 x double> %S1, i32 1 +; CHECK: %R = fmul double %S1.v.r1, %S1.v.r2 +; CHECK: ret double %R +} + +; Basic depth-3 chain (last pair permuted) +define double @test2(double %A1, double %A2, double %B1, double %B2) { %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 -; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 %Y1 = fmul double %X1, %A1 %Y2 = fmul double %X2, %A2 -; CHECK: %Y1 = 
fmul <2 x double> %X1, %X1.v.i0.2 - %Z1 = fadd double %Y1, %B1 - %Z2 = fadd double %Y2, %B2 -; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 + %Z1 = fadd double %Y2, %B1 + %Z2 = fadd double %Y1, %B2 + %R = fmul double %Z1, %Z2 + ret double %R +; CHECK: @test2 +; CHECK-NOT: fmul <2 x double> +; CHECK: ret double %R +} + +; Basic depth-4 chain (internal permutation) +define double @test4(double %A1, double %A2, double %B1, double %B2) { + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y2, %B1 + %Z2 = fadd double %Y1, %B2 + %W1 = fadd double %Y2, %Z1 + %W2 = fadd double %Y1, %Z2 %R = fmul double %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 ret double %R +; CHECK: @test4 +; CHECK-NOT: fmul <2 x double> ; CHECK: ret double %R } @@ -37,8 +96,8 @@ define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) { %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1> %R = mul <8 x i8> %Q1, %Q2 ret <8 x i8> %R -; CHECK-TI: @test6 -; CHECK-TI-NOT: sub <16 x i8> -; CHECK-TI: ret <8 x i8> +; CHECK: @test6 +; CHECK-NOT: sub <16 x i8> +; CHECK: ret <8 x i8> } |