diff options
author | Tim Northover <tnorthover@apple.com> | 2014-02-19 10:02:43 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2014-02-19 10:02:43 +0000 |
commit | 44697f3fc1c81644aedadf5e879fed7ff56a03da (patch) | |
tree | 63d1b192dfd091c63c6d8f9babd5b34873ab367d /test/Transforms | |
parent | 84d5a235fdd09b7ed5271203a0b4ac9ab657edab (diff) | |
download | llvm-44697f3fc1c81644aedadf5e879fed7ff56a03da.tar.gz llvm-44697f3fc1c81644aedadf5e879fed7ff56a03da.tar.bz2 llvm-44697f3fc1c81644aedadf5e879fed7ff56a03da.tar.xz |
X86 CodeGenPrep: sink shufflevectors before shifts
On x86, shifting a vector by a scalar is significantly cheaper than shifting a
vector by another fully general vector. Unfortunately, because SelectionDAG
operates on just one basic block at a time, the shufflevector instruction that
reveals whether the right-hand side of a shift *is* really a scalar is often
not visible to CodeGen when it's needed.
This adds another handler to CodeGenPrepare, to sink any useful shufflevector
instructions down to the basic block where they're used, predicated on a target
hook (since on other architectures, doing so will often just introduce extra
real work).
rdar://problem/16063505
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201655 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms')
-rw-r--r-- | test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll | 102 |
1 files changed, 102 insertions, 0 deletions
diff --git a/test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll b/test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll new file mode 100644 index 0000000000..c4ee79cd7f --- /dev/null +++ b/test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll @@ -0,0 +1,102 @@ +; RUN: opt -S -codegenprepare -mtriple=x86_64-apple-macosx10.9 -mcpu=core-avx2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2 +; RUN: opt -S -codegenprepare -mtriple=x86_64-apple-macosx10.9 -mcpu=corei7 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SSE2 + +define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { +; CHECK-LABEL: @test_8bit +; CHECK: if_true: +; CHECK-NOT: shufflevector + +; CHECK: if_false: +; CHECK-NOT: shufflevector +; CHECK: shl <16 x i8> %lhs, %mask + %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <16 x i8> %mask + +if_false: + %res = shl <16 x i8> %lhs, %mask + ret <16 x i8> %res +} + +define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) { +; CHECK-LABEL: @test_16bit +; CHECK: if_true: +; CHECK-NOT: shufflevector + +; CHECK: if_false: +; CHECK: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector +; CHECK: shl <8 x i16> %lhs, [[SPLAT]] + %mask = shufflevector <8 x i16> %tmp, <8 x i16> undef, <8 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <8 x i16> %mask + +if_false: + %res = shl <8 x i16> %lhs, %mask + ret <8 x i16> %res +} + +define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { +; CHECK-LABEL: @test_notsplat +; CHECK: if_true: +; CHECK-NOT: shufflevector + +; CHECK: if_false: +; CHECK-NOT: shufflevector +; CHECK: shl <4 x i32> %lhs, %mask + %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0> + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <4 x i32> %mask + +if_false: + %res = shl <4 x i32> %lhs, %mask + ret <4 x i32> %res +} + +define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { +; CHECK-AVX2-LABEL: @test_32bit +; CHECK-AVX2: if_false: +; CHECK-AVX2-NOT: shufflevector +; CHECK-AVX2: ashr <4 x i32> %lhs, %mask + +; CHECK-SSE2-LABEL: @test_32bit +; CHECK-SSE2: if_false: +; CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector +; CHECK-SSE2: ashr <4 x i32> %lhs, [[SPLAT]] + %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0> + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <4 x i32> %mask + +if_false: + %res = ashr <4 x i32> %lhs, %mask + ret <4 x i32> %res +} + +define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) { +; CHECK-AVX2-LABEL: @test_64bit +; CHECK-AVX2: if_false: +; CHECK-AVX2-NOT: shufflevector +; CHECK-AVX2: lshr <2 x i64> %lhs, %mask + +; CHECK-SSE2-LABEL: @test_64bit +; CHECK-SSE2: if_false: +; CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector +; CHECK-SSE2: lshr <2 x i64> %lhs, [[SPLAT]] + + %mask = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <2 x i64> %mask + +if_false: + %res = lshr <2 x i64> %lhs, %mask + ret <2 x i64> %res +} |