summaryrefslogtreecommitdiff
path: root/test/Transforms
diff options
context:
space:
mode:
authorTim Northover <tnorthover@apple.com>2014-02-19 10:02:43 +0000
committerTim Northover <tnorthover@apple.com>2014-02-19 10:02:43 +0000
commit44697f3fc1c81644aedadf5e879fed7ff56a03da (patch)
tree63d1b192dfd091c63c6d8f9babd5b34873ab367d /test/Transforms
parent84d5a235fdd09b7ed5271203a0b4ac9ab657edab (diff)
downloadllvm-44697f3fc1c81644aedadf5e879fed7ff56a03da.tar.gz
llvm-44697f3fc1c81644aedadf5e879fed7ff56a03da.tar.bz2
llvm-44697f3fc1c81644aedadf5e879fed7ff56a03da.tar.xz
X86 CodeGenPrep: sink shufflevectors before shifts
On x86, shifting a vector by a scalar is significantly cheaper than shifting a vector by another fully general vector. Unfortunately, because SelectionDAG operates on just one basic block at a time, the shufflevector instruction that reveals whether the right-hand side of a shift *is* really a scalar is often not visible to CodeGen when it's needed. This adds another handler to CodeGenPrepare, to sink any useful shufflevector instructions down to the basic block where they're used, predicated on a target hook (since on other architectures, doing so will often just introduce extra real work). rdar://problem/16063505 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201655 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms')
-rw-r--r--test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll102
1 files changed, 102 insertions, 0 deletions
diff --git a/test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll b/test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll
new file mode 100644
index 0000000000..c4ee79cd7f
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll
@@ -0,0 +1,102 @@
+; RUN: opt -S -codegenprepare -mtriple=x86_64-apple-macosx10.9 -mcpu=core-avx2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
+; RUN: opt -S -codegenprepare -mtriple=x86_64-apple-macosx10.9 -mcpu=corei7 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SSE2
+
+define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) {
+; CHECK-LABEL: @test_8bit
+; CHECK: if_true:
+; CHECK-NOT: shufflevector
+
+; CHECK: if_false:
+; CHECK-NOT: shufflevector
+; CHECK: shl <16 x i8> %lhs, %mask
+ %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer
+ br i1 %tst, label %if_true, label %if_false
+
+if_true:
+ ret <16 x i8> %mask
+
+if_false:
+ %res = shl <16 x i8> %lhs, %mask
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) {
+; CHECK-LABEL: @test_16bit
+; CHECK: if_true:
+; CHECK-NOT: shufflevector
+
+; CHECK: if_false:
+; CHECK: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector
+; CHECK: shl <8 x i16> %lhs, [[SPLAT]]
+ %mask = shufflevector <8 x i16> %tmp, <8 x i16> undef, <8 x i32> zeroinitializer
+ br i1 %tst, label %if_true, label %if_false
+
+if_true:
+ ret <8 x i16> %mask
+
+if_false:
+ %res = shl <8 x i16> %lhs, %mask
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
+; CHECK-LABEL: @test_notsplat
+; CHECK: if_true:
+; CHECK-NOT: shufflevector
+
+; CHECK: if_false:
+; CHECK-NOT: shufflevector
+; CHECK: shl <4 x i32> %lhs, %mask
+ %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+ br i1 %tst, label %if_true, label %if_false
+
+if_true:
+ ret <4 x i32> %mask
+
+if_false:
+ %res = shl <4 x i32> %lhs, %mask
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
+; CHECK-AVX2-LABEL: @test_32bit
+; CHECK-AVX2: if_false:
+; CHECK-AVX2-NOT: shufflevector
+; CHECK-AVX2: ashr <4 x i32> %lhs, %mask
+
+; CHECK-SSE2-LABEL: @test_32bit
+; CHECK-SSE2: if_false:
+; CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector
+; CHECK-SSE2: ashr <4 x i32> %lhs, [[SPLAT]]
+ %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
+ br i1 %tst, label %if_true, label %if_false
+
+if_true:
+ ret <4 x i32> %mask
+
+if_false:
+ %res = ashr <4 x i32> %lhs, %mask
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) {
+; CHECK-AVX2-LABEL: @test_64bit
+; CHECK-AVX2: if_false:
+; CHECK-AVX2-NOT: shufflevector
+; CHECK-AVX2: lshr <2 x i64> %lhs, %mask
+
+; CHECK-SSE2-LABEL: @test_64bit
+; CHECK-SSE2: if_false:
+; CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector
+; CHECK-SSE2: lshr <2 x i64> %lhs, [[SPLAT]]
+
+ %mask = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
+ br i1 %tst, label %if_true, label %if_false
+
+if_true:
+ ret <2 x i64> %mask
+
+if_false:
+ %res = lshr <2 x i64> %lhs, %mask
+ ret <2 x i64> %res
+}