From 44697f3fc1c81644aedadf5e879fed7ff56a03da Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 19 Feb 2014 10:02:43 +0000 Subject: X86 CodeGenPrep: sink shufflevectors before shifts On x86, shifting a vector by a scalar is significantly cheaper than shifting a vector by another fully general vector. Unfortunately, because SelectionDAG operates on just one basic block at a time, the shufflevector instruction that reveals whether the right-hand side of a shift *is* really a scalar is often not visible to CodeGen when it's needed. This adds another handler to CodeGenPrepare, to sink any useful shufflevector instructions down to the basic block where they're used, predicated on a target hook (since on other architectures, doing so will often just introduce extra real work). rdar://problem/16063505 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201655 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll | 102 +++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll (limited to 'test/Transforms') diff --git a/test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll b/test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll new file mode 100644 index 0000000000..c4ee79cd7f --- /dev/null +++ b/test/Transforms/CodeGenPrepare/x86-shuffle-sink.ll @@ -0,0 +1,102 @@ +; RUN: opt -S -codegenprepare -mtriple=x86_64-apple-macosx10.9 -mcpu=core-avx2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2 +; RUN: opt -S -codegenprepare -mtriple=x86_64-apple-macosx10.9 -mcpu=corei7 %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SSE2 + +define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { +; CHECK-LABEL: @test_8bit +; CHECK: if_true: +; CHECK-NOT: shufflevector + +; CHECK: if_false: +; CHECK-NOT: shufflevector +; CHECK: shl <16 x i8> %lhs, %mask + %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <16 x i8> %mask + +if_false: + %res = shl <16 x i8> %lhs, %mask + ret <16 x i8> %res +} + +define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) { +; CHECK-LABEL: @test_16bit +; CHECK: if_true: +; CHECK-NOT: shufflevector + +; CHECK: if_false: +; CHECK: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector +; CHECK: shl <8 x i16> %lhs, [[SPLAT]] + %mask = shufflevector <8 x i16> %tmp, <8 x i16> undef, <8 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <8 x i16> %mask + +if_false: + %res = shl <8 x i16> %lhs, %mask + ret <8 x i16> %res +} + +define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { +; CHECK-LABEL: @test_notsplat +; CHECK: if_true: +; CHECK-NOT: shufflevector + +; CHECK: if_false: +; CHECK-NOT: shufflevector +; CHECK: shl <4 x i32> %lhs, %mask + %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <4 x i32> %mask + +if_false: + %res = shl <4 x i32> %lhs, %mask + ret <4 x i32> %res +} + +define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { +; CHECK-AVX2-LABEL: @test_32bit +; CHECK-AVX2: if_false: +; CHECK-AVX2-NOT: shufflevector +; CHECK-AVX2: ashr <4 x i32> %lhs, %mask + +; CHECK-SSE2-LABEL: @test_32bit +; CHECK-SSE2: if_false: +; CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector +; CHECK-SSE2: ashr <4 x i32> %lhs, [[SPLAT]] + %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <4 x i32> %mask + +if_false: + %res = ashr <4 x i32> %lhs, %mask + ret <4 x i32> %res +} + +define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) { +; CHECK-AVX2-LABEL: @test_64bit +; CHECK-AVX2: if_false: +; CHECK-AVX2-NOT: shufflevector +; CHECK-AVX2: lshr <2 x i64> %lhs, %mask + +; CHECK-SSE2-LABEL: @test_64bit +; CHECK-SSE2: if_false: +; CHECK-SSE2: [[SPLAT:%[0-9a-zA-Z_]+]] = shufflevector +; CHECK-SSE2: lshr <2 x i64> %lhs, [[SPLAT]] + + %mask = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <2 x i64> %mask + +if_false: + %res = lshr <2 x i64> %lhs, %mask + ret <2 x i64> %res +} -- cgit v1.2.3