X86: fold SSE2/AVX2 logical shift by immediate amount into zero vector when possible

Patch by Andrea Di Biagio git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186165 91177308-0d34-0410-b5e6-96231b3b80d8
author: Stephen Lin <stephenwlin@gmail.com> 2013-07-12 15:31:36 +0000
committer: Stephen Lin <stephenwlin@gmail.com> 2013-07-12 15:31:36 +0000
commit: fff967358b56c4e191089f668b75ae415b5bd992 (patch)
tree: 063cb5d8eab7374555fa488f0229a8f123353c9c /test/CodeGen
parent: 55ec2218c448ef9e0d09b5534885b6d2a9786a73 (diff)
download: llvm-fff967358b56c4e191089f668b75ae415b5bd992.tar.gz
llvm-fff967358b56c4e191089f668b75ae415b5bd992.tar.bz2
llvm-fff967358b56c4e191089f668b75ae415b5bd992.tar.xz
2 files changed, 494 insertions, 0 deletions
diff --git a/test/CodeGen/X86/avx2-vector-shifts.ll b/test/CodeGen/X86/avx2-vector-shifts.ll
new file mode 100644
index 0000000000..ca18a60b3c
--- /dev/null
+++ b/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -0,0 +1,247 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; AVX2 Logical Shift Left
+
+define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_1:
+; CHECK: vpsllw  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
+entry:  ; shift count 1: expected to be emitted as an add of the input to itself
+  %shl = shl <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_2:
+; CHECK: vpaddw  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
+entry:  ; shift count 16 == lane width: expected to fold to a zero vector
+  %shl = shl <16 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_1:
+; CHECK: vpslld  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
+entry:  ; shift count 1: expected to be emitted as an add of the input to itself
+  %shl = shl <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_2:
+; CHECK: vpaddd  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
+entry:  ; shift count 32 == lane width: expected to fold to a zero vector
+  %shl = shl <8 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
+  ret <4 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_1:
+; CHECK: vpsllq  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
+entry:  ; shift count 1: expected to be emitted as an add of the input to itself
+  %shl = shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
+  ret <4 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_2:
+; CHECK: vpaddq  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
+entry:  ; shift count 64 == lane width: expected to fold to a zero vector
+  %shl = shl <4 x i64> %InVec, <i64 64, i64 64, i64 64, i64 64>
+  ret <4 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+; AVX2 Arithmetic Shift
+
+define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %ashr = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <16 x i16> %ashr
+}
+
+; CHECK-LABEL: test_sraw_1:
+; CHECK: vpsraw  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
+entry:  ; shift count 1: expected to remain an immediate shift ($1)
+  %ashr = ashr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <16 x i16> %ashr
+}
+
+; CHECK-LABEL: test_sraw_2:
+; CHECK: vpsraw  $1, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
+entry:  ; shift count 16 == lane width: expected to remain an immediate shift ($16), not a zero vector
+  %ashr = ashr <16 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <16 x i16> %ashr
+}
+
+; CHECK-LABEL: test_sraw_3:
+; CHECK: vpsraw  $16, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %ashr = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %ashr
+}
+
+; CHECK-LABEL: test_srad_1:
+; CHECK: vpsrad  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
+entry:  ; shift count 1: expected to remain an immediate shift ($1)
+  %ashr = ashr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %ashr
+}
+
+; CHECK-LABEL: test_srad_2:
+; CHECK: vpsrad  $1, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
+entry:  ; shift count 32 == lane width: expected to remain an immediate shift ($32), not a zero vector
+  %ashr = ashr <8 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+  ret <8 x i32> %ashr
+}
+
+; CHECK-LABEL: test_srad_3:
+; CHECK: vpsrad  $32, %ymm0, %ymm0
+; CHECK: ret
+
+; SSE Logical Shift Right
+
+define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %lshr = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <16 x i16> %lshr
+}
+
+; CHECK-LABEL: test_srlw_1:
+; CHECK: vpsrlw  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
+entry:  ; shift count 1: expected to remain an immediate shift ($1)
+  %lshr = lshr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <16 x i16> %lshr
+}
+
+; CHECK-LABEL: test_srlw_2:
+; CHECK: vpsrlw  $1, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
+entry:  ; shift count 16 == lane width: expected to fold to a zero vector
+  %lshr = lshr <16 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <16 x i16> %lshr
+}
+
+; CHECK-LABEL: test_srlw_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %lshr = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %lshr
+}
+
+; CHECK-LABEL: test_srld_1:
+; CHECK: vpsrld  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
+entry:  ; shift count 1: expected to remain an immediate shift ($1)
+  %lshr = lshr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %lshr
+}
+
+; CHECK-LABEL: test_srld_2:
+; CHECK: vpsrld  $1, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
+entry:  ; shift count 32 == lane width: expected to fold to a zero vector
+  %lshr = lshr <8 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+  ret <8 x i32> %lshr
+}
+
+; CHECK-LABEL: test_srld_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %lshr = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
+  ret <4 x i64> %lshr
+}
+
+; CHECK-LABEL: test_srlq_1:
+; CHECK: vpsrlq  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
+entry:  ; shift count 1: expected to remain an immediate shift ($1)
+  %lshr = lshr <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
+  ret <4 x i64> %lshr
+}
+
+; CHECK-LABEL: test_srlq_2:
+; CHECK: vpsrlq  $1, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
+entry:  ; shift count 64 == lane width: expected to fold to a zero vector
+  %lshr = lshr <4 x i64> %InVec, <i64 64, i64 64, i64 64, i64 64>
+  ret <4 x i64> %lshr
+}
+
+; CHECK-LABEL: test_srlq_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
diff --git a/test/CodeGen/X86/sse2-vector-shifts.ll b/test/CodeGen/X86/sse2-vector-shifts.ll
new file mode 100644
index 0000000000..312ca9533c
--- /dev/null
+++ b/test/CodeGen/X86/sse2-vector-shifts.ll
@@ -0,0 +1,247 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 -mcpu=corei7 | FileCheck %s
+
+; SSE2 Logical Shift Left
+
+define <8 x i16> @test_sllw_1(<8 x i16> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %shl = shl <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_1:
+; CHECK: psllw   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_sllw_2(<8 x i16> %InVec) {
+entry:  ; shift count 1: expected to be emitted as an add of the input to itself
+  %shl = shl <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_2:
+; CHECK: paddw   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_sllw_3(<8 x i16> %InVec) {
+entry:  ; shift count 16 == lane width: expected to fold to a zero vector
+  %shl = shl <8 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_slld_1(<4 x i32> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %shl = shl <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_1:
+; CHECK: pslld   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_slld_2(<4 x i32> %InVec) {
+entry:  ; shift count 1: expected to be emitted as an add of the input to itself
+  %shl = shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_2:
+; CHECK: paddd   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_slld_3(<4 x i32> %InVec) {
+entry:  ; shift count 32 == lane width: expected to fold to a zero vector
+  %shl = shl <4 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_sllq_1(<2 x i64> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %shl = shl <2 x i64> %InVec, <i64 0, i64 0>
+  ret <2 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_1:
+; CHECK: psllq   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_sllq_2(<2 x i64> %InVec) {
+entry:  ; shift count 1: expected to be emitted as an add of the input to itself
+  %shl = shl <2 x i64> %InVec, <i64 1, i64 1>
+  ret <2 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_2:
+; CHECK: paddq   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_sllq_3(<2 x i64> %InVec) {
+entry:  ; shift count 64 == lane width: expected to fold to a zero vector
+  %shl = shl <2 x i64> %InVec, <i64 64, i64 64>
+  ret <2 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+; SSE2 Arithmetic Shift
+
+define <8 x i16> @test_sraw_1(<8 x i16> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %ashr = ashr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <8 x i16> %ashr
+}
+
+; CHECK-LABEL: test_sraw_1:
+; CHECK: psraw   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_sraw_2(<8 x i16> %InVec) {
+entry:  ; shift count 1: expected to remain an immediate shift ($1)
+  %ashr = ashr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %ashr
+}
+
+; CHECK-LABEL: test_sraw_2:
+; CHECK: psraw   $1, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_sraw_3(<8 x i16> %InVec) {
+entry:  ; shift count 16 == lane width: expected to remain an immediate shift ($16), not a zero vector
+  %ashr = ashr <8 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <8 x i16> %ashr
+}
+
+; CHECK-LABEL: test_sraw_3:
+; CHECK: psraw   $16, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srad_1(<4 x i32> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %ashr = ashr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %ashr
+}
+
+; CHECK-LABEL: test_srad_1:
+; CHECK: psrad   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srad_2(<4 x i32> %InVec) {
+entry:  ; shift count 1: expected to remain an immediate shift ($1)
+  %ashr = ashr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %ashr
+}
+
+; CHECK-LABEL: test_srad_2:
+; CHECK: psrad   $1, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srad_3(<4 x i32> %InVec) {
+entry:  ; shift count 32 == lane width: expected to remain an immediate shift ($32), not a zero vector
+  %ashr = ashr <4 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32>
+  ret <4 x i32> %ashr
+}
+
+; CHECK-LABEL: test_srad_3:
+; CHECK: psrad   $32, %xmm0
+; CHECK-NEXT: ret
+
+; SSE Logical Shift Right
+
+define <8 x i16> @test_srlw_1(<8 x i16> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %lshr = lshr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <8 x i16> %lshr
+}
+
+; CHECK-LABEL: test_srlw_1:
+; CHECK: psrlw   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_srlw_2(<8 x i16> %InVec) {
+entry:  ; shift count 1: expected to remain an immediate shift ($1)
+  %lshr = lshr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %lshr
+}
+
+; CHECK-LABEL: test_srlw_2:
+; CHECK: psrlw   $1, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_srlw_3(<8 x i16> %InVec) {
+entry:  ; shift count 16 == lane width: expected to fold to a zero vector
+  %lshr = lshr <8 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <8 x i16> %lshr
+}
+
+; CHECK-LABEL: test_srlw_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srld_1(<4 x i32> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %lshr = lshr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %lshr
+}
+
+; CHECK-LABEL: test_srld_1:
+; CHECK: psrld   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srld_2(<4 x i32> %InVec) {
+entry:  ; shift count 1: expected to remain an immediate shift ($1)
+  %lshr = lshr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %lshr
+}
+
+; CHECK-LABEL: test_srld_2:
+; CHECK: psrld   $1, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srld_3(<4 x i32> %InVec) {
+entry:  ; shift count 32 == lane width: expected to fold to a zero vector
+  %lshr = lshr <4 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32>
+  ret <4 x i32> %lshr
+}
+
+; CHECK-LABEL: test_srld_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_srlq_1(<2 x i64> %InVec) {
+entry:  ; shift count 0: expected to remain an immediate shift ($0)
+  %lshr = lshr <2 x i64> %InVec, <i64 0, i64 0>
+  ret <2 x i64> %lshr
+}
+
+; CHECK-LABEL: test_srlq_1:
+; CHECK: psrlq   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_srlq_2(<2 x i64> %InVec) {
+entry:  ; shift count 1: expected to remain an immediate shift ($1)
+  %lshr = lshr <2 x i64> %InVec, <i64 1, i64 1>
+  ret <2 x i64> %lshr
+}
+
+; CHECK-LABEL: test_srlq_2:
+; CHECK: psrlq   $1, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_srlq_3(<2 x i64> %InVec) {
+entry:  ; shift count 64 == lane width: expected to fold to a zero vector
+  %lshr = lshr <2 x i64> %InVec, <i64 64, i64 64>
+  ret <2 x i64> %lshr
+}
+
+; CHECK-LABEL: test_srlq_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
author	Stephen Lin <stephenwlin@gmail.com>	2013-07-12 15:31:36 +0000
committer	Stephen Lin <stephenwlin@gmail.com>	2013-07-12 15:31:36 +0000
commit	fff967358b56c4e191089f668b75ae415b5bd992 (patch)
tree	063cb5d8eab7374555fa488f0229a8f123353c9c /test/CodeGen
parent	55ec2218c448ef9e0d09b5534885b6d2a9786a73 (diff)
download	llvm-fff967358b56c4e191089f668b75ae415b5bd992.tar.gz llvm-fff967358b56c4e191089f668b75ae415b5bd992.tar.bz2 llvm-fff967358b56c4e191089f668b75ae415b5bd992.tar.xz