diff options
author | Stephen Lin <stephenwlin@gmail.com> | 2013-07-12 15:31:36 +0000 |
---|---|---|
committer | Stephen Lin <stephenwlin@gmail.com> | 2013-07-12 15:31:36 +0000 |
commit | fff967358b56c4e191089f668b75ae415b5bd992 (patch) | |
tree | 063cb5d8eab7374555fa488f0229a8f123353c9c /test/CodeGen | |
parent | 55ec2218c448ef9e0d09b5534885b6d2a9786a73 (diff) | |
download | llvm-fff967358b56c4e191089f668b75ae415b5bd992.tar.gz llvm-fff967358b56c4e191089f668b75ae415b5bd992.tar.bz2 llvm-fff967358b56c4e191089f668b75ae415b5bd992.tar.xz |
X86: fold SSE2/AVX2 logical shift by immediate amount into zero vector when possible
Patch by Andrea Di Biagio
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186165 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/X86/avx2-vector-shifts.ll | 247 | ||||
-rw-r--r-- | test/CodeGen/X86/sse2-vector-shifts.ll | 247 |
2 files changed, 494 insertions, 0 deletions
diff --git a/test/CodeGen/X86/avx2-vector-shifts.ll b/test/CodeGen/X86/avx2-vector-shifts.ll new file mode 100644 index 0000000000..ca18a60b3c --- /dev/null +++ b/test/CodeGen/X86/avx2-vector-shifts.ll @@ -0,0 +1,247 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s + +; AVX2 Logical Shift Left + +define <16 x i16> @test_sllw_1(<16 x i16> %InVec) { +entry: + %shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> + ret <16 x i16> %shl +} + +; CHECK: test_sllw_1: +; CHECK: vpsllw $0, %ymm0, %ymm0 +; CHECK: ret + +define <16 x i16> @test_sllw_2(<16 x i16> %InVec) { +entry: + %shl = shl <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <16 x i16> %shl +} + +; CHECK: test_sllw_2: +; CHECK: vpaddw %ymm0, %ymm0, %ymm0 +; CHECK: ret + +define <16 x i16> @test_sllw_3(<16 x i16> %InVec) { +entry: + %shl = shl <16 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> + ret <16 x i16> %shl +} + +; CHECK: test_sllw_3: +; CHECK: vxorps %ymm0, %ymm0, %ymm0 +; CHECK: ret + +define <8 x i32> @test_slld_1(<8 x i32> %InVec) { +entry: + %shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + ret <8 x i32> %shl +} + +; CHECK: test_slld_1: +; CHECK: vpslld $0, %ymm0, %ymm0 +; CHECK: ret + +define <8 x i32> @test_slld_2(<8 x i32> %InVec) { +entry: + %shl = shl <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <8 x i32> %shl +} + +; CHECK: test_slld_2: +; CHECK: vpaddd %ymm0, %ymm0, %ymm0 +; CHECK: ret + +define <8 x i32> @test_slld_3(<8 x i32> %InVec) { +entry: + %shl = shl <8 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> + ret <8 x i32> %shl +} + +; CHECK: test_slld_3: +; CHECK: 
vxorps %ymm0, %ymm0, %ymm0 +; CHECK: ret + +define <4 x i64> @test_sllq_1(<4 x i64> %InVec) { +entry: + %shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0> + ret <4 x i64> %shl +} + +; CHECK: test_sllq_1: +; CHECK: vpsllq $0, %ymm0, %ymm0 +; CHECK: ret + +define <4 x i64> @test_sllq_2(<4 x i64> %InVec) { +entry: + %shl = shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1> + ret <4 x i64> %shl +} + +; CHECK: test_sllq_2: +; CHECK: vpaddq %ymm0, %ymm0, %ymm0 +; CHECK: ret + +define <4 x i64> @test_sllq_3(<4 x i64> %InVec) { +entry: + %shl = shl <4 x i64> %InVec, <i64 64, i64 64, i64 64, i64 64> + ret <4 x i64> %shl +} + +; CHECK: test_sllq_3: +; CHECK: vxorps %ymm0, %ymm0, %ymm0 +; CHECK: ret + +; AVX2 Arithmetic Shift + +define <16 x i16> @test_sraw_1(<16 x i16> %InVec) { +entry: + %shl = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> + ret <16 x i16> %shl +} + +; CHECK: test_sraw_1: +; CHECK: vpsraw $0, %ymm0, %ymm0 +; CHECK: ret + +define <16 x i16> @test_sraw_2(<16 x i16> %InVec) { +entry: + %shl = ashr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <16 x i16> %shl +} + +; CHECK: test_sraw_2: +; CHECK: vpsraw $1, %ymm0, %ymm0 +; CHECK: ret + +define <16 x i16> @test_sraw_3(<16 x i16> %InVec) { +entry: + %shl = ashr <16 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> + ret <16 x i16> %shl +} + +; CHECK: test_sraw_3: +; CHECK: vpsraw $16, %ymm0, %ymm0 +; CHECK: ret + +define <8 x i32> @test_srad_1(<8 x i32> %InVec) { +entry: + %shl = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + ret <8 x i32> %shl +} + +; CHECK: test_srad_1: +; CHECK: vpsrad $0, %ymm0, %ymm0 +; CHECK: ret + +define <8 x i32> @test_srad_2(<8 x i32> %InVec) { +entry: + %shl = ashr <8 x 
i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <8 x i32> %shl +} + +; CHECK: test_srad_2: +; CHECK: vpsrad $1, %ymm0, %ymm0 +; CHECK: ret + +define <8 x i32> @test_srad_3(<8 x i32> %InVec) { +entry: + %shl = ashr <8 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> + ret <8 x i32> %shl +} + +; CHECK: test_srad_3: +; CHECK: vpsrad $32, %ymm0, %ymm0 +; CHECK: ret + +; AVX2 Logical Shift Right + +define <16 x i16> @test_srlw_1(<16 x i16> %InVec) { +entry: + %shl = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> + ret <16 x i16> %shl +} + +; CHECK: test_srlw_1: +; CHECK: vpsrlw $0, %ymm0, %ymm0 +; CHECK: ret + +define <16 x i16> @test_srlw_2(<16 x i16> %InVec) { +entry: + %shl = lshr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <16 x i16> %shl +} + +; CHECK: test_srlw_2: +; CHECK: vpsrlw $1, %ymm0, %ymm0 +; CHECK: ret + +define <16 x i16> @test_srlw_3(<16 x i16> %InVec) { +entry: + %shl = lshr <16 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> + ret <16 x i16> %shl +} + +; CHECK: test_srlw_3: +; CHECK: vxorps %ymm0, %ymm0, %ymm0 +; CHECK: ret + +define <8 x i32> @test_srld_1(<8 x i32> %InVec) { +entry: + %shl = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + ret <8 x i32> %shl +} + +; CHECK: test_srld_1: +; CHECK: vpsrld $0, %ymm0, %ymm0 +; CHECK: ret + +define <8 x i32> @test_srld_2(<8 x i32> %InVec) { +entry: + %shl = lshr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <8 x i32> %shl +} + +; CHECK: test_srld_2: +; CHECK: vpsrld $1, %ymm0, %ymm0 +; CHECK: ret + +define <8 x i32> @test_srld_3(<8 x i32> %InVec) { +entry: + %shl = lshr <8 x i32> %InVec, <i32 32, i32 32, i32 
32, i32 32, i32 32, i32 32, i32 32, i32 32> + ret <8 x i32> %shl +} + +; CHECK: test_srld_3: +; CHECK: vxorps %ymm0, %ymm0, %ymm0 +; CHECK: ret + +define <4 x i64> @test_srlq_1(<4 x i64> %InVec) { +entry: + %shl = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0> + ret <4 x i64> %shl +} + +; CHECK: test_srlq_1: +; CHECK: vpsrlq $0, %ymm0, %ymm0 +; CHECK: ret + +define <4 x i64> @test_srlq_2(<4 x i64> %InVec) { +entry: + %shl = lshr <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1> + ret <4 x i64> %shl +} + +; CHECK: test_srlq_2: +; CHECK: vpsrlq $1, %ymm0, %ymm0 +; CHECK: ret + +define <4 x i64> @test_srlq_3(<4 x i64> %InVec) { +entry: + %shl = lshr <4 x i64> %InVec, <i64 64, i64 64, i64 64, i64 64> + ret <4 x i64> %shl +} + +; CHECK: test_srlq_3: +; CHECK: vxorps %ymm0, %ymm0, %ymm0 +; CHECK: ret diff --git a/test/CodeGen/X86/sse2-vector-shifts.ll b/test/CodeGen/X86/sse2-vector-shifts.ll new file mode 100644 index 0000000000..312ca9533c --- /dev/null +++ b/test/CodeGen/X86/sse2-vector-shifts.ll @@ -0,0 +1,247 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 -mcpu=corei7 | FileCheck %s + +; SSE2 Logical Shift Left + +define <8 x i16> @test_sllw_1(<8 x i16> %InVec) { +entry: + %shl = shl <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> + ret <8 x i16> %shl +} + +; CHECK: test_sllw_1: +; CHECK: psllw $0, %xmm0 +; CHECK-NEXT: ret + +define <8 x i16> @test_sllw_2(<8 x i16> %InVec) { +entry: + %shl = shl <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %shl +} + +; CHECK: test_sllw_2: +; CHECK: paddw %xmm0, %xmm0 +; CHECK-NEXT: ret + +define <8 x i16> @test_sllw_3(<8 x i16> %InVec) { +entry: + %shl = shl <8 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> + ret <8 x i16> %shl +} + +; CHECK: test_sllw_3: +; CHECK: xorps %xmm0, %xmm0 +; CHECK-NEXT: ret + +define <4 x i32> @test_slld_1(<4 x i32> %InVec) { +entry: + %shl = shl <4 x i32> %InVec, <i32 0, i32 0, i32 0, 
i32 0> + ret <4 x i32> %shl +} + +; CHECK: test_slld_1: +; CHECK: pslld $0, %xmm0 +; CHECK-NEXT: ret + +define <4 x i32> @test_slld_2(<4 x i32> %InVec) { +entry: + %shl = shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %shl +} + +; CHECK: test_slld_2: +; CHECK: paddd %xmm0, %xmm0 +; CHECK-NEXT: ret + +define <4 x i32> @test_slld_3(<4 x i32> %InVec) { +entry: + %shl = shl <4 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32> + ret <4 x i32> %shl +} + +; CHECK: test_slld_3: +; CHECK: xorps %xmm0, %xmm0 +; CHECK-NEXT: ret + +define <2 x i64> @test_sllq_1(<2 x i64> %InVec) { +entry: + %shl = shl <2 x i64> %InVec, <i64 0, i64 0> + ret <2 x i64> %shl +} + +; CHECK: test_sllq_1: +; CHECK: psllq $0, %xmm0 +; CHECK-NEXT: ret + +define <2 x i64> @test_sllq_2(<2 x i64> %InVec) { +entry: + %shl = shl <2 x i64> %InVec, <i64 1, i64 1> + ret <2 x i64> %shl +} + +; CHECK: test_sllq_2: +; CHECK: paddq %xmm0, %xmm0 +; CHECK-NEXT: ret + +define <2 x i64> @test_sllq_3(<2 x i64> %InVec) { +entry: + %shl = shl <2 x i64> %InVec, <i64 64, i64 64> + ret <2 x i64> %shl +} + +; CHECK: test_sllq_3: +; CHECK: xorps %xmm0, %xmm0 +; CHECK-NEXT: ret + +; SSE2 Arithmetic Shift + +define <8 x i16> @test_sraw_1(<8 x i16> %InVec) { +entry: + %shl = ashr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> + ret <8 x i16> %shl +} + +; CHECK: test_sraw_1: +; CHECK: psraw $0, %xmm0 +; CHECK-NEXT: ret + +define <8 x i16> @test_sraw_2(<8 x i16> %InVec) { +entry: + %shl = ashr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %shl +} + +; CHECK: test_sraw_2: +; CHECK: psraw $1, %xmm0 +; CHECK-NEXT: ret + +define <8 x i16> @test_sraw_3(<8 x i16> %InVec) { +entry: + %shl = ashr <8 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> + ret <8 x i16> %shl +} + +; CHECK: test_sraw_3: +; CHECK: psraw $16, %xmm0 +; CHECK-NEXT: ret + +define <4 x i32> @test_srad_1(<4 x i32> %InVec) { +entry: + %shl = ashr <4 x 
i32> %InVec, <i32 0, i32 0, i32 0, i32 0> + ret <4 x i32> %shl +} + +; CHECK: test_srad_1: +; CHECK: psrad $0, %xmm0 +; CHECK-NEXT: ret + +define <4 x i32> @test_srad_2(<4 x i32> %InVec) { +entry: + %shl = ashr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %shl +} + +; CHECK: test_srad_2: +; CHECK: psrad $1, %xmm0 +; CHECK-NEXT: ret + +define <4 x i32> @test_srad_3(<4 x i32> %InVec) { +entry: + %shl = ashr <4 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32> + ret <4 x i32> %shl +} + +; CHECK: test_srad_3: +; CHECK: psrad $32, %xmm0 +; CHECK-NEXT: ret + +; SSE2 Logical Shift Right + +define <8 x i16> @test_srlw_1(<8 x i16> %InVec) { +entry: + %shl = lshr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> + ret <8 x i16> %shl +} + +; CHECK: test_srlw_1: +; CHECK: psrlw $0, %xmm0 +; CHECK-NEXT: ret + +define <8 x i16> @test_srlw_2(<8 x i16> %InVec) { +entry: + %shl = lshr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %shl +} + +; CHECK: test_srlw_2: +; CHECK: psrlw $1, %xmm0 +; CHECK-NEXT: ret + +define <8 x i16> @test_srlw_3(<8 x i16> %InVec) { +entry: + %shl = lshr <8 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> + ret <8 x i16> %shl +} + +; CHECK: test_srlw_3: +; CHECK: xorps %xmm0, %xmm0 +; CHECK-NEXT: ret + +define <4 x i32> @test_srld_1(<4 x i32> %InVec) { +entry: + %shl = lshr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0> + ret <4 x i32> %shl +} + +; CHECK: test_srld_1: +; CHECK: psrld $0, %xmm0 +; CHECK-NEXT: ret + +define <4 x i32> @test_srld_2(<4 x i32> %InVec) { +entry: + %shl = lshr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %shl +} + +; CHECK: test_srld_2: +; CHECK: psrld $1, %xmm0 +; CHECK-NEXT: ret + +define <4 x i32> @test_srld_3(<4 x i32> %InVec) { +entry: + %shl = lshr <4 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32> + ret <4 x i32> %shl +} + +; CHECK: test_srld_3: +; CHECK: xorps %xmm0, %xmm0 +; CHECK-NEXT: ret 
+ +define <2 x i64> @test_srlq_1(<2 x i64> %InVec) { +entry: + %shl = lshr <2 x i64> %InVec, <i64 0, i64 0> + ret <2 x i64> %shl +} + +; CHECK: test_srlq_1: +; CHECK: psrlq $0, %xmm0 +; CHECK-NEXT: ret + +define <2 x i64> @test_srlq_2(<2 x i64> %InVec) { +entry: + %shl = lshr <2 x i64> %InVec, <i64 1, i64 1> + ret <2 x i64> %shl +} + +; CHECK: test_srlq_2: +; CHECK: psrlq $1, %xmm0 +; CHECK-NEXT: ret + +define <2 x i64> @test_srlq_3(<2 x i64> %InVec) { +entry: + %shl = lshr <2 x i64> %InVec, <i64 64, i64 64> + ret <2 x i64> %shl +} + +; CHECK: test_srlq_3: +; CHECK: xorps %xmm0, %xmm0 +; CHECK-NEXT: ret |