summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorMichael J. Spencer <bigcheesegs@gmail.com>2014-04-24 00:58:18 +0000
committerMichael J. Spencer <bigcheesegs@gmail.com>2014-04-24 00:58:18 +0000
commit96363d50018da6e9cfa549695f01aa0d4d2d6a31 (patch)
treed635c6136ee4443868323391f2e16e4e707433ae /test
parentb53610141c15b6004a6bc8ac060b970ce9da2f50 (diff)
downloadllvm-96363d50018da6e9cfa549695f01aa0d4d2d6a31.tar.gz
llvm-96363d50018da6e9cfa549695f01aa0d4d2d6a31.tar.bz2
llvm-96363d50018da6e9cfa549695f01aa0d4d2d6a31.tar.xz
[InstCombine][x86] Constant fold psll intrinsics.
This excludes avx512 as I don't have hardware to verify. It excludes _dq variants because they are represented in the IR as <{2,4} x i64> when it's actually a byte shift of the entire i{128,265}. This also excludes _dq_bs as they aren't at all supported by the backend. There are also no corresponding instructions in the ISA. I have no idea why they exist... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207058 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r--test/Transforms/InstCombine/vec_demanded_elts.ll110
1 files changed, 110 insertions, 0 deletions
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 70370196e1..35ba09313d 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define i16 @test1(float %f) {
entry:
@@ -337,3 +338,112 @@ define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
%a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
ret <4 x double> %a
}
+
+define <2 x i64> @test_sse2_1() nounwind readnone uwtable {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+
+; CHECK: test_sse2_1
+; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
+}
+
+define <4 x i64> @test_avx2_1() nounwind readnone uwtable {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_1
+; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
+}
+
+define <2 x i64> @test_sse2_0() nounwind readnone uwtable {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+
+; CHECK: test_sse2_0
+; CHECK: ret <2 x i64> zeroinitializer
+}
+
+define <4 x i64> @test_avx2_0() nounwind readnone uwtable {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_0
+; CHECK: ret <4 x i64> zeroinitializer
+}
+
+declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
+
+attributes #1 = { nounwind readnone }