diff options
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r-- | test/CodeGen/X86/avx512-arith.ll | 113 | ||||
-rw-r--r-- | test/CodeGen/X86/viabs.ll | 87 |
2 files changed, 200 insertions, 0 deletions
diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll index 3966552e95..4d1c9f7cd9 100644 --- a/test/CodeGen/X86/avx512-arith.ll +++ b/test/CodeGen/X86/avx512-arith.ll @@ -163,6 +163,40 @@ define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { ret <8 x i64> %x } +; CHECK-LABEL: vpaddq_fold_test +; CHECK: vpaddq (% +; CHECK: ret +define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind { + %tmp = load <8 x i64>* %j, align 4 + %x = add <8 x i64> %i, %tmp + ret <8 x i64> %x +} + +; CHECK-LABEL: vpaddq_broadcast_test +; CHECK: vpaddq LCP{{.*}}(%rip){1to8} +; CHECK: ret +define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind { + %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> + ret <8 x i64> %x +} + +; CHECK-LABEL: vpaddq_broadcast2_test +; CHECK: vpaddq (%rdi){1to8} +; CHECK: ret +define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind { + %tmp = load i64* %j + %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0 + %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1 + %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2 + %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3 + %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4 + %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5 + %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6 + %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7 + %x = add <8 x i64> %i, %j.7 + ret <8 x i64> %x +} + ; CHECK-LABEL: vpaddd_test ; CHECK: vpaddd %zmm ; CHECK: ret @@ -171,6 +205,85 @@ define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { ret <16 x i32> %x } +; CHECK-LABEL: vpaddd_fold_test +; CHECK: vpaddd (% +; CHECK: ret +define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind { + %tmp = load <16 x i32>* %j, align 4 + %x = add <16 x i32> %i, %tmp + ret <16 x i32> %x +} + +; CHECK-LABEL: vpaddd_broadcast_test +; CHECK: vpaddd LCP{{.*}}(%rip){1to16} +; CHECK: ret +define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind { + %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <16 x i32> %x +} + +; CHECK-LABEL: vpaddd_mask_test +; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }} +; CHECK: ret +define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %x = add <16 x i32> %i, %j + %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i + ret <16 x i32> %r +} + +; CHECK-LABEL: vpaddd_maskz_test +; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z} }} +; CHECK: ret +define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %x = add <16 x i32> %i, %j + %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %r +} + +; CHECK-LABEL: vpaddd_mask_fold_test +; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }} +; CHECK: ret +define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %j = load <16 x i32>* %j.ptr + %x = add <16 x i32> %i, %j + %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i + ret <16 x i32> %r +} + +; CHECK-LABEL: vpaddd_mask_broadcast_test +; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }} +; CHECK: ret +define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i + ret <16 x i32> %r +} + +; CHECK-LABEL: vpaddd_maskz_fold_test +; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z} +; CHECK: ret +define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %j = load <16 x i32>* %j.ptr + %x = add <16 x i32> %i, %j + %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %r +} + +; CHECK-LABEL: vpaddd_maskz_broadcast_test +; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z} +; CHECK: ret +define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { + %mask = icmp ne <16 x i32> %mask1, zeroinitializer + %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %r +} + ; CHECK-LABEL: vpsubq_test ; CHECK: vpsubq %zmm ; CHECK: ret diff --git a/test/CodeGen/X86/viabs.ll b/test/CodeGen/X86/viabs.ll index 0be00da83f..d9f2cb0747 100644 --- a/test/CodeGen/X86/viabs.ll +++ b/test/CodeGen/X86/viabs.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck %s -check-prefix=SSE2 ; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSSE3 ; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2 +; RUN: llc < %s -march=x86-64 -mcpu=knl | FileCheck %s -check-prefix=AVX512 define <4 x i32> @test1(<4 x i32> %a) nounwind { ; SSE2-LABEL: test1: @@ -17,6 +18,10 @@ define <4 x i32> @test1(<4 x i32> %a) nounwind { ; AVX2-LABEL: test1: ; AVX2: vpabsd ; AVX2-NEXT: ret + +; AVX512-LABEL: test1: +; AVX512: vpabsd +; AVX512-NEXT: ret %tmp1neg = sub <4 x i32> zeroinitializer, %a %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg @@ -38,6 +43,10 @@ define <4 x i32> @test2(<4 x i32> %a) nounwind { ; AVX2-LABEL: test2: ; AVX2: vpabsd ; AVX2-NEXT: ret + +; AVX512-LABEL: test2: +; AVX512: vpabsd +; AVX512-NEXT: ret %tmp1neg = sub <4 x i32> zeroinitializer, %a %b = icmp sge <4 x i32> %a, zeroinitializer %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg @@ -59,6 +68,10 @@ define <8 x i16> @test3(<8 x i16> %a) nounwind { ; AVX2-LABEL: test3: ; AVX2: vpabsw ; AVX2-NEXT: ret + +; AVX512-LABEL: test3: +; AVX512: vpabsw +; AVX512-NEXT: ret %tmp1neg = sub <8 x i16> zeroinitializer, %a %b = icmp sgt <8 x i16> %a, zeroinitializer %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg @@ -80,6 +93,10 @@ define <16 x i8> @test4(<16 x i8> %a) nounwind { ; AVX2-LABEL: test4: ; AVX2: vpabsb ; AVX2-NEXT: ret + +; AVX512-LABEL: test4: +; AVX512: vpabsb +; AVX512-NEXT: ret %tmp1neg = sub <16 x i8> zeroinitializer, %a %b = icmp slt <16 x i8> %a, zeroinitializer %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a @@ -101,6 +118,10 @@ define <4 x i32> @test5(<4 x i32> %a) nounwind { ; AVX2-LABEL: test5: ; AVX2: vpabsd ; AVX2-NEXT: ret + +; AVX512-LABEL: test5: +; AVX512: vpabsd +; AVX512-NEXT: ret %tmp1neg = sub <4 x i32> zeroinitializer, %a %b = icmp sle <4 x i32> %a, zeroinitializer %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a @@ -116,6 +137,10 @@ define <8 x i32> @test6(<8 x i32> %a) nounwind { ; AVX2-LABEL: test6: ; AVX2: vpabsd {{.*}}%ymm ; AVX2-NEXT: ret + +; AVX512-LABEL: test6: +; AVX512: vpabsd {{.*}}%ymm +; AVX512-NEXT: ret %tmp1neg = sub <8 x i32> zeroinitializer, %a %b = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg @@ -131,6 +156,10 @@ define <8 x i32> @test7(<8 x i32> %a) nounwind { ; AVX2-LABEL: test7: ; AVX2: vpabsd {{.*}}%ymm ; AVX2-NEXT: ret + +; AVX512-LABEL: test7: +; AVX512: vpabsd {{.*}}%ymm +; AVX512-NEXT: ret %tmp1neg = sub <8 x i32> zeroinitializer, %a %b = icmp sge <8 x i32> %a, zeroinitializer %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg @@ -146,6 +175,10 @@ define <16 x i16> @test8(<16 x i16> %a) nounwind { ; AVX2-LABEL: test8: ; AVX2: vpabsw {{.*}}%ymm ; AVX2-NEXT: ret + +; AVX512-LABEL: test8: +; AVX512: vpabsw {{.*}}%ymm +; AVX512-NEXT: ret %tmp1neg = sub <16 x i16> zeroinitializer, %a %b = icmp sgt <16 x i16> %a, zeroinitializer %abs = select <16 x i1> %b, <16 x i16> %a, <16 x i16> %tmp1neg @@ -161,6 +194,10 @@ define <32 x i8> @test9(<32 x i8> %a) nounwind { ; AVX2-LABEL: test9: ; AVX2: vpabsb {{.*}}%ymm ; AVX2-NEXT: ret + +; AVX512-LABEL: test9: +; AVX512: vpabsb {{.*}}%ymm +; AVX512-NEXT: ret %tmp1neg = sub <32 x i8> zeroinitializer, %a %b = icmp slt <32 x i8> %a, zeroinitializer %abs = select <32 x i1> %b, <32 x i8> %tmp1neg, <32 x i8> %a @@ -176,8 +213,58 @@ define <8 x i32> @test10(<8 x i32> %a) nounwind { ; AVX2-LABEL: test10: ; AVX2: vpabsd {{.*}}%ymm ; AVX2-NEXT: ret + +; AVX512-LABEL: test10: +; AVX512: vpabsd {{.*}}%ymm +; AVX512-NEXT: ret %tmp1neg = sub <8 x i32> zeroinitializer, %a %b = icmp sle <8 x i32> %a, zeroinitializer %abs = select <8 x i1> %b, <8 x i32> %tmp1neg, <8 x i32> %a ret <8 x i32> %abs } + +define <16 x i32> @test11(<16 x i32> %a) nounwind { +; AVX2-LABEL: test11: +; AVX2: vpabsd +; AVX2: vpabsd +; AVX2-NEXT: ret + +; AVX512-LABEL: test11: +; AVX512: vpabsd {{.*}}%zmm +; AVX512-NEXT: ret + %tmp1neg = sub <16 x i32> zeroinitializer, %a + %b = icmp sle <16 x i32> %a, zeroinitializer + %abs = select <16 x i1> %b, <16 x i32> %tmp1neg, <16 x i32> %a + ret <16 x i32> %abs +} + +define <8 x i64> @test12(<8 x i64> %a) nounwind { +; AVX2-LABEL: test12: +; AVX2: vpxor +; AVX2: vpxor +; AVX2-NEXT: ret + +; AVX512-LABEL: test12: +; AVX512: vpabsq {{.*}}%zmm +; AVX512-NEXT: ret + %tmp1neg = sub <8 x i64> zeroinitializer, %a + %b = icmp sle <8 x i64> %a, zeroinitializer + %abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a + ret <8 x i64> %abs +} + +define <8 x i64> @test13(<8 x i64>* %a.ptr) nounwind { +; AVX2-LABEL: test13: +; AVX2: vpxor +; AVX2: vpxor +; AVX2-NEXT: ret + +; AVX512-LABEL: test13: +; AVX512: vpabsq (% +; AVX512-NEXT: ret + %a = load <8 x i64>* %a.ptr, align 8 + %tmp1neg = sub <8 x i64> zeroinitializer, %a + %b = icmp sle <8 x i64> %a, zeroinitializer + %abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a + ret <8 x i64> %abs +} |