From 3653b13c52b9bce4fd9e2da3cbb893fc769adaec Mon Sep 17 00:00:00 2001 From: Mon P Wang Date: Thu, 3 Sep 2009 19:57:35 +0000 Subject: Test cases for vector shifts changes r80935 Changed the old vector shift test to use FileCheck git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@80936 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/vshift-1.ll | 22 +++++++++++++---- test/CodeGen/X86/vshift-2.ll | 22 +++++++++++++---- test/CodeGen/X86/vshift-3.ll | 19 ++++++++++++--- test/CodeGen/X86/vshift-4.ll | 19 ++++++++++++++- test/CodeGen/X86/vshift-5.ll | 56 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 126 insertions(+), 12 deletions(-) create mode 100644 test/CodeGen/X86/vshift-5.ll diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll index 66a8b46725..506049cf69 100644 --- a/test/CodeGen/X86/vshift-1.ll +++ b/test/CodeGen/X86/vshift-1.ll @@ -1,13 +1,12 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -; RUN: grep psllq %t | count 2 -; RUN: grep pslld %t | count 2 -; RUN: grep psllw %t | count 2 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s ; test vector shifts converted to proper SSE2 vector shifts when the shift ; amounts are the same. 
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind { entry: +; CHECK: shift1a: +; CHECK: psllq %shl = shl <2 x i64> %val, < i64 32, i64 32 > store <2 x i64> %shl, <2 x i64>* %dst ret void @@ -15,6 +14,9 @@ entry: define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind { entry: +; CHECK: shift1b: +; CHECK: movd +; CHECK-NEXT: psllq %0 = insertelement <2 x i64> undef, i64 %amt, i32 0 %1 = insertelement <2 x i64> %0, i64 %amt, i32 1 %shl = shl <2 x i64> %val, %1 @@ -25,6 +27,8 @@ entry: define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind { entry: +; CHECK: shift2a: +; CHECK: pslld %shl = shl <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 > store <4 x i32> %shl, <4 x i32>* %dst ret void @@ -32,6 +36,9 @@ entry: define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { entry: +; CHECK: shift2b: +; CHECK: movd +; CHECK-NEXT: pslld %0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %1 = insertelement <4 x i32> %0, i32 %amt, i32 1 %2 = insertelement <4 x i32> %1, i32 %amt, i32 2 @@ -43,13 +50,20 @@ entry: define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind { entry: +; CHECK: shift3a: +; CHECK: psllw %shl = shl <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 > store <8 x i16> %shl, <8 x i16>* %dst ret void } +; Make sure the shift amount is properly zero extended. 
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind { entry: +; CHECK: shift3b: +; CHECK: movzwl +; CHECK: movd +; CHECK-NEXT: psllw %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll index d47a28f855..0f69e740b9 100644 --- a/test/CodeGen/X86/vshift-2.ll +++ b/test/CodeGen/X86/vshift-2.ll @@ -1,13 +1,12 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -; RUN: grep psrlq %t | count 2 -; RUN: grep psrld %t | count 2 -; RUN: grep psrlw %t | count 2 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s ; test vector shifts converted to proper SSE2 vector shifts when the shift ; amounts are the same. define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind { entry: +; CHECK: shift1a: +; CHECK: psrlq %lshr = lshr <2 x i64> %val, < i64 32, i64 32 > store <2 x i64> %lshr, <2 x i64>* %dst ret void @@ -15,6 +14,9 @@ entry: define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind { entry: +; CHECK: shift1b: +; CHECK: movd +; CHECK-NEXT: psrlq %0 = insertelement <2 x i64> undef, i64 %amt, i32 0 %1 = insertelement <2 x i64> %0, i64 %amt, i32 1 %lshr = lshr <2 x i64> %val, %1 @@ -24,6 +26,8 @@ entry: define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind { entry: +; CHECK: shift2a: +; CHECK: psrld %lshr = lshr <4 x i32> %val, < i32 17, i32 17, i32 17, i32 17 > store <4 x i32> %lshr, <4 x i32>* %dst ret void @@ -31,6 +35,9 @@ entry: define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { entry: +; CHECK: shift2b: +; CHECK: movd +; CHECK-NEXT: psrld %0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %1 = insertelement <4 x i32> %0, i32 %amt, i32 1 %2 = insertelement <4 x i32> %1, i32 %amt, i32 2 @@ -43,13 +50,20 @@ entry: define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind { entry: 
+; CHECK: shift3a: +; CHECK: psrlw %lshr = lshr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 > store <8 x i16> %lshr, <8 x i16>* %dst ret void } +; Make sure the shift amount is properly zero extended. define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind { entry: +; CHECK: shift3b: +; CHECK: movzwl +; CHECK: movd +; CHECK-NEXT: psrlw %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll index ec087763d1..1ec5497d39 100644 --- a/test/CodeGen/X86/vshift-3.ll +++ b/test/CodeGen/X86/vshift-3.ll @@ -1,13 +1,15 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t -; RUN: grep psrad %t | count 2 -; RUN: grep psraw %t | count 2 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s ; test vector shifts converted to proper SSE2 vector shifts when the shift ; amounts are the same. 
; Note that x86 does have ashr + +; shift1a can't use a packed shift: SSE2 has no 64-bit arithmetic right shift (no psraq). define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind { entry: +; CHECK: shift1a: +; CHECK: sarl %ashr = ashr <2 x i64> %val, < i64 32, i64 32 > store <2 x i64> %ashr, <2 x i64>* %dst ret void @@ -15,6 +17,8 @@ entry: define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind { entry: +; CHECK: shift2a: +; CHECK: psrad $5 %ashr = ashr <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 > store <4 x i32> %ashr, <4 x i32>* %dst ret void @@ -22,6 +26,9 @@ entry: define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { entry: +; CHECK: shift2b: +; CHECK: movd +; CHECK-NEXT: psrad %0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %1 = insertelement <4 x i32> %0, i32 %amt, i32 1 %2 = insertelement <4 x i32> %1, i32 %amt, i32 2 @@ -33,6 +40,8 @@ entry: define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind { entry: +; CHECK: shift3a: +; CHECK: psraw $5 %ashr = ashr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 > store <8 x i16> %ashr, <8 x i16>* %dst ret void @@ -40,6 +49,10 @@ entry: define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind { entry: +; CHECK: shift3b: +; CHECK: movzwl +; CHECK: movd +; CHECK-NEXT: psraw %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll index 332e8515ad..33ea1fc875 100644 --- a/test/CodeGen/X86/vshift-4.ll +++ b/test/CodeGen/X86/vshift-4.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx -o %t +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s ; RUN: grep psllq %t | count 1 ; RUN: grep pslld %t | count 3 ; RUN: grep psllw %t | count 2 @@ -8,14 +8,19 @@ define void @shift1a(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind { entry: +; CHECK: shift1a: +; CHECK: 
psllq %shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> %shl = shl <2 x i64> %val, %shamt store <2 x i64> %shl, <2 x i64>* %dst ret void } +; shift1b can't use a packed shift define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind { entry: +; CHECK: shift1b: +; CHECK: shll %shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> %shl = shl <2 x i64> %val, %shamt store <2 x i64> %shl, <2 x i64>* %dst @@ -24,6 +29,8 @@ entry: define void @shift2a(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind { entry: +; CHECK: shift2a: +; CHECK: pslld %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> %shl = shl <4 x i32> %val, %shamt store <4 x i32> %shl, <4 x i32>* %dst @@ -32,6 +39,8 @@ entry: define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind { entry: +; CHECK: shift2b: +; CHECK: pslld %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> %shl = shl <4 x i32> %val, %shamt store <4 x i32> %shl, <4 x i32>* %dst @@ -40,6 +49,8 @@ entry: define void @shift2c(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind { entry: +; CHECK: shift2c: +; CHECK: pslld %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> %shl = shl <4 x i32> %val, %shamt store <4 x i32> %shl, <4 x i32>* %dst @@ -48,6 +59,9 @@ entry: define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind { entry: +; CHECK: shift3a: +; CHECK: movzwl +; CHECK: psllw %shamt = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> %shl = shl <8 x i16> %val, %shamt store <8 x i16> %shl, <8 x i16>* %dst @@ -56,6 +70,9 @@ entry: define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind { entry: +; CHECK: shift3b: +; CHECK: movzwl +; CHECK: psllw %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll new file 
mode 100644 index 0000000000..df2337e663 --- /dev/null +++ b/test/CodeGen/X86/vshift-5.ll @@ -0,0 +1,56 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s + +; When loading the shift amount from memory, avoid generating the splat. + +define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind { +entry: +; CHECK: shift5a: +; CHECK: movd +; CHECK-NEXT: pslld + %amt = load i32* %pamt + %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 + %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer + %shl = shl <4 x i32> %val, %shamt + store <4 x i32> %shl, <4 x i32>* %dst + ret void +} + + +define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind { +entry: +; CHECK: shift5b: +; CHECK: movd +; CHECK-NEXT: psrad + %amt = load i32* %pamt + %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 + %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer + %shr = ashr <4 x i32> %val, %shamt + store <4 x i32> %shr, <4 x i32>* %dst + ret void +} + + +define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { +entry: +; CHECK: shift5c: +; CHECK: movd +; CHECK-NEXT: pslld + %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 + %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer + %shl = shl <4 x i32> %val, %shamt + store <4 x i32> %shl, <4 x i32>* %dst + ret void +} + + +define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { +entry: +; CHECK: shift5d: +; CHECK: movd +; CHECK-NEXT: psrad + %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 + %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer + %shr = ashr <4 x i32> %val, %shamt + store <4 x i32> %shr, <4 x i32>* %dst + ret void +} -- cgit v1.2.3