summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
author Elena Demikhovsky <elena.demikhovsky@intel.com> 2012-04-15 11:18:59 +0000
committer Elena Demikhovsky <elena.demikhovsky@intel.com> 2012-04-15 11:18:59 +0000
commit 73c504af9d86a426532ee32c5d07a4b872794675 (patch)
tree 32a15d77d0ad292ad408ebdf0798c1360b66aa75 /test
parent 8a81df1b7fe2e8c3386c24ef6a4e9cf48cb5f2f1 (diff)
download llvm-73c504af9d86a426532ee32c5d07a4b872794675.tar.gz
llvm-73c504af9d86a426532ee32c5d07a4b872794675.tar.bz2
llvm-73c504af9d86a426532ee32c5d07a4b872794675.tar.xz
Added VPERM optimization for AVX2 shuffles
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154761 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rwxr-xr-x test/CodeGen/X86/avx2-vperm.ll 34
1 files changed, 34 insertions, 0 deletions
diff --git a/test/CodeGen/X86/avx2-vperm.ll b/test/CodeGen/X86/avx2-vperm.ll
new file mode 100755
index 0000000000..d576d0e374
--- /dev/null
+++ b/test/CodeGen/X86/avx2-vperm.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+define <8 x i32> @perm_cl_int_8x32(<8 x i32> %A) nounwind readnone {
+entry: ; single-source shuffle; result element 1 reads source element 7, so the mask crosses the 128-bit halves
+; CHECK-LABEL: perm_cl_int_8x32:
+; CHECK: vpermd
+ %B = shufflevector <8 x i32> %A, <8 x i32> undef, <8 x i32> <i32 0, i32 7, i32 2, i32 1, i32 2, i32 7, i32 6, i32 0>
+ ret <8 x i32> %B
+}
+
+
+define <8 x float> @perm_cl_fp_8x32(<8 x float> %A) nounwind readnone {
+entry: ; mask has undef entries and crosses the 128-bit halves (result 1 <- source 7, result 6 <- source 1)
+; CHECK-LABEL: perm_cl_fp_8x32:
+; CHECK: vpermps
+ %B = shufflevector <8 x float> %A, <8 x float> undef, <8 x i32> <i32 undef, i32 7, i32 2, i32 undef, i32 4, i32 undef, i32 1, i32 6>
+ ret <8 x float> %B
+}
+
+define <4 x i64> @perm_cl_int_4x64(<4 x i64> %A) nounwind readnone {
+entry: ; single-source shuffle that swaps elements 1 and 3, which sit in different 128-bit halves
+; CHECK-LABEL: perm_cl_int_4x64:
+; CHECK: vpermq
+ %B = shufflevector <4 x i64> %A, <4 x i64> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+ ret <4 x i64> %B
+}
+
+define <4 x double> @perm_cl_fp_4x64(<4 x double> %A) nounwind readnone {
+entry: ; single-source shuffle that swaps elements 1 and 3, which sit in different 128-bit halves
+; CHECK-LABEL: perm_cl_fp_4x64:
+; CHECK: vpermpd
+ %B = shufflevector <4 x double> %A, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+ ret <4 x double> %B
+}