summaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/pmovext.ll
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2012-09-16 07:39:07 +0000
committerNadav Rotem <nrotem@apple.com>2012-09-16 07:39:07 +0000
commit638e4c13cb25e8ad1044c1d581fb387ec8d15033 (patch)
treeb6e61b7c40456f0778feab86ac8529afa3011633 /test/CodeGen/X86/pmovext.ll
parent3c0e5c9ecedb00d3c36fb2747b642bd3e38d0260 (diff)
downloadllvm-638e4c13cb25e8ad1044c1d581fb387ec8d15033.tar.gz
llvm-638e4c13cb25e8ad1044c1d581fb387ec8d15033.tar.bz2
llvm-638e4c13cb25e8ad1044c1d581fb387ec8d15033.tar.xz
The PMOVZXWD family of functions had patterns that extend narrow vector types to wide vector types.
It had patterns for zext-loading and extending. This commit adds patterns for loading a wide type, performing a bitcast, and extending. This is an odd pattern, but it is commonly used when writing code with intrinsics. rdar://11897677 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163995 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/pmovext.ll')
-rw-r--r--test/CodeGen/X86/pmovext.ll22
1 files changed, 22 insertions, 0 deletions
diff --git a/test/CodeGen/X86/pmovext.ll b/test/CodeGen/X86/pmovext.ll
new file mode 100644
index 0000000000..c4530465c7
--- /dev/null
+++ b/test/CodeGen/X86/pmovext.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+; rdar://11897677
+
+;CHECK: intrin_pmov
+;CHECK: pmovzxbw (%rsi), %xmm0
+;CHECK-NEXT: movdqu
+;CHECK-NEXT: ret
+define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable ssp { ; load 16 bytes from %src as a wide vector, extend with pmovzxbw, store to %dest
+ %1 = bitcast i8* %src to <2 x i64>* ; reinterpret the byte pointer as a pointer to a 128-bit <2 x i64> vector
+ %2 = load <2 x i64>* %1, align 16 ; the "wide type" load described in the commit message
+ %3 = bitcast <2 x i64> %2 to <16 x i8> ; same bits viewed as <16 x i8>, the operand type the intrinsic takes
+ %4 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %3) nounwind ; the checks above expect the load folded into this as a (%rsi) memory operand
+ %5 = bitcast i16* %dest to i8* ; the store intrinsic takes an i8* destination
+ %6 = bitcast <8 x i16> %4 to <16 x i8> ; and a <16 x i8> value, so cast the result back
+ tail call void @llvm.x86.sse2.storeu.dq(i8* %5, <16 x i8> %6) nounwind ; presumably the movdqu the checks expect right after pmovzxbw
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone ; extension intrinsic called by @intrin_pmov: <16 x i8> in, <8 x i16> out
+
+declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind ; store intrinsic called by @intrin_pmov: writes a <16 x i8> through an i8* pointer