summary | refs | log | tree | commit | diff
path: root/test/CodeGen/X86/sandybridge-loads.ll
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2013-01-18 23:10:30 +0000
committerNadav Rotem <nrotem@apple.com>2013-01-18 23:10:30 +0000
commit48177ac90fb940833b9deea1a6716092348cfe82 (patch)
tree5252f0617e256f0dd1f8b26082f05a088d7232b9 /test/CodeGen/X86/sandybridge-loads.ll
parent7336f7febb5170b374a4cbffee273ad82ff8a1a3 (diff)
downloadllvm-48177ac90fb940833b9deea1a6716092348cfe82.tar.gz
llvm-48177ac90fb940833b9deea1a6716092348cfe82.tar.bz2
llvm-48177ac90fb940833b9deea1a6716092348cfe82.tar.xz
On Sandybridge loading unaligned 256bits using two XMM loads (vmovups and vinsertf128) is faster than using a single vmovups instruction.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172868 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/sandybridge-loads.ll')
-rw-r--r--  test/CodeGen/X86/sandybridge-loads.ll | 21
1 file changed, 21 insertions, 0 deletions
diff --git a/test/CodeGen/X86/sandybridge-loads.ll b/test/CodeGen/X86/sandybridge-loads.ll
new file mode 100644
index 0000000000..d85c32eaa7
--- /dev/null
+++ b/test/CodeGen/X86/sandybridge-loads.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
+
+;CHECK: wideloads
+;CHECK: vmovaps
+;CHECK: vinsertf128
+;CHECK: vmovups
+;CHECK-NOT: vinsertf128
+;CHECK: ret
+
+define void @wideloads(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+ %v0 = load <8 x float>* %a, align 16 ; <---- unaligned!
+ %v1 = load <8 x float>* %b, align 32 ; <---- aligned!
+ %m0 = fcmp olt <8 x float> %v1, %v0
+ %v2 = load <8 x float>* %c, align 16
+ %m1 = fcmp olt <8 x float> %v2, %v0
+ %mand = and <8 x i1> %m1, %m0
+ %r = zext <8 x i1> %mand to <8 x i32>
+ store <8 x i32> %r, <8 x i32>* undef, align 16
+ ret void
+}
+