Recognize code for doing vector gather/scatter index calculations with 32-bit indices.

Instead of shuffling each element out of the index vector when all indices are needed, just store the input vector to the stack and load the elements out.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@98588 91177308-0d34-0410-b5e6-96231b3b80d8
author: Dan Gohman <gohman@apple.com> 2010-03-15 23:23:03 +0000
committer: Dan Gohman <gohman@apple.com> 2010-03-15 23:23:03 +0000
commit: 1bbf72b069d8f01779e99c8de2de8501dd3df20c (patch)
tree: 705d30bc08a4517fc94939e4e9cbc267b40a6696 /test/CodeGen/X86/gather-addresses.ll
parent: 33cc5cb9837469dabf31cc5a474e2c27d2b7d144 (diff)
download: llvm-1bbf72b069d8f01779e99c8de2de8501dd3df20c.tar.gz
llvm-1bbf72b069d8f01779e99c8de2de8501dd3df20c.tar.bz2
llvm-1bbf72b069d8f01779e99c8de2de8501dd3df20c.tar.xz
1 files changed, 39 insertions, 0 deletions
diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll
new file mode 100644
index 0000000000..07198386b8
--- /dev/null
+++ b/test/CodeGen/X86/gather-addresses.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; When doing vector gather-scatter index calculation with 32-bit indices,
+; bounce the vector off of cache rather than shuffling each individual
+; element out of the index vector.
+
+; CHECK: pand     (%rdx), %xmm0
+; CHECK: movaps   %xmm0, -24(%rsp)
+; CHECK: movslq   -24(%rsp), %rax
+; CHECK: movsd    (%rdi,%rax,8), %xmm0
+; CHECK: movslq   -20(%rsp), %rax
+; CHECK: movhpd   (%rdi,%rax,8), %xmm0
+; CHECK: movslq   -16(%rsp), %rax
+; CHECK: movsd    (%rdi,%rax,8), %xmm1
+; CHECK: movslq   -12(%rsp), %rax
+; CHECK: movhpd   (%rdi,%rax,8), %xmm1
+
+define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {  ; gather four doubles from %p at the indices (*%i & *%h)
+  %a = load <4 x i32>* %i  ; first <4 x i32> operand, loaded as a whole vector
+  %b = load <4 x i32>* %h  ; second <4 x i32> operand
+  %j = and <4 x i32> %a, %b  ; index vector is computed, not loaded directly (matches the pand CHECK); presumably so it lives in a vector register -- confirm
+  %d0 = extractelement <4 x i32> %j, i32 0  ; all four lanes of %j are extracted: this is the "all indices needed" case
+  %d1 = extractelement <4 x i32> %j, i32 1
+  %d2 = extractelement <4 x i32> %j, i32 2
+  %d3 = extractelement <4 x i32> %j, i32 3
+  %q0 = getelementptr double* %p, i32 %d0  ; address of element %d0 of %p; the CHECK lines expect movslq (sign-extend) then (%rdi,%rax,8)
+  %q1 = getelementptr double* %p, i32 %d1
+  %q2 = getelementptr double* %p, i32 %d2
+  %q3 = getelementptr double* %p, i32 %d3
+  %r0 = load double* %q0  ; four independent scalar loads of the gathered elements
+  %r1 = load double* %q1
+  %r2 = load double* %q2
+  %r3 = load double* %q3
+  %v0 = insertelement <4 x double> undef, double %r0, i32 0  ; build the result lane by lane, starting from undef
+  %v1 = insertelement <4 x double> %v0, double %r1, i32 1
+  %v2 = insertelement <4 x double> %v1, double %r2, i32 2
+  %v3 = insertelement <4 x double> %v2, double %r3, i32 3
+  ret <4 x double> %v3  ; the CHECK lines expect the result in %xmm0 (lanes 0-1) and %xmm1 (lanes 2-3)
+}
author	Dan Gohman <gohman@apple.com>	2010-03-15 23:23:03 +0000
committer	Dan Gohman <gohman@apple.com>	2010-03-15 23:23:03 +0000
commit	1bbf72b069d8f01779e99c8de2de8501dd3df20c (patch)
tree	705d30bc08a4517fc94939e4e9cbc267b40a6696 /test/CodeGen/X86/gather-addresses.ll
parent	33cc5cb9837469dabf31cc5a474e2c27d2b7d144 (diff)
download	llvm-1bbf72b069d8f01779e99c8de2de8501dd3df20c.tar.gz llvm-1bbf72b069d8f01779e99c8de2de8501dd3df20c.tar.bz2 llvm-1bbf72b069d8f01779e99c8de2de8501dd3df20c.tar.xz