diff options
author | Dan Gohman <gohman@apple.com> | 2010-03-15 23:23:03 +0000 |
---|---|---|
committer | Dan Gohman <gohman@apple.com> | 2010-03-15 23:23:03 +0000 |
commit | 1bbf72b069d8f01779e99c8de2de8501dd3df20c (patch) | |
tree | 705d30bc08a4517fc94939e4e9cbc267b40a6696 /test/CodeGen/X86/gather-addresses.ll | |
parent | 33cc5cb9837469dabf31cc5a474e2c27d2b7d144 (diff) | |
download | llvm-1bbf72b069d8f01779e99c8de2de8501dd3df20c.tar.gz llvm-1bbf72b069d8f01779e99c8de2de8501dd3df20c.tar.bz2 llvm-1bbf72b069d8f01779e99c8de2de8501dd3df20c.tar.xz |
Recognize code for doing vector gather/scatter index calculations with
32-bit indices. Instead of shuffling each element out of the index vector,
when all indices are needed, just store the input vector to the stack and
load the elements out.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@98588 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/gather-addresses.ll')
-rw-r--r-- | test/CodeGen/X86/gather-addresses.ll | 39 |
1 files changed, 39 insertions, 0 deletions
diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll new file mode 100644 index 0000000000..07198386b8 --- /dev/null +++ b/test/CodeGen/X86/gather-addresses.ll @@ -0,0 +1,39 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; When doing vector gather-scatter index calculation with 32-bit indices, +; bounce the vector off of cache rather than shuffling each individual +; element out of the index vector. + +; CHECK: pand (%rdx), %xmm0 +; CHECK: movaps %xmm0, -24(%rsp) +; CHECK: movslq -24(%rsp), %rax +; CHECK: movsd (%rdi,%rax,8), %xmm0 +; CHECK: movslq -20(%rsp), %rax +; CHECK: movhpd (%rdi,%rax,8), %xmm0 +; CHECK: movslq -16(%rsp), %rax +; CHECK: movsd (%rdi,%rax,8), %xmm1 +; CHECK: movslq -12(%rsp), %rax +; CHECK: movhpd (%rdi,%rax,8), %xmm1 + +define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { + %a = load <4 x i32>* %i + %b = load <4 x i32>* %h + %j = and <4 x i32> %a, %b + %d0 = extractelement <4 x i32> %j, i32 0 + %d1 = extractelement <4 x i32> %j, i32 1 + %d2 = extractelement <4 x i32> %j, i32 2 + %d3 = extractelement <4 x i32> %j, i32 3 + %q0 = getelementptr double* %p, i32 %d0 + %q1 = getelementptr double* %p, i32 %d1 + %q2 = getelementptr double* %p, i32 %d2 + %q3 = getelementptr double* %p, i32 %d3 + %r0 = load double* %q0 + %r1 = load double* %q1 + %r2 = load double* %q2 + %r3 = load double* %q3 + %v0 = insertelement <4 x double> undef, double %r0, i32 0 + %v1 = insertelement <4 x double> %v0, double %r1, i32 1 + %v2 = insertelement <4 x double> %v1, double %r2, i32 2 + %v3 = insertelement <4 x double> %v2, double %r3, i32 3 + ret <4 x double> %v3 +} |