diff options
author | Duncan Sands <baldrick@free.fr> | 2011-09-22 20:15:48 +0000 |
---|---|---|
committer | Duncan Sands <baldrick@free.fr> | 2011-09-22 20:15:48 +0000 |
commit | 17470bee5fd18bb2eae7825dae535c060a34ee7d (patch) | |
tree | fb9368f9a8f91124a5a1a541ca9a5f48b2d3a6d1 /test | |
parent | d102a03b36fb522899cefc31a396c9793b929cf6 (diff) | |
download | llvm-17470bee5fd18bb2eae7825dae535c060a34ee7d.tar.gz llvm-17470bee5fd18bb2eae7825dae535c060a34ee7d.tar.bz2 llvm-17470bee5fd18bb2eae7825dae535c060a34ee7d.tar.xz |
Synthesize SSE3/AVX 128-bit horizontal add/sub instructions from
floating-point add/sub of appropriate shuffle vectors. Does not
synthesize the 256-bit AVX versions because they work differently.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140332 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/X86/haddsub.ll | 194 |
1 file changed, 194 insertions, 0 deletions
# Patch preamble (text before the "diff --git" header is not part of any hunk).
# Adds the new test file test/CodeGen/X86/haddsub.ll.
# The file runs llc twice: with -mattr=+sse3,-avx (FileCheck prefix SSE3) and
# with -mattr=-sse3,+avx (FileCheck prefix AVX).
# Every function applies fadd or fsub to two shufflevector results. After each
# function label, the SSE3 checks look for the unprefixed mnemonic
# (haddpd/haddps/hsubpd/hsubps) with no v-prefixed form before it, and the AVX
# checks look for the v-prefixed mnemonic.
diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll
new file mode 100644
index 0000000000..91758ead63
--- /dev/null
+++ b/test/CodeGen/X86/haddsub.ll
@@ -0,0 +1,194 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,-avx | FileCheck %s -check-prefix=SSE3
+; RUN: llc < %s -march=x86-64 -mattr=-sse3,+avx | FileCheck %s -check-prefix=AVX
+
+; SSE3: haddpd1:
+; SSE3-NOT: vhaddpd
+; SSE3: haddpd
+; AVX: haddpd1:
+; AVX: vhaddpd
+define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
+  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
+  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
+  %r = fadd <2 x double> %a, %b
+  ret <2 x double> %r
+}
+
+; SSE3: haddpd2:
+; SSE3-NOT: vhaddpd
+; SSE3: haddpd
+; AVX: haddpd2:
+; AVX: vhaddpd
+define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) {
+  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 2>
+  %b = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> <i32 2, i32 1>
+  %r = fadd <2 x double> %a, %b
+  ret <2 x double> %r
+}
+
+; SSE3: haddpd3:
+; SSE3-NOT: vhaddpd
+; SSE3: haddpd
+; AVX: haddpd3:
+; AVX: vhaddpd
+define <2 x double> @haddpd3(<2 x double> %x) {
+  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
+  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
+  %r = fadd <2 x double> %a, %b
+  ret <2 x double> %r
+}
+
+; SSE3: haddps1:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; AVX: haddps1:
+; AVX: vhaddps
+define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) {
+  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %r = fadd <4 x float> %a, %b
+  ret <4 x float> %r
+}
+
+; SSE3: haddps2:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; AVX: haddps2:
+; AVX: vhaddps
+define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) {
+  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+  %b = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
+  %r = fadd <4 x float> %a, %b
+  ret <4 x float> %r
+}
+
+; SSE3: haddps3:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; AVX: haddps3:
+; AVX: vhaddps
+define <4 x float> @haddps3(<4 x float> %x) {
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
+  %r = fadd <4 x float> %a, %b
+  ret <4 x float> %r
+}
+
+; SSE3: haddps4:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; AVX: haddps4:
+; AVX: vhaddps
+define <4 x float> @haddps4(<4 x float> %x) {
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  %r = fadd <4 x float> %a, %b
+  ret <4 x float> %r
+}
+
+; SSE3: haddps5:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; AVX: haddps5:
+; AVX: vhaddps
+define <4 x float> @haddps5(<4 x float> %x) {
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
+  %r = fadd <4 x float> %a, %b
+  ret <4 x float> %r
+}
+
+; SSE3: haddps6:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; AVX: haddps6:
+; AVX: vhaddps
+define <4 x float> @haddps6(<4 x float> %x) {
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %r = fadd <4 x float> %a, %b
+  ret <4 x float> %r
+}
+
+; SSE3: haddps7:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; AVX: haddps7:
+; AVX: vhaddps
+define <4 x float> @haddps7(<4 x float> %x) {
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
+  %r = fadd <4 x float> %a, %b
+  ret <4 x float> %r
+}
+
+; SSE3: hsubpd1:
+; SSE3-NOT: vhsubpd
+; SSE3: hsubpd
+; AVX: hsubpd1:
+; AVX: vhsubpd
+define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) {
+  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
+  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
+  %r = fsub <2 x double> %a, %b
+  ret <2 x double> %r
+}
+
+; SSE3: hsubpd2:
+; SSE3-NOT: vhsubpd
+; SSE3: hsubpd
+; AVX: hsubpd2:
+; AVX: vhsubpd
+define <2 x double> @hsubpd2(<2 x double> %x) {
+  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
+  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
+  %r = fsub <2 x double> %a, %b
+  ret <2 x double> %r
+}
+
+; SSE3: hsubps1:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; AVX: hsubps1:
+; AVX: vhsubps
+define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) {
+  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %r = fsub <4 x float> %a, %b
+  ret <4 x float> %r
+}
+
+; SSE3: hsubps2:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; AVX: hsubps2:
+; AVX: vhsubps
+define <4 x float> @hsubps2(<4 x float> %x) {
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
+  %r = fsub <4 x float> %a, %b
+  ret <4 x float> %r
+}
+
+; SSE3: hsubps3:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; AVX: hsubps3:
+; AVX: vhsubps
+define <4 x float> @hsubps3(<4 x float> %x) {
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  %r = fsub <4 x float> %a, %b
+  ret <4 x float> %r
+}
+
+; SSE3: hsubps4:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; AVX: hsubps4:
+; AVX: vhsubps
+define <4 x float> @hsubps4(<4 x float> %x) {
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %r = fsub <4 x float> %a, %b
+  ret <4 x float> %r
+}
|