Synthesize SSE3/AVX 128-bit horizontal add/sub instructions from

floating-point add/sub of appropriate shuffle vectors. Does not synthesize the 256-bit AVX versions because they work differently. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140332 91177308-0d34-0410-b5e6-96231b3b80d8
author: Duncan Sands <baldrick@free.fr> 2011-09-22 20:15:48 +0000
committer: Duncan Sands <baldrick@free.fr> 2011-09-22 20:15:48 +0000
commit: 17470bee5fd18bb2eae7825dae535c060a34ee7d (patch)
tree: fb9368f9a8f91124a5a1a541ca9a5f48b2d3a6d1 /test
parent: d102a03b36fb522899cefc31a396c9793b929cf6 (diff)
download: llvm-17470bee5fd18bb2eae7825dae535c060a34ee7d.tar.gz
llvm-17470bee5fd18bb2eae7825dae535c060a34ee7d.tar.bz2
llvm-17470bee5fd18bb2eae7825dae535c060a34ee7d.tar.xz
1 files changed, 194 insertions, 0 deletions
diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll
new file mode 100644
index 0000000000..91758ead63
--- /dev/null
+++ b/test/CodeGen/X86/haddsub.ll
@@ -0,0 +1,194 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,-avx | FileCheck %s -check-prefix=SSE3
+; RUN: llc < %s -march=x86-64 -mattr=-sse3,+avx | FileCheck %s -check-prefix=AVX
+
+; SSE3: haddpd1:
+; SSE3-NOT: vhaddpd
+; SSE3: {{[^v]}}haddpd {{.*}}xmm
+; AVX: haddpd1:
+; AVX: vhaddpd
+define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhaddpd" / the symbol name can't satisfy it
+  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2> ; <x0, y0>
+  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3> ; <x1, y1>
+  %r = fadd <2 x double> %a, %b ; <x0+x1, y0+y1>
+  ret <2 x double> %r
+}
+
+; SSE3: haddpd2:
+; SSE3-NOT: vhaddpd
+; SSE3: {{[^v]}}haddpd {{.*}}xmm
+; AVX: haddpd2:
+; AVX: vhaddpd
+define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhaddpd" / the symbol name can't satisfy it
+  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 2> ; <x1, y0>
+  %b = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> <i32 2, i32 1> ; operands swapped: <x0, y1>
+  %r = fadd <2 x double> %a, %b ; <x1+x0, y0+y1>, i.e. the pairs appear in commuted order
+  ret <2 x double> %r
+}
+
+; SSE3: haddpd3:
+; SSE3-NOT: vhaddpd
+; SSE3: {{[^v]}}haddpd {{.*}}xmm
+; AVX: haddpd3:
+; AVX: vhaddpd
+define <2 x double> @haddpd3(<2 x double> %x) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhaddpd" / the symbol name can't satisfy it
+  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef> ; <x0, undef>
+  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef> ; <x1, undef>
+  %r = fadd <2 x double> %a, %b ; <x0+x1, undef>: only lane 0 is defined
+  ret <2 x double> %r
+}
+
+; SSE3: haddps1:
+; SSE3-NOT: vhaddps
+; SSE3: {{[^v]}}haddps {{.*}}xmm
+; AVX: haddps1:
+; AVX: vhaddps
+define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhaddps" / the symbol name can't satisfy it
+  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; <x0, x2, y0, y2>
+  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; <x1, x3, y1, y3>
+  %r = fadd <4 x float> %a, %b ; <x0+x1, x2+x3, y0+y1, y2+y3>
+  ret <4 x float> %r
+}
+
+; SSE3: haddps2:
+; SSE3-NOT: vhaddps
+; SSE3: {{[^v]}}haddps {{.*}}xmm
+; AVX: haddps2:
+; AVX: vhaddps
+define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhaddps" / the symbol name can't satisfy it
+  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6> ; <x1, x2, y1, y2>
+  %b = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3> ; operands swapped: <x0, x3, y0, y3>
+  %r = fadd <4 x float> %a, %b ; <x1+x0, x2+x3, y1+y0, y2+y3>, i.e. some pairs are commuted
+  ret <4 x float> %r
+}
+
+; SSE3: haddps3:
+; SSE3-NOT: vhaddps
+; SSE3: {{[^v]}}haddps {{.*}}xmm
+; AVX: haddps3:
+; AVX: vhaddps
+define <4 x float> @haddps3(<4 x float> %x) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhaddps" / the symbol name can't satisfy it
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6> ; <undef, x2, undef, undef> (4 and 6 index the undef operand)
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7> ; <undef, x3, undef, undef> (5 and 7 index the undef operand)
+  %r = fadd <4 x float> %a, %b ; only lane 1 (x2+x3) is defined
+  ret <4 x float> %r
+}
+
+; SSE3: haddps4:
+; SSE3-NOT: vhaddps
+; SSE3: {{[^v]}}haddps {{.*}}xmm
+; AVX: haddps4:
+; AVX: vhaddps
+define <4 x float> @haddps4(<4 x float> %x) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhaddps" / the symbol name can't satisfy it
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> ; <x0, x2, undef, undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> ; <x1, x3, undef, undef>
+  %r = fadd <4 x float> %a, %b ; <x0+x1, x2+x3, undef, undef>
+  ret <4 x float> %r
+}
+
+; SSE3: haddps5:
+; SSE3-NOT: vhaddps
+; SSE3: {{[^v]}}haddps {{.*}}xmm
+; AVX: haddps5:
+; AVX: vhaddps
+define <4 x float> @haddps5(<4 x float> %x) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhaddps" / the symbol name can't satisfy it
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef> ; <x0, x3, undef, undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef> ; <x1, x2, undef, undef>
+  %r = fadd <4 x float> %a, %b ; <x0+x1, x3+x2, undef, undef>: lane 1 pair is commuted
+  ret <4 x float> %r
+}
+
+; SSE3: haddps6:
+; SSE3-NOT: vhaddps
+; SSE3: {{[^v]}}haddps {{.*}}xmm
+; AVX: haddps6:
+; AVX: vhaddps
+define <4 x float> @haddps6(<4 x float> %x) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhaddps" / the symbol name can't satisfy it
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> ; <x0, undef, undef, undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; <x1, undef, undef, undef>
+  %r = fadd <4 x float> %a, %b ; only lane 0 (x0+x1) is defined
+  ret <4 x float> %r
+}
+
+; SSE3: haddps7:
+; SSE3-NOT: vhaddps
+; SSE3: {{[^v]}}haddps {{.*}}xmm
+; AVX: haddps7:
+; AVX: vhaddps
+define <4 x float> @haddps7(<4 x float> %x) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhaddps" / the symbol name can't satisfy it
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef> ; <undef, x3, undef, undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef> ; <undef, x2, undef, undef>
+  %r = fadd <4 x float> %a, %b ; only lane 1 (x3+x2, commuted pair) is defined
+  ret <4 x float> %r
+}
+
+; SSE3: hsubpd1:
+; SSE3-NOT: vhsubpd
+; SSE3: {{[^v]}}hsubpd {{.*}}xmm
+; AVX: hsubpd1:
+; AVX: vhsubpd
+define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhsubpd" / the symbol name can't satisfy it
+  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2> ; <x0, y0>
+  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3> ; <x1, y1>
+  %r = fsub <2 x double> %a, %b ; <x0-x1, y0-y1>
+  ret <2 x double> %r
+}
+
+; SSE3: hsubpd2:
+; SSE3-NOT: vhsubpd
+; SSE3: {{[^v]}}hsubpd {{.*}}xmm
+; AVX: hsubpd2:
+; AVX: vhsubpd
+define <2 x double> @hsubpd2(<2 x double> %x) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhsubpd" / the symbol name can't satisfy it
+  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef> ; <x0, undef>
+  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef> ; <x1, undef>
+  %r = fsub <2 x double> %a, %b ; <x0-x1, undef>: only lane 0 is defined
+  ret <2 x double> %r
+}
+
+; SSE3: hsubps1:
+; SSE3-NOT: vhsubps
+; SSE3: {{[^v]}}hsubps {{.*}}xmm
+; AVX: hsubps1:
+; AVX: vhsubps
+define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhsubps" / the symbol name can't satisfy it
+  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; <x0, x2, y0, y2>
+  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; <x1, x3, y1, y3>
+  %r = fsub <4 x float> %a, %b ; <x0-x1, x2-x3, y0-y1, y2-y3>
+  ret <4 x float> %r
+}
+
+; SSE3: hsubps2:
+; SSE3-NOT: vhsubps
+; SSE3: {{[^v]}}hsubps {{.*}}xmm
+; AVX: hsubps2:
+; AVX: vhsubps
+define <4 x float> @hsubps2(<4 x float> %x) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhsubps" / the symbol name can't satisfy it
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6> ; <undef, x2, undef, undef> (4 and 6 index the undef operand)
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7> ; <undef, x3, undef, undef> (5 and 7 index the undef operand)
+  %r = fsub <4 x float> %a, %b ; only lane 1 (x2-x3) is defined
+  ret <4 x float> %r
+}
+
+; SSE3: hsubps3:
+; SSE3-NOT: vhsubps
+; SSE3: {{[^v]}}hsubps {{.*}}xmm
+; AVX: hsubps3:
+; AVX: vhsubps
+define <4 x float> @hsubps3(<4 x float> %x) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhsubps" / the symbol name can't satisfy it
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> ; <x0, x2, undef, undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> ; <x1, x3, undef, undef>
+  %r = fsub <4 x float> %a, %b ; <x0-x1, x2-x3, undef, undef>
+  ret <4 x float> %r
+}
+
+; SSE3: hsubps4:
+; SSE3-NOT: vhsubps
+; SSE3: {{[^v]}}hsubps {{.*}}xmm
+; AVX: hsubps4:
+; AVX: vhsubps
+define <4 x float> @hsubps4(<4 x float> %x) { ; non-VEX pattern needs a non-'v' char before and an operand after, so "vhsubps" / the symbol name can't satisfy it
+  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> ; <x0, undef, undef, undef>
+  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; <x1, undef, undef, undef>
+  %r = fsub <4 x float> %a, %b ; only lane 0 (x0-x1) is defined
+  ret <4 x float> %r
+}
author	Duncan Sands <baldrick@free.fr>	2011-09-22 20:15:48 +0000
committer	Duncan Sands <baldrick@free.fr>	2011-09-22 20:15:48 +0000
commit	17470bee5fd18bb2eae7825dae535c060a34ee7d (patch)
tree	fb9368f9a8f91124a5a1a541ca9a5f48b2d3a6d1 /test
parent	d102a03b36fb522899cefc31a396c9793b929cf6 (diff)
download	llvm-17470bee5fd18bb2eae7825dae535c060a34ee7d.tar.gz llvm-17470bee5fd18bb2eae7825dae535c060a34ee7d.tar.bz2 llvm-17470bee5fd18bb2eae7825dae535c060a34ee7d.tar.xz