[X86] Add two combine rules to simplify dag nodes introduced during type legalization when promoting nodes with illegal vector type.

This patch teaches the backend how to simplify/canonicalize dag node sequences normally introduced by the backend when promoting certain dag nodes with illegal vector type. This patch adds two new combine rules: 1) fold (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) -> (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>) 2) fold (BINOP (shuffle (A, Undef, <Mask>)), (shuffle (B, Undef, <Mask>))) -> (shuffle (BINOP A, B), Undef, <Mask>). Both rules are only triggered on the type-legalized DAG. In particular, rule 1. is a target specific combine rule that attempts to sink a bitconvert into the operands of a binary operation. Rule 2. is a target independet rule that attempts to move a shuffle immediately after a binary operation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209930 91177308-0d34-0410-b5e6-96231b3b80d8
author: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-05-30 23:17:53 +0000
committer: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-05-30 23:17:53 +0000
commit: 1726e2ff15a44ea39b6bc1fb719a459de5656b8b (patch)
tree: 24f4b08344c32fe74c3a6d75d3d0e29793c3fdec /test
parent: d99cefbad17a3680601914ff73a28d3214f91c70 (diff)
download: llvm-1726e2ff15a44ea39b6bc1fb719a459de5656b8b.tar.gz
llvm-1726e2ff15a44ea39b6bc1fb719a459de5656b8b.tar.bz2
llvm-1726e2ff15a44ea39b6bc1fb719a459de5656b8b.tar.xz
2 files changed, 281 insertions, 27 deletions
diff --git a/test/CodeGen/X86/combine-64bit-vec-binop.ll b/test/CodeGen/X86/combine-64bit-vec-binop.ll
new file mode 100644
index 0000000000..8440fdab0e
--- /dev/null
+++ b/test/CodeGen/X86/combine-64bit-vec-binop.ll
@@ -0,0 +1,273 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+
+
+define double @test1_add(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %add = add <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %add to double
+  ret double %3
+}
+; CHECK-LABEL: test1_add
+; SSE41: paddd
+; AVX: vpaddd
+; CHECK-NEXT: ret
+
+
+define double @test2_add(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %add = add <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %add to double
+  ret double %3
+}
+; CHECK-LABEL: test2_add
+; SSE41: paddw
+; AVX: vpaddw
+; CHECK-NEXT: ret
+
+define double @test3_add(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %add = add <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %add to double
+  ret double %3
+}
+; CHECK-LABEL: test3_add
+; SSE41: paddb
+; AVX: vpaddb
+; CHECK-NEXT: ret
+
+
+define double @test1_sub(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %sub = sub <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %sub to double
+  ret double %3
+}
+; CHECK-LABEL: test1_sub
+; SSE41: psubd
+; AVX: vpsubd
+; CHECK-NEXT: ret
+
+
+define double @test2_sub(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %sub = sub <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %sub to double
+  ret double %3
+}
+; CHECK-LABEL: test2_sub
+; SSE41: psubw
+; AVX: vpsubw
+; CHECK-NEXT: ret
+
+
+define double @test3_sub(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %sub = sub <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %sub to double
+  ret double %3
+}
+; CHECK-LABEL: test3_sub
+; SSE41: psubb
+; AVX: vpsubb
+; CHECK-NEXT: ret
+
+
+define double @test1_mul(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %mul = mul <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %mul to double
+  ret double %3
+}
+; CHECK-LABEL: test1_mul
+; SSE41: pmulld
+; AVX: vpmulld
+; CHECK-NEXT: ret
+
+
+define double @test2_mul(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %mul = mul <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %mul to double
+  ret double %3
+}
+; CHECK-LABEL: test2_mul
+; SSE41: pmullw
+; AVX: vpmullw
+; CHECK-NEXT: ret
+
+; There is no legal ISD::MUL with type MVT::v8i16.
+define double @test3_mul(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %mul = mul <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %mul to double
+  ret double %3
+}
+; CHECK-LABEL: test3_mul
+; CHECK: pmullw
+; CHECK-NEXT: pshufb
+; CHECK-NEXT: ret
+
+
+define double @test1_and(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %and = and <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %and to double
+  ret double %3
+}
+; CHECK-LABEL: test1_and
+; SSE41: andps
+; AVX: vandps
+; CHECK-NEXT: ret
+
+
+define double @test2_and(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %and = and <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %and to double
+  ret double %3
+}
+; CHECK-LABEL: test2_and
+; SSE41: andps
+; AVX: vandps
+; CHECK-NEXT: ret
+
+
+define double @test3_and(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %and = and <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %and to double
+  ret double %3
+}
+; CHECK-LABEL: test3_and
+; SSE41: andps
+; AVX: vandps
+; CHECK-NEXT: ret
+
+
+define double @test1_or(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %or = or <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %or to double
+  ret double %3
+}
+; CHECK-LABEL: test1_or
+; SSE41: orps
+; AVX: vorps
+; CHECK-NEXT: ret
+
+
+define double @test2_or(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %or = or <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %or to double
+  ret double %3
+}
+; CHECK-LABEL: test2_or
+; SSE41: orps
+; AVX: vorps
+; CHECK-NEXT: ret
+
+
+define double @test3_or(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %or = or <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %or to double
+  ret double %3
+}
+; CHECK-LABEL: test3_or
+; SSE41: orps
+; AVX: vorps
+; CHECK-NEXT: ret
+
+
+define double @test1_xor(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %xor = xor <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %xor to double
+  ret double %3
+}
+; CHECK-LABEL: test1_xor
+; SSE41: xorps
+; AVX: vxorps
+; CHECK-NEXT: ret
+
+
+define double @test2_xor(double %A, double %B) {
+  %1 = bitcast double %A to <4 x i16>
+  %2 = bitcast double %B to <4 x i16>
+  %xor = xor <4 x i16> %1, %2
+  %3 = bitcast <4 x i16> %xor to double
+  ret double %3
+}
+; CHECK-LABEL: test2_xor
+; SSE41: xorps
+; AVX: vxorps
+; CHECK-NEXT: ret
+
+
+define double @test3_xor(double %A, double %B) {
+  %1 = bitcast double %A to <8 x i8>
+  %2 = bitcast double %B to <8 x i8>
+  %xor = xor <8 x i8> %1, %2
+  %3 = bitcast <8 x i8> %xor to double
+  ret double %3
+}
+; CHECK-LABEL: test3_xor
+; SSE41: xorps
+; AVX: vxorps
+; CHECK-NEXT: ret
+
+
+define double @test_fadd(double %A, double %B) {
+  %1 = bitcast double %A to <2 x float>
+  %2 = bitcast double %B to <2 x float>
+  %add = fadd <2 x float> %1, %2
+  %3 = bitcast <2 x float> %add to double
+  ret double %3
+}
+; CHECK-LABEL: test_fadd
+; SSE41: addps
+; AVX: vaddps
+; CHECK-NEXT: ret
+
+define double @test_fsub(double %A, double %B) {
+  %1 = bitcast double %A to <2 x float>
+  %2 = bitcast double %B to <2 x float>
+  %sub = fsub <2 x float> %1, %2
+  %3 = bitcast <2 x float> %sub to double
+  ret double %3
+}
+; CHECK-LABEL: test_fsub
+; SSE41: subps
+; AVX: vsubps
+; CHECK-NEXT: ret
+
+define double @test_fmul(double %A, double %B) {
+  %1 = bitcast double %A to <2 x float>
+  %2 = bitcast double %B to <2 x float>
+  %mul = fmul <2 x float> %1, %2
+  %3 = bitcast <2 x float> %mul to double
+  ret double %3
+}
+; CHECK-LABEL: test_fmul
+; SSE41: mulps
+; AVX: vmulps
+; CHECK-NEXT: ret
+
diff --git a/test/CodeGen/X86/lower-bitcast.ll b/test/CodeGen/X86/lower-bitcast.ll
index b9b29a558e..769831ee81 100644
--- a/test/CodeGen/X86/lower-bitcast.ll
+++ b/test/CodeGen/X86/lower-bitcast.ll
@@ -14,7 +14,7 @@ define double @test1(double %A) {
 ; CHECK-LABEL: test1
 ; CHECK-NOT: movsd
 ; CHECK: pshufd
-; CHECK-NEXT: paddq
+; CHECK-NEXT: paddd
 ; CHECK-NEXT: pshufd
 ; CHECK-NEXT: ret
 
@@ -26,16 +26,9 @@ define double @test2(double %A, double %B) {
   %3 = bitcast <2 x i32> %add to double
   ret double %3
 }
-; FIXME: Ideally we should be able to fold the entire body of @test2 into a
-; single 'paddd %xmm1, %xmm0' instruction. At the moment we produce the
-; sequence pshufd+pshufd+paddq+pshufd.
-
 ; CHECK-LABEL: test2
 ; CHECK-NOT: movsd
-; CHECK: pshufd
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: paddq
-; CHECK-NEXT: pshufd
+; CHECK: paddd
 ; CHECK-NEXT: ret
 
 
@@ -91,7 +84,7 @@ define double @test6(double %A) {
 ; CHECK-LABEL: test6
 ; CHECK-NOT: movsd
 ; CHECK: punpcklwd
-; CHECK-NEXT: paddd
+; CHECK-NEXT: paddw
 ; CHECK-NEXT: pshufb
 ; CHECK-NEXT: ret
 
@@ -103,16 +96,10 @@ define double @test7(double %A, double %B) {
   %3 = bitcast <4 x i16> %add to double
   ret double %3
 }
-; FIXME: Ideally we should be able to fold the entire body of @test7 into a
-; single 'paddw %xmm1, %xmm0' instruction. At the moment we produce the
-; sequence pshufd+pshufd+paddd+pshufd.
-
 ; CHECK-LABEL: test7
 ; CHECK-NOT: movsd
-; CHECK: punpcklwd
-; CHECK-NEXT: punpcklwd
-; CHECK-NEXT: paddd
-; CHECK-NEXT: pshufb
+; CHECK-NOT: punpcklwd
+; CHECK: paddw
 ; CHECK-NEXT: ret
 
 
@@ -129,7 +116,7 @@ define double @test8(double %A) {
 ; CHECK-LABEL: test8
 ; CHECK-NOT: movsd
 ; CHECK: punpcklbw
-; CHECK-NEXT: paddw
+; CHECK-NEXT: paddb
 ; CHECK-NEXT: pshufb
 ; CHECK-NEXT: ret
 
@@ -141,15 +128,9 @@ define double @test9(double %A, double %B) {
   %3 = bitcast <8 x i8> %add to double
   ret double %3
 }
-; FIXME: Ideally we should be able to fold the entire body of @test9 into a
-; single 'paddb %xmm1, %xmm0' instruction. At the moment we produce the
-; sequence pshufd+pshufd+paddw+pshufd.
-
 ; CHECK-LABEL: test9
 ; CHECK-NOT: movsd
-; CHECK: punpcklbw
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: paddw
-; CHECK-NEXT: pshufb
+; CHECK-NOT: punpcklbw
+; CHECK: paddb
 ; CHECK-NEXT: ret
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2014-05-30 23:17:53 +0000
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	2014-05-30 23:17:53 +0000
commit	1726e2ff15a44ea39b6bc1fb719a459de5656b8b (patch)
tree	24f4b08344c32fe74c3a6d75d3d0e29793c3fdec /test
parent	d99cefbad17a3680601914ff73a28d3214f91c70 (diff)
download	llvm-1726e2ff15a44ea39b6bc1fb719a459de5656b8b.tar.gz llvm-1726e2ff15a44ea39b6bc1fb719a459de5656b8b.tar.bz2 llvm-1726e2ff15a44ea39b6bc1fb719a459de5656b8b.tar.xz