diff options
author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-05-30 23:17:53 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-05-30 23:17:53 +0000 |
commit | 1726e2ff15a44ea39b6bc1fb719a459de5656b8b (patch) | |
tree | 24f4b08344c32fe74c3a6d75d3d0e29793c3fdec /test | |
parent | d99cefbad17a3680601914ff73a28d3214f91c70 (diff) | |
download | llvm-1726e2ff15a44ea39b6bc1fb719a459de5656b8b.tar.gz llvm-1726e2ff15a44ea39b6bc1fb719a459de5656b8b.tar.bz2 llvm-1726e2ff15a44ea39b6bc1fb719a459de5656b8b.tar.xz |
[X86] Add two combine rules to simplify dag nodes introduced during type legalization when promoting nodes with illegal vector type.
This patch teaches the backend how to simplify/canonicalize dag node
sequences normally introduced by the backend when promoting certain dag nodes
with illegal vector type.
This patch adds two new combine rules:
1) fold (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
(shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
2) fold (BINOP (shuffle (A, Undef, <Mask>)), (shuffle (B, Undef, <Mask>))) ->
(shuffle (BINOP A, B), Undef, <Mask>).
Both rules are only triggered on the type-legalized DAG.
In particular, rule 1. is a target specific combine rule that attempts
to sink a bitconvert into the operands of a binary operation.
Rule 2. is a target independent rule that attempts to move a shuffle
immediately after a binary operation.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209930 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/X86/combine-64bit-vec-binop.ll | 273 | ||||
-rw-r--r-- | test/CodeGen/X86/lower-bitcast.ll | 35 |
2 files changed, 281 insertions, 27 deletions
diff --git a/test/CodeGen/X86/combine-64bit-vec-binop.ll b/test/CodeGen/X86/combine-64bit-vec-binop.ll new file mode 100644 index 0000000000..8440fdab0e --- /dev/null +++ b/test/CodeGen/X86/combine-64bit-vec-binop.ll @@ -0,0 +1,273 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41 +; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK -check-prefix=AVX + + +define double @test1_add(double %A, double %B) { + %1 = bitcast double %A to <2 x i32> + %2 = bitcast double %B to <2 x i32> + %add = add <2 x i32> %1, %2 + %3 = bitcast <2 x i32> %add to double + ret double %3 +} +; CHECK-LABEL: test1_add +; SSE41: paddd +; AVX: vpaddd +; CHECK-NEXT: ret + + +define double @test2_add(double %A, double %B) { + %1 = bitcast double %A to <4 x i16> + %2 = bitcast double %B to <4 x i16> + %add = add <4 x i16> %1, %2 + %3 = bitcast <4 x i16> %add to double + ret double %3 +} +; CHECK-LABEL: test2_add +; SSE41: paddw +; AVX: vpaddw +; CHECK-NEXT: ret + +define double @test3_add(double %A, double %B) { + %1 = bitcast double %A to <8 x i8> + %2 = bitcast double %B to <8 x i8> + %add = add <8 x i8> %1, %2 + %3 = bitcast <8 x i8> %add to double + ret double %3 +} +; CHECK-LABEL: test3_add +; SSE41: paddb +; AVX: vpaddb +; CHECK-NEXT: ret + + +define double @test1_sub(double %A, double %B) { + %1 = bitcast double %A to <2 x i32> + %2 = bitcast double %B to <2 x i32> + %sub = sub <2 x i32> %1, %2 + %3 = bitcast <2 x i32> %sub to double + ret double %3 +} +; CHECK-LABEL: test1_sub +; SSE41: psubd +; AVX: vpsubd +; CHECK-NEXT: ret + + +define double @test2_sub(double %A, double %B) { + %1 = bitcast double %A to <4 x i16> + %2 = bitcast double %B to <4 x i16> + %sub = sub <4 x i16> %1, %2 + %3 = bitcast <4 x i16> %sub to double + ret double %3 +} +; CHECK-LABEL: test2_sub +; SSE41: psubw +; AVX: vpsubw +; CHECK-NEXT: ret + + +define double 
@test3_sub(double %A, double %B) { + %1 = bitcast double %A to <8 x i8> + %2 = bitcast double %B to <8 x i8> + %sub = sub <8 x i8> %1, %2 + %3 = bitcast <8 x i8> %sub to double + ret double %3 +} +; CHECK-LABEL: test3_sub +; SSE41: psubb +; AVX: vpsubb +; CHECK-NEXT: ret + + +define double @test1_mul(double %A, double %B) { + %1 = bitcast double %A to <2 x i32> + %2 = bitcast double %B to <2 x i32> + %mul = mul <2 x i32> %1, %2 + %3 = bitcast <2 x i32> %mul to double + ret double %3 +} +; CHECK-LABEL: test1_mul +; SSE41: pmulld +; AVX: vpmulld +; CHECK-NEXT: ret + + +define double @test2_mul(double %A, double %B) { + %1 = bitcast double %A to <4 x i16> + %2 = bitcast double %B to <4 x i16> + %mul = mul <4 x i16> %1, %2 + %3 = bitcast <4 x i16> %mul to double + ret double %3 +} +; CHECK-LABEL: test2_mul +; SSE41: pmullw +; AVX: vpmullw +; CHECK-NEXT: ret + +; There is no legal ISD::MUL with type MVT::v8i16. +define double @test3_mul(double %A, double %B) { + %1 = bitcast double %A to <8 x i8> + %2 = bitcast double %B to <8 x i8> + %mul = mul <8 x i8> %1, %2 + %3 = bitcast <8 x i8> %mul to double + ret double %3 +} +; CHECK-LABEL: test3_mul +; CHECK: pmullw +; CHECK-NEXT: pshufb +; CHECK-NEXT: ret + + +define double @test1_and(double %A, double %B) { + %1 = bitcast double %A to <2 x i32> + %2 = bitcast double %B to <2 x i32> + %and = and <2 x i32> %1, %2 + %3 = bitcast <2 x i32> %and to double + ret double %3 +} +; CHECK-LABEL: test1_and +; SSE41: andps +; AVX: vandps +; CHECK-NEXT: ret + + +define double @test2_and(double %A, double %B) { + %1 = bitcast double %A to <4 x i16> + %2 = bitcast double %B to <4 x i16> + %and = and <4 x i16> %1, %2 + %3 = bitcast <4 x i16> %and to double + ret double %3 +} +; CHECK-LABEL: test2_and +; SSE41: andps +; AVX: vandps +; CHECK-NEXT: ret + + +define double @test3_and(double %A, double %B) { + %1 = bitcast double %A to <8 x i8> + %2 = bitcast double %B to <8 x i8> + %and = and <8 x i8> %1, %2 + %3 = bitcast <8 x i8> %and to 
double + ret double %3 +} +; CHECK-LABEL: test3_and +; SSE41: andps +; AVX: vandps +; CHECK-NEXT: ret + + +define double @test1_or(double %A, double %B) { + %1 = bitcast double %A to <2 x i32> + %2 = bitcast double %B to <2 x i32> + %or = or <2 x i32> %1, %2 + %3 = bitcast <2 x i32> %or to double + ret double %3 +} +; CHECK-LABEL: test1_or +; SSE41: orps +; AVX: vorps +; CHECK-NEXT: ret + + +define double @test2_or(double %A, double %B) { + %1 = bitcast double %A to <4 x i16> + %2 = bitcast double %B to <4 x i16> + %or = or <4 x i16> %1, %2 + %3 = bitcast <4 x i16> %or to double + ret double %3 +} +; CHECK-LABEL: test2_or +; SSE41: orps +; AVX: vorps +; CHECK-NEXT: ret + + +define double @test3_or(double %A, double %B) { + %1 = bitcast double %A to <8 x i8> + %2 = bitcast double %B to <8 x i8> + %or = or <8 x i8> %1, %2 + %3 = bitcast <8 x i8> %or to double + ret double %3 +} +; CHECK-LABEL: test3_or +; SSE41: orps +; AVX: vorps +; CHECK-NEXT: ret + + +define double @test1_xor(double %A, double %B) { + %1 = bitcast double %A to <2 x i32> + %2 = bitcast double %B to <2 x i32> + %xor = xor <2 x i32> %1, %2 + %3 = bitcast <2 x i32> %xor to double + ret double %3 +} +; CHECK-LABEL: test1_xor +; SSE41: xorps +; AVX: vxorps +; CHECK-NEXT: ret + + +define double @test2_xor(double %A, double %B) { + %1 = bitcast double %A to <4 x i16> + %2 = bitcast double %B to <4 x i16> + %xor = xor <4 x i16> %1, %2 + %3 = bitcast <4 x i16> %xor to double + ret double %3 +} +; CHECK-LABEL: test2_xor +; SSE41: xorps +; AVX: vxorps +; CHECK-NEXT: ret + + +define double @test3_xor(double %A, double %B) { + %1 = bitcast double %A to <8 x i8> + %2 = bitcast double %B to <8 x i8> + %xor = xor <8 x i8> %1, %2 + %3 = bitcast <8 x i8> %xor to double + ret double %3 +} +; CHECK-LABEL: test3_xor +; SSE41: xorps +; AVX: vxorps +; CHECK-NEXT: ret + + +define double @test_fadd(double %A, double %B) { + %1 = bitcast double %A to <2 x float> + %2 = bitcast double %B to <2 x float> + %add = fadd <2 x 
float> %1, %2 + %3 = bitcast <2 x float> %add to double + ret double %3 +} +; CHECK-LABEL: test_fadd +; SSE41: addps +; AVX: vaddps +; CHECK-NEXT: ret + +define double @test_fsub(double %A, double %B) { + %1 = bitcast double %A to <2 x float> + %2 = bitcast double %B to <2 x float> + %sub = fsub <2 x float> %1, %2 + %3 = bitcast <2 x float> %sub to double + ret double %3 +} +; CHECK-LABEL: test_fsub +; SSE41: subps +; AVX: vsubps +; CHECK-NEXT: ret + +define double @test_fmul(double %A, double %B) { + %1 = bitcast double %A to <2 x float> + %2 = bitcast double %B to <2 x float> + %mul = fmul <2 x float> %1, %2 + %3 = bitcast <2 x float> %mul to double + ret double %3 +} +; CHECK-LABEL: test_fmul +; SSE41: mulps +; AVX: vmulps +; CHECK-NEXT: ret + diff --git a/test/CodeGen/X86/lower-bitcast.ll b/test/CodeGen/X86/lower-bitcast.ll index b9b29a558e..769831ee81 100644 --- a/test/CodeGen/X86/lower-bitcast.ll +++ b/test/CodeGen/X86/lower-bitcast.ll @@ -14,7 +14,7 @@ define double @test1(double %A) { ; CHECK-LABEL: test1 ; CHECK-NOT: movsd ; CHECK: pshufd -; CHECK-NEXT: paddq +; CHECK-NEXT: paddd ; CHECK-NEXT: pshufd ; CHECK-NEXT: ret @@ -26,16 +26,9 @@ define double @test2(double %A, double %B) { %3 = bitcast <2 x i32> %add to double ret double %3 } -; FIXME: Ideally we should be able to fold the entire body of @test2 into a -; single 'paddd %xmm1, %xmm0' instruction. At the moment we produce the -; sequence pshufd+pshufd+paddq+pshufd. 
- ; CHECK-LABEL: test2 ; CHECK-NOT: movsd -; CHECK: pshufd -; CHECK-NEXT: pshufd -; CHECK-NEXT: paddq -; CHECK-NEXT: pshufd +; CHECK: paddd ; CHECK-NEXT: ret @@ -91,7 +84,7 @@ define double @test6(double %A) { ; CHECK-LABEL: test6 ; CHECK-NOT: movsd ; CHECK: punpcklwd -; CHECK-NEXT: paddd +; CHECK-NEXT: paddw ; CHECK-NEXT: pshufb ; CHECK-NEXT: ret @@ -103,16 +96,10 @@ define double @test7(double %A, double %B) { %3 = bitcast <4 x i16> %add to double ret double %3 } -; FIXME: Ideally we should be able to fold the entire body of @test7 into a -; single 'paddw %xmm1, %xmm0' instruction. At the moment we produce the -; sequence pshufd+pshufd+paddd+pshufd. - ; CHECK-LABEL: test7 ; CHECK-NOT: movsd -; CHECK: punpcklwd -; CHECK-NEXT: punpcklwd -; CHECK-NEXT: paddd -; CHECK-NEXT: pshufb +; CHECK-NOT: punpcklwd +; CHECK: paddw ; CHECK-NEXT: ret @@ -129,7 +116,7 @@ define double @test8(double %A) { ; CHECK-LABEL: test8 ; CHECK-NOT: movsd ; CHECK: punpcklbw -; CHECK-NEXT: paddw +; CHECK-NEXT: paddb ; CHECK-NEXT: pshufb ; CHECK-NEXT: ret @@ -141,15 +128,9 @@ define double @test9(double %A, double %B) { %3 = bitcast <8 x i8> %add to double ret double %3 } -; FIXME: Ideally we should be able to fold the entire body of @test9 into a -; single 'paddb %xmm1, %xmm0' instruction. At the moment we produce the -; sequence pshufd+pshufd+paddw+pshufd. - ; CHECK-LABEL: test9 ; CHECK-NOT: movsd -; CHECK: punpcklbw -; CHECK-NEXT: punpcklbw -; CHECK-NEXT: paddw -; CHECK-NEXT: pshufb +; CHECK-NOT: punpcklbw +; CHECK: paddb ; CHECK-NEXT: ret |