summaryrefslogtreecommitdiff
path: root/test/CodeGen
diff options
context:
space:
mode:
author: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-06-25 17:41:58 +0000
committer: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2014-06-25 17:41:58 +0000
commit: cae1ea691d79ce786e9209cb5917036f42f13d7c (patch)
tree: 746c84fc614d04044dc59ea1e309b8a9adc8f2c8 /test/CodeGen
parent: 75f1d664b426f8d4892609f4c5f2e1308f6492d8 (diff)
download: llvm-cae1ea691d79ce786e9209cb5917036f42f13d7c.tar.gz
llvm-cae1ea691d79ce786e9209cb5917036f42f13d7c.tar.bz2
llvm-cae1ea691d79ce786e9209cb5917036f42f13d7c.tar.xz
[X86] Always prefer to lower a VECTOR_SHUFFLE into a BLENDI instead of SHUFP (or VPERM2X128).
This patch teaches method 'LowerVECTOR_SHUFFLE' to give higher precedence to the check for 'isBlendMask'. The idea is that, whenever possible, we should first check whether a shuffle performs a blend and, if so, try to lower it into a BLENDI instead of selecting a SHUFP or (worse) a VPERM2X128.
In general:
 - AVX VBLENDPS/D always have better latency and throughput than VPERM2F128;
 - BLENDPS/D instructions tend to always have better 'reciprocal throughput' than the equivalent SHUFPS/D;
 - Both BLENDPS/D and SHUFPS/D are often decoded into the same number of micro-ops; however, a micro-op obtained from a BLENDPS/D can be scheduled to more than one execution port.
This patch:
 - Moves the check for 'isBlendMask' immediately before the check for 'isSHUFPMask' within method 'LowerVECTOR_SHUFFLE';
 - Updates existing tests for sse/avx shuffle/blend instructions to verify that we select (v)blendps/d when possible (instead of (v)shufps/d or vperm2f128).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211720 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- test/CodeGen/X86/avx-blend.ll 2
-rw-r--r-- test/CodeGen/X86/avx-shuffle.ll 2
-rw-r--r-- test/CodeGen/X86/avx-vperm2f128.ll 2
-rw-r--r-- test/CodeGen/X86/avx-vshufp.ll 10
-rw-r--r-- test/CodeGen/X86/combine-or.ll 4
5 files changed, 10 insertions, 10 deletions
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll
index 43cdf7edf7..d2a22d7094 100644
--- a/test/CodeGen/X86/avx-blend.ll
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -110,7 +110,7 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
;CHECK-LABEL: vsel_double4:
;CHECK-NOT: vinsertf128
-;CHECK: vshufpd $10
+;CHECK: vblendpd $10
;CHECK-NEXT: ret
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index f3f7e554a3..4a996d7981 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -25,7 +25,7 @@ define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
ret <4 x i64> %c
; CHECK-LABEL: test3:
-; CHECK: vperm2f128
+; CHECK: vblendpd
; CHECK: ret
}
diff --git a/test/CodeGen/X86/avx-vperm2f128.ll b/test/CodeGen/X86/avx-vperm2f128.ll
index caa21e5bac..c20775baca 100644
--- a/test/CodeGen/X86/avx-vperm2f128.ll
+++ b/test/CodeGen/X86/avx-vperm2f128.ll
@@ -9,7 +9,7 @@ entry:
}
; CHECK: _B
-; CHECK: vperm2f128 $48
+; CHECK: vblendps $240
define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll
index 45883b7173..ad3dbc1ed8 100644
--- a/test/CodeGen/X86/avx-vshufp.ll
+++ b/test/CodeGen/X86/avx-vshufp.ll
@@ -32,14 +32,14 @@ entry:
ret <8 x i32> %shuffle
}
-; CHECK: vshufpd $10, %ymm
+; CHECK: vblendpd $10, %ymm
define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x double> %shuffle
}
-; CHECK: vshufpd $10, (%{{.*}}), %ymm
+; CHECK: vblendpd $10, (%{{.*}}), %ymm
define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
entry:
%a2 = load <4 x double>* %a
@@ -48,14 +48,14 @@ entry:
ret <4 x double> %shuffle
}
-; CHECK: vshufpd $10, %ymm
+; CHECK: vblendpd $10, %ymm
define <4 x i64> @B3(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x i64> %shuffle
}
-; CHECK: vshufpd $10, (%{{.*}}), %ymm
+; CHECK: vblendpd $10, (%{{.*}}), %ymm
define <4 x i64> @B4(<4 x i64>* %a, <4 x i64>* %b) nounwind uwtable readnone ssp {
entry:
%a2 = load <4 x i64>* %a
@@ -71,7 +71,7 @@ entry:
ret <8 x float> %shuffle
}
-; CHECK: vshufpd $2, %ymm
+; CHECK: vblendpd $2, %ymm
define <4 x double> @D(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll
index 572aded5e9..ff807b9871 100644
--- a/test/CodeGen/X86/combine-or.ll
+++ b/test/CodeGen/X86/combine-or.ll
@@ -74,7 +74,7 @@ define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
}
; CHECK-LABEL: test6
; CHECK-NOT: xorps
-; CHECK: shufps
+; CHECK: blendps $12
; CHECK-NEXT: ret
@@ -86,7 +86,7 @@ define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
}
; CHECK-LABEL: test7
; CHECK-NOT: xorps
-; CHECK: shufps
+; CHECK: blendps $12
; CHECK-NEXT: ret