summaryrefslogtreecommitdiff
path: root/test/CodeGen/ARM/neon_cmp.ll
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-02-20 21:33:32 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-02-20 21:33:32 +0000
commitc46e2df74cf75a33742f57d2b4d6c6fcf73bced9 (patch)
tree2527c3b68b24b2e2fb335d6c2988c382ee9e950a /test/CodeGen/ARM/neon_cmp.ll
parent64f3e763cd8e4f32f91ae5b44ac4bd9986afddf2 (diff)
downloadllvm-c46e2df74cf75a33742f57d2b4d6c6fcf73bced9.tar.gz
llvm-c46e2df74cf75a33742f57d2b4d6c6fcf73bced9.tar.bz2
llvm-c46e2df74cf75a33742f57d2b4d6c6fcf73bced9.tar.xz
DAGCombiner: Fold pointless truncate, bitcast, buildvector series
(2xi32) (truncate ((2xi64) bitcast (buildvector i32 a, i32 x, i32 b, i32 y))) can be folded into a (2xi32) (buildvector i32 a, i32 b). Such a DAG would cause unnecessary vdup instructions followed by vmovn instructions. We generate this code on ARM NEON for a setcc olt, 2xf64, 2xf64. For example, in the vectorized version of the code below. double A[N]; double B[N]; void test_double_compare_to_double() { int i; for(i=0;i<N;i++) A[i] = (double)(A[i] < B[i]); } radar://13191881 Fixes bug 15283. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175670 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/ARM/neon_cmp.ll')
-rw-r--r--test/CodeGen/ARM/neon_cmp.ll15
1 files changed, 15 insertions, 0 deletions
diff --git a/test/CodeGen/ARM/neon_cmp.ll b/test/CodeGen/ARM/neon_cmp.ll
new file mode 100644
index 0000000000..046b5da228
--- /dev/null
+++ b/test/CodeGen/ARM/neon_cmp.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+; bug 15283: code generated for a <2 x double> fcmp olt must not contain vdup.32 or vmovn.i64.
+; radar://13191881
+; CHECK: vfcmp
+define void @vfcmp(<2 x double>* %a, <2 x double>* %b) {
+ %wide.load = load <2 x double>* %a, align 4
+ %wide.load2 = load <2 x double>* %b, align 4
+; CHECK-NOT: vdup.32
+; CHECK-NOT: vmovn.i64
+ %v1 = fcmp olt <2 x double> %wide.load, %wide.load2 ; lane-wise ordered less-than, yields <2 x i1>
+ %v2 = zext <2 x i1> %v1 to <2 x i32> ; zero-extend each i1 lane to i32 (0 or 1)
+ %v3 = sitofp <2 x i32> %v2 to <2 x double> ; convert the 0/1 integer lanes to double
+ store <2 x double> %v3, <2 x double>* %b, align 4 ; result overwrites the vector loaded from %b
+ ret void
+}