author    Renato Golin <renato.golin@linaro.org>  2013-03-21 18:47:47 +0000
committer Renato Golin <renato.golin@linaro.org>  2013-03-21 18:47:47 +0000
commit    3382a840747c42c4a98eac802ee7b347a8ded1e4 (patch)
tree      de4c0a5050772a0b2c7d3b0f1e771814d0f24fe9 /test/CodeGen/ARM
parent    b55b00b4d4b79daec2be43a2a6cd0f5891370296 (diff)
Avoid NEON SP-FP unless unsafe-math or Darwin
NEON is not IEEE 754 compliant, so we should avoid lowering single-precision floating-point operations with NEON unless unsafe-math is turned on. The equivalent VFP instructions are IEEE 754 compliant, but on some cores they are much slower, so some architectures/OSs (such as Swift and Darwin) may still request NEON single-precision lowering by default.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177651 91177308-0d34-0410-b5e6-96231b3b80d8
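To make the intended behaviour concrete, here is a minimal, hypothetical test in the same style as the ones changed below (it is not part of this commit); it assumes the same llc flags and mirrors the CHECK lines of fadds.ll:

; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=SAFE
; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFE
define float @add(float %a, float %b) {
entry:
  %sum = fadd float %a, %b
  ret float %sum
}
; With the default (IEEE-safe) FP model the add stays on a VFP s-register;
; with unsafe-math it may be lowered to a NEON d-register add instead.
; SAFE: vadd.f32 s
; UNSAFE: vadd.f32 d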
Diffstat (limited to 'test/CodeGen/ARM')
-rw-r--r--  test/CodeGen/ARM/fadds.ll        |  7
-rw-r--r--  test/CodeGen/ARM/fmuls.ll        |  7
-rw-r--r--  test/CodeGen/ARM/fnegs.ll        | 11
-rw-r--r--  test/CodeGen/ARM/fnmscs.ll       | 21
-rw-r--r--  test/CodeGen/ARM/fp_convert.ll   |  3
-rw-r--r--  test/CodeGen/ARM/fsubs.ll        |  4
-rw-r--r--  test/CodeGen/ARM/neon-spfp.ll    | 76
-rw-r--r--  test/CodeGen/ARM/neon_minmax.ll  |  2
8 files changed, 120 insertions(+), 11 deletions(-)
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index 48ef5ed88f..5aabc6685f 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
define float @test(float %a, float %b) {
@@ -18,6 +19,8 @@ entry:
; NFP0: vadd.f32 s
; CORTEXA8: test:
-; CORTEXA8: vadd.f32 d
+; CORTEXA8: vadd.f32 s
+; CORTEXA8U: test:
+; CORTEXA8U: vadd.f32 d
; CORTEXA9: test:
-; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}}
+; CORTEXA9: vadd.f32 s
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index 1566a9272d..95698807cb 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
define float @test(float %a, float %b) {
@@ -18,9 +19,11 @@ entry:
; NFP0: vmul.f32 s
; CORTEXA8: test:
-; CORTEXA8: vmul.f32 d
+; CORTEXA8: vmul.f32 s
+; CORTEXA8U: test:
+; CORTEXA8U: vmul.f32 d
; CORTEXA9: test:
-; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}}
+; CORTEXA9: vmul.f32 s
; VFP2: test2
define float @test2(float %a) nounwind {
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index 418b59803d..30549b08af 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
define float @test1(float* %a) {
@@ -22,7 +23,10 @@ entry:
; NFP0: vneg.f32 s{{.*}}, s{{.*}}
; CORTEXA8: test1:
-; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
+; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
+
+; CORTEXA8U: test1:
+; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
; CORTEXA9: test1:
; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
@@ -46,7 +50,10 @@ entry:
; NFP0: vneg.f32 s{{.*}}, s{{.*}}
; CORTEXA8: test2:
-; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
+; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
+
+; CORTEXA8U: test2:
+; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
; CORTEXA9: test2:
; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 9ce9b7ae7d..b5b421191f 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=A8U
; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
define float @t1(float %acc, float %a, float %b) nounwind {
@@ -11,9 +12,13 @@ entry:
; NEON: t1:
; NEON: vnmla.f32
+; A8U: t1:
+; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
+; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+
; A8: t1:
; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
-; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
%0 = fmul float %a, %b
%1 = fsub float -0.0, %0
%2 = fsub float %1, %acc
@@ -28,9 +33,13 @@ entry:
; NEON: t2:
; NEON: vnmla.f32
+; A8U: t2:
+; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
+; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+
; A8: t2:
; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
-; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
%0 = fmul float %a, %b
%1 = fmul float -1.0, %0
%2 = fsub float %1, %acc
@@ -45,6 +54,10 @@ entry:
; NEON: t3:
; NEON: vnmla.f64
+; A8U: t3:
+; A8U: vnmul.f64 d
+; A8U: vsub.f64 d
+
; A8: t3:
; A8: vnmul.f64 d
; A8: vsub.f64 d
@@ -62,6 +75,10 @@ entry:
; NEON: t4:
; NEON: vnmla.f64
+; A8U: t4:
+; A8U: vnmul.f64 d
+; A8U: vsub.f64 d
+
; A8: t4:
; A8: vnmul.f64 d
; A8: vsub.f64 d
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 44298b9c5d..51f3890684 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
define i32 @test1(float %a, float %b) {
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
index f039e74c8e..3013e56004 100644
--- a/test/CodeGen/ARM/fsubs.ll
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NFP1U
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
@@ -9,5 +10,6 @@ entry:
}
; VFP2: vsub.f32 s
-; NFP1: vsub.f32 d
+; NFP1U: vsub.f32 d
+; NFP1: vsub.f32 s
; NFP0: vsub.f32 s
diff --git a/test/CodeGen/ARM/neon-spfp.ll b/test/CodeGen/ARM/neon-spfp.ll
new file mode 100644
index 0000000000..c00f0d17c9
--- /dev/null
+++ b/test/CodeGen/ARM/neon-spfp.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=LINUXA5
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=LINUXA8
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 | FileCheck %s -check-prefix=LINUXA9
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=LINUXA15
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=LINUXSWIFT
+
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA5
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA8
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA9
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA15
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFESWIFT
+
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=DARWINA5
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=DARWINA8
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=DARWINA9
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | FileCheck %s -check-prefix=DARWINA15
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=DARWINSWIFT
+
+; This test makes sure we're not lowering VMUL.f32 D* (aka. NEON) for single-prec. FP ops, since
+; NEON is not fully IEEE 754 compliant, unless unsafe-math is selected.
+
+@.str = private unnamed_addr constant [12 x i8] c"S317\09%.5g \0A\00", align 1
+
+; CHECK-LINUXA5: main:
+; CHECK-LINUXA8: main:
+; CHECK-LINUXA9: main:
+; CHECK-LINUXA15: main:
+; CHECK-LINUXSWIFT: main:
+; CHECK-UNSAFEA5: main:
+; CHECK-UNSAFEA8: main:
+; CHECK-UNSAFEA9: main:
+; CHECK-UNSAFEA15: main:
+; CHECK-UNSAFESWIFT: main:
+; CHECK-DARWINA5: main:
+; CHECK-DARWINA8: main:
+; CHECK-DARWINA9: main:
+; CHECK-DARWINA15: main:
+; CHECK-DARWINSWIFT: main:
+define i32 @main() {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %q.03 = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+ %mul = fmul float %q.03, 0x3FEFAE1480000000
+; CHECK-LINUXA5: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA8: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA9: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA15: vmul.f32 s{{[0-9]*}}
+; Swift is *always* unsafe
+; CHECK-LINUXSWIFT: vmul.f32 d{{[0-9]*}}
+
+; CHECK-UNSAFEA5: vmul.f32 d{{[0-9]*}}
+; CHECK-UNSAFEA8: vmul.f32 d{{[0-9]*}}
+; A9 and A15 don't need this
+; CHECK-UNSAFEA9: vmul.f32 s{{[0-9]*}}
+; CHECK-UNSAFEA15: vmul.f32 s{{[0-9]*}}
+; CHECK-UNSAFESWIFT: vmul.f32 d{{[0-9]*}}
+
+; CHECK-DARWINA5: vmul.f32 d{{[0-9]*}}
+; CHECK-DARWINA8: vmul.f32 d{{[0-9]*}}
+; CHECK-DARWINA9: vmul.f32 s{{[0-9]*}}
+; CHECK-DARWINA15: vmul.f32 s{{[0-9]*}}
+; CHECK-DARWINSWIFT: vmul.f32 d{{[0-9]*}}
+ %conv = fpext float %mul to double
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %conv) #1
+ %inc = add nsw i32 %i.04, 1
+ %exitcond = icmp eq i32 %inc, 16000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...)
diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll
index d301c6a4ca..0a7c8b2b6a 100644
--- a/test/CodeGen/ARM/neon_minmax.ll
+++ b/test/CodeGen/ARM/neon_minmax.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
define float @fmin_ole(float %x) nounwind {
;CHECK: fmin_ole: