author     Bob Wilson <bob.wilson@apple.com>  2009-09-15 20:58:02 +0000
committer  Bob Wilson <bob.wilson@apple.com>  2009-09-15 20:58:02 +0000
commit     9b379dc52698a15ba474f570b5badf85b1903bfd (patch)
tree       20ad2ce312299a614ba752d73f84a33f2511e93e /test/CodeGen
parent     37be5903a69261ccb143c375433426f3f2fc99e6 (diff)
Convert more tests to FileCheck.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@81915 91177308-0d34-0410-b5e6-96231b3b80d8
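
All six files follow the same conversion pattern: a chain of grep/count RUN lines over a temporary file becomes a single FileCheck invocation, with each expected opcode anchored to its function by a CHECK label. A minimal sketch of the before/after shape, using a hypothetical v_foo8 function (not from this patch) written in the same era-appropriate IR style as the tests below:

; Before: count opcode matches anywhere in the output
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep veor %t | count 8

; After: one FileCheck run; matches are ordered and tied to functions
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @v_foo8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: v_foo8:
;CHECK: veor
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = xor <8 x i8> %tmp1, %tmp2
	ret <8 x i8> %tmp3
}

Because FileCheck matches CHECK lines in order, the "v_foo8:" label ensures the veor check hits that function's body rather than any other occurrence in the output; CHECK-NEXT (used in vfcmp.ll and vfp.ll below) further requires the match on the immediately following output line.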
Diffstat (limited to 'test/CodeGen')
-rw-r--r--  test/CodeGen/ARM/veor.ll       20
-rw-r--r--  test/CodeGen/ARM/vfcmp.ll      44
-rw-r--r--  test/CodeGen/ARM/vfp.ll        43
-rw-r--r--  test/CodeGen/ARM/vget_lane.ll  27
-rw-r--r--  test/CodeGen/ARM/vhadd.ll      32
-rw-r--r--  test/CodeGen/ARM/vhsub.ll      32
6 files changed, 153 insertions, 45 deletions
diff --git a/test/CodeGen/ARM/veor.ll b/test/CodeGen/ARM/veor.ll
index a354f931e6..febceb41a9 100644
--- a/test/CodeGen/ARM/veor.ll
+++ b/test/CodeGen/ARM/veor.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep veor %t | count 8
-; Note: function names do not include "veor" to allow simple grep for opcodes
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_eori8:
+;CHECK: veor
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = xor <8 x i8> %tmp1, %tmp2
@@ -10,6 +10,8 @@ define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_eori16:
+;CHECK: veor
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = xor <4 x i16> %tmp1, %tmp2
@@ -17,6 +19,8 @@ define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_eori32:
+;CHECK: veor
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = xor <2 x i32> %tmp1, %tmp2
@@ -24,6 +28,8 @@ define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_eori64:
+;CHECK: veor
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = xor <1 x i64> %tmp1, %tmp2
@@ -31,6 +37,8 @@ define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_eorQi8:
+;CHECK: veor
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = xor <16 x i8> %tmp1, %tmp2
@@ -38,6 +46,8 @@ define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_eorQi16:
+;CHECK: veor
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = xor <8 x i16> %tmp1, %tmp2
@@ -45,6 +55,8 @@ define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_eorQi32:
+;CHECK: veor
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = xor <4 x i32> %tmp1, %tmp2
@@ -52,6 +64,8 @@ define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_eorQi64:
+;CHECK: veor
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = xor <2 x i64> %tmp1, %tmp2
diff --git a/test/CodeGen/ARM/vfcmp.ll b/test/CodeGen/ARM/vfcmp.ll
index 9b4fafcb2f..6946d02637 100644
--- a/test/CodeGen/ARM/vfcmp.ll
+++ b/test/CodeGen/ARM/vfcmp.ll
@@ -1,14 +1,12 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.f32} %t | count 1
-; RUN: grep {vcgt\\.f32} %t | count 9
-; RUN: grep {vcge\\.f32} %t | count 5
-; RUN: grep vorr %t | count 4
-; RUN: grep vmvn %t | count 7
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; This tests fcmp operations that do not map directly to NEON instructions.
; une is implemented with VCEQ/VMVN
define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunef32:
+;CHECK: vceq.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp une <2 x float> %tmp1, %tmp2
@@ -18,6 +16,8 @@ define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; olt is implemented with VCGT
define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcoltf32:
+;CHECK: vcgt.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
@@ -27,6 +27,8 @@ define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ole is implemented with VCGE
define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcolef32:
+;CHECK: vcge.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
@@ -36,6 +38,9 @@ define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; uge is implemented with VCGT/VMVN
define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
@@ -45,6 +50,9 @@ define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ule is implemented with VCGT/VMVN
define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vculef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
@@ -54,6 +62,9 @@ define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ugt is implemented with VCGE/VMVN
define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugtf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
@@ -63,6 +74,9 @@ define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ult is implemented with VCGE/VMVN
define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcultf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
@@ -72,6 +86,11 @@ define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ueq is implemented with VCGT/VCGT/VORR/VMVN
define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcueqf32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
@@ -81,6 +100,10 @@ define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
; one is implemented with VCGT/VCGT/VORR
define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vconef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp one <2 x float> %tmp1, %tmp2
@@ -90,6 +113,11 @@ define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; uno is implemented with VCGT/VCGE/VORR/VMVN
define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunof32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
@@ -99,6 +127,10 @@ define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ord is implemented with VCGT/VCGE/VORR
define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcordf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index c6e5606829..50000e31e1 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -1,19 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fabs | count 2
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fmscs | count 1
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fcvt | count 2
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fuito | count 2
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fto.i | count 4
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep bmi | count 1
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep bgt | count 1
-; RUN: llc < %s -march=arm -mattr=+vfp2 | \
-; RUN: grep fcmpezs | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
define void @test(float* %P, double* %D) {
%A = load float* %P ; <float> [#uses=1]
@@ -28,16 +13,20 @@ declare float @fabsf(float)
declare double @fabs(double)
define void @test_abs(float* %P, double* %D) {
+;CHECK: test_abs:
%a = load float* %P ; <float> [#uses=1]
+;CHECK: fabss
%b = call float @fabsf( float %a ) ; <float> [#uses=1]
store float %b, float* %P
%A = load double* %D ; <double> [#uses=1]
+;CHECK: fabsd
%B = call double @fabs( double %A ) ; <double> [#uses=1]
store double %B, double* %D
ret void
}
define void @test_add(float* %P, double* %D) {
+;CHECK: test_add:
%a = load float* %P ; <float> [#uses=2]
%b = fadd float %a, %a ; <float> [#uses=1]
store float %b, float* %P
@@ -48,9 +37,12 @@ define void @test_add(float* %P, double* %D) {
}
define void @test_ext_round(float* %P, double* %D) {
+;CHECK: test_ext_round:
%a = load float* %P ; <float> [#uses=1]
+;CHECK: fcvtds
%b = fpext float %a to double ; <double> [#uses=1]
%A = load double* %D ; <double> [#uses=1]
+;CHECK: fcvtsd
%B = fptrunc double %A to float ; <float> [#uses=1]
store double %b, double* %D
store float %B, float* %P
@@ -58,9 +50,11 @@ define void @test_ext_round(float* %P, double* %D) {
}
define void @test_fma(float* %P1, float* %P2, float* %P3) {
+;CHECK: test_fma:
%a1 = load float* %P1 ; <float> [#uses=1]
%a2 = load float* %P2 ; <float> [#uses=1]
%a3 = load float* %P3 ; <float> [#uses=1]
+;CHECK: fmscs
%X = fmul float %a1, %a2 ; <float> [#uses=1]
%Y = fsub float %X, %a3 ; <float> [#uses=1]
store float %Y, float* %P1
@@ -68,42 +62,55 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
}
define i32 @test_ftoi(float* %P1) {
+;CHECK: test_ftoi:
%a1 = load float* %P1 ; <float> [#uses=1]
+;CHECK: ftosizs
%b1 = fptosi float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_ftou(float* %P1) {
+;CHECK: test_ftou:
%a1 = load float* %P1 ; <float> [#uses=1]
+;CHECK: ftouizs
%b1 = fptoui float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_dtoi(double* %P1) {
+;CHECK: test_dtoi:
%a1 = load double* %P1 ; <double> [#uses=1]
+;CHECK: ftosizd
%b1 = fptosi double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_dtou(double* %P1) {
+;CHECK: test_dtou:
%a1 = load double* %P1 ; <double> [#uses=1]
+;CHECK: ftouizd
%b1 = fptoui double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define void @test_utod(double* %P1, i32 %X) {
+;CHECK: test_utod:
+;CHECK: fuitod
%b1 = uitofp i32 %X to double ; <double> [#uses=1]
store double %b1, double* %P1
ret void
}
define void @test_utod2(double* %P1, i8 %X) {
+;CHECK: test_utod2:
+;CHECK: fuitod
%b1 = uitofp i8 %X to double ; <double> [#uses=1]
store double %b1, double* %P1
ret void
}
define void @test_cmp(float* %glob, i32 %X) {
+;CHECK: test_cmp:
entry:
%tmp = load float* %glob ; <float> [#uses=2]
%tmp3 = getelementptr float* %glob, i32 2 ; <float*> [#uses=1]
@@ -111,6 +118,8 @@ entry:
%tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4 ; <i1> [#uses=1]
%tmp5 = fcmp uno float %tmp, %tmp4 ; <i1> [#uses=1]
%tmp6 = or i1 %tmp.upgrd.1, %tmp5 ; <i1> [#uses=1]
+;CHECK: bmi
+;CHECK-NEXT: bgt
br i1 %tmp6, label %cond_true, label %cond_false
cond_true: ; preds = %entry
@@ -129,8 +138,10 @@ declare i32 @bar(...)
declare i32 @baz(...)
define void @test_cmpfp0(float* %glob, i32 %X) {
+;CHECK: test_cmpfp0:
entry:
%tmp = load float* %glob ; <float> [#uses=1]
+;CHECK: fcmpezs
%tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; <i1> [#uses=1]
br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index d82638d882..b4f093c5e0 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -1,11 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vmov\\.s8} %t | count 2
-; RUN: grep {vmov\\.s16} %t | count 2
-; RUN: grep {vmov\\.u8} %t | count 2
-; RUN: grep {vmov\\.u16} %t | count 2
-; RUN: grep {vmov\\.32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
+;CHECK: vget_lanes8:
+;CHECK: vmov.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = extractelement <8 x i8> %tmp1, i32 1
%tmp3 = sext i8 %tmp2 to i32
@@ -13,6 +10,8 @@ define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
}
define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
+;CHECK: vget_lanes16:
+;CHECK: vmov.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = extractelement <4 x i16> %tmp1, i32 1
%tmp3 = sext i16 %tmp2 to i32
@@ -20,6 +19,8 @@ define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
}
define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
+;CHECK: vget_laneu8:
+;CHECK: vmov.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = extractelement <8 x i8> %tmp1, i32 1
%tmp3 = zext i8 %tmp2 to i32
@@ -27,6 +28,8 @@ define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
}
define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
+;CHECK: vget_laneu16:
+;CHECK: vmov.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = extractelement <4 x i16> %tmp1, i32 1
%tmp3 = zext i16 %tmp2 to i32
@@ -35,6 +38,8 @@ define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
; Do a vector add to keep the extraction from being done directly from memory.
define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
+;CHECK: vget_lanei32:
+;CHECK: vmov.32
%tmp1 = load <2 x i32>* %A
%tmp2 = add <2 x i32> %tmp1, %tmp1
%tmp3 = extractelement <2 x i32> %tmp2, i32 1
@@ -42,6 +47,8 @@ define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
}
define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_lanes8:
+;CHECK: vmov.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = extractelement <16 x i8> %tmp1, i32 1
%tmp3 = sext i8 %tmp2 to i32
@@ -49,6 +56,8 @@ define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
}
define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_lanes16:
+;CHECK: vmov.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = extractelement <8 x i16> %tmp1, i32 1
%tmp3 = sext i16 %tmp2 to i32
@@ -56,6 +65,8 @@ define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
}
define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_laneu8:
+;CHECK: vmov.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = extractelement <16 x i8> %tmp1, i32 1
%tmp3 = zext i8 %tmp2 to i32
@@ -63,6 +74,8 @@ define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
}
define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_laneu16:
+;CHECK: vmov.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = extractelement <8 x i16> %tmp1, i32 1
%tmp3 = zext i16 %tmp2 to i32
@@ -71,6 +84,8 @@ define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
; Do a vector add to keep the extraction from being done directly from memory.
define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
+;CHECK: vgetQ_lanei32:
+;CHECK: vmov.32
%tmp1 = load <4 x i32>* %A
%tmp2 = add <4 x i32> %tmp1, %tmp1
%tmp3 = extractelement <4 x i32> %tmp2, i32 1
diff --git a/test/CodeGen/ARM/vhadd.ll b/test/CodeGen/ARM/vhadd.ll
index 1c8f941f0f..d7670971d6 100644
--- a/test/CodeGen/ARM/vhadd.ll
+++ b/test/CodeGen/ARM/vhadd.ll
@@ -1,12 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vhadd\\.s8} %t | count 2
-; RUN: grep {vhadd\\.s16} %t | count 2
-; RUN: grep {vhadd\\.s32} %t | count 2
-; RUN: grep {vhadd\\.u8} %t | count 2
-; RUN: grep {vhadd\\.u16} %t | count 2
-; RUN: grep {vhadd\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhadds8:
+;CHECK: vhadd.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhadds16:
+;CHECK: vhadd.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhadds32:
+;CHECK: vhadd.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhaddu8:
+;CHECK: vhadd.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhaddu16:
+;CHECK: vhadd.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhaddu32:
+;CHECK: vhadd.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQs8:
+;CHECK: vhadd.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQs16:
+;CHECK: vhadd.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQs32:
+;CHECK: vhadd.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQu8:
+;CHECK: vhadd.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQu16:
+;CHECK: vhadd.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQu32:
+;CHECK: vhadd.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
diff --git a/test/CodeGen/ARM/vhsub.ll b/test/CodeGen/ARM/vhsub.ll
index 9411c3ec4a..0f0d0279a5 100644
--- a/test/CodeGen/ARM/vhsub.ll
+++ b/test/CodeGen/ARM/vhsub.ll
@@ -1,12 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vhsub\\.s8} %t | count 2
-; RUN: grep {vhsub\\.s16} %t | count 2
-; RUN: grep {vhsub\\.s32} %t | count 2
-; RUN: grep {vhsub\\.u8} %t | count 2
-; RUN: grep {vhsub\\.u16} %t | count 2
-; RUN: grep {vhsub\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubs8:
+;CHECK: vhsub.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubs16:
+;CHECK: vhsub.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubs32:
+;CHECK: vhsub.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhsubu8:
+;CHECK: vhsub.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhsubu16:
+;CHECK: vhsub.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhsubu32:
+;CHECK: vhsub.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQs8:
+;CHECK: vhsub.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQs16:
+;CHECK: vhsub.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQs32:
+;CHECK: vhsub.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhsubQu8:
+;CHECK: vhsub.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhsubQu16:
+;CHECK: vhsub.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhsubQu32:
+;CHECK: vhsub.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)