diff options
author | Tim Northover <tnorthover@apple.com> | 2014-02-04 14:55:42 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2014-02-04 14:55:42 +0000 |
commit | f9ced85e49f530c1cc3559b881c4b28a29408985 (patch) | |
tree | ade9a3397acf4b9fe634632ece72756d0a1fedff /test/CodeGen | |
parent | 8753ba91d28cd0fe7e6767466dec320b1fe2af86 (diff) | |
download | llvm-f9ced85e49f530c1cc3559b881c4b28a29408985.tar.gz llvm-f9ced85e49f530c1cc3559b881c4b28a29408985.tar.bz2 llvm-f9ced85e49f530c1cc3559b881c4b28a29408985.tar.xz |
ARM & AArch64: merge NEON absolute compare intrinsics
There was an extremely confusing proliferation of LLVM intrinsics to implement
the vacge & vacgt instructions. This combines them all into two polymorphic
intrinsics, shared across both backends.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200768 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/AArch64/neon-facge-facgt.ll | 24 | ||||
-rw-r--r-- | test/CodeGen/AArch64/neon-scalar-compare.ll | 12 | ||||
-rw-r--r-- | test/CodeGen/ARM/vcge.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/ARM/vcgt.ll | 8 |
4 files changed, 26 insertions, 26 deletions
diff --git a/test/CodeGen/AArch64/neon-facge-facgt.ll b/test/CodeGen/AArch64/neon-facge-facgt.ll index 146256e4be..28e821222f 100644 --- a/test/CodeGen/AArch64/neon-facge-facgt.ll +++ b/test/CodeGen/AArch64/neon-facge-facgt.ll @@ -1,20 +1,20 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) -declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) -declare <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double>, <2 x double>) +declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>) +declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>) +declare <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double>, <2 x double>) define <2 x i32> @facge_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facge_from_intr_v2i32: - %val = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %A, <2 x float> %B) + %val = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %A, <2 x float> %B) ; CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ret <2 x i32> %val } define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facge_from_intr_v4i32: - %val = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %A, <4 x float> %B) + %val = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %A, <4 x float> %B) ; CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ret <4 x i32> %val } @@ -22,26 +22,26 @@ define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { define <2 x i64> @facge_from_intr_v2i64(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facge_from_intr_v2i64: - %val = call <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double> %A, <2 x double> %B) + %val = call <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double> %A, <2 x double> %B) ; CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d ret <2 x i64> %val } -declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) -declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) -declare <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double>, <2 x double>) +declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>) +declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>) +declare <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double>, <2 x double>) define <2 x i32> @facgt_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facgt_from_intr_v2i32: - %val = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %A, <2 x float> %B) + %val = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %A, <2 x float> %B) ; CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ret <2 x i32> %val } define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facgt_from_intr_v4i32: - %val = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %A, <4 x float> %B) + %val = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %A, <4 x float> %B) ; CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ret <4 x i32> %val } @@ -49,7 +49,7 @@ define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { define <2 x i64> @facgt_from_intr_v2i64(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facgt_from_intr_v2i64: - %val = call <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double> %A, <2 x double> %B) + %val = call <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double> %A, <2 x double> %B) ; CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d ret <2 x i64> %val } diff --git a/test/CodeGen/AArch64/neon-scalar-compare.ll b/test/CodeGen/AArch64/neon-scalar-compare.ll index d1b5f9c254..e1f39645f1 100644 --- a/test/CodeGen/AArch64/neon-scalar-compare.ll +++ b/test/CodeGen/AArch64/neon-scalar-compare.ll @@ -122,28 +122,28 @@ entry: define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcage_f64 ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2 + %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2 ret <1 x i64> %vcage2.i } define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcagt_f64 ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2 + %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2 ret <1 x i64> %vcagt2.i } define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcale_f64 ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2 + %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2 ret <1 x i64> %vcage2.i } define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcalt_f64 ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2 + %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2 ret <1 x i64> %vcagt2.i } @@ -331,8 +331,8 @@ define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 { ret <1 x i64> %vcltz.i } -declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double>, <1 x double>) +declare <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double>, <1 x double>) declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll index 81a59dbdfe..df72835ae2 100644 --- a/test/CodeGen/ARM/vcge.ll +++ b/test/CodeGen/ARM/vcge.ll @@ -145,7 +145,7 @@ define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: vacge.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x i32> %tmp3 } @@ -154,12 +154,12 @@ define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: vacge.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x i32> %tmp3 } -declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vcgei8Z: diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll index 056866fe99..adee76a2fb 100644 --- a/test/CodeGen/ARM/vcgt.ll +++ b/test/CodeGen/ARM/vcgt.ll @@ -146,7 +146,7 @@ define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: vacgt.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x i32> %tmp3 } @@ -155,7 +155,7 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: vacgt.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x i32> %tmp3 } @@ -172,8 +172,8 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x i32> %tmp4 } -declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vcgti8Z: |