From 07786c2f091c5179719b56e74a51dfa8ba5e3dc4 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Mon, 3 Feb 2014 17:27:49 +0000
Subject: AArch64 & ARM: refactor crypto intrinsics to take scalars

Some of the SHA instructions take a scalar i32 as one argument (largely
because they work on 160-bit hash fragments). This wasn't reflected in
the IR previously, with ARM and AArch64 choosing different types
(<4 x i32> and <1 x i32> respectively), which was ugly.

This makes all the affected intrinsics take a uniform "i32", allowing
them to become non-polymorphic at the same time.
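
As an illustration of the new scheme (a sketch reconstructed from the
test updates below, reusing the operand names from those tests), a
sha1c call site loses its <1 x i32> wrapper and takes the i32 directly:

  ; Before: AArch64 wrapped the scalar hash element in a <1 x i32>.
  declare <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32>, <1 x i32>, <4 x i32>)
  %e = insertelement <1 x i32> undef, i32 %hash_e, i32 0
  %res = call <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32> %hash_abcd, <1 x i32> %e, <4 x i32> %wk)

  ; After: one non-polymorphic signature shared by both targets.
  declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>)
  %res = call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
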
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200706 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/AArch64/neon-crypto.ll   | 63 ++++++++++++++++-------------
 test/CodeGen/ARM/intrinsics-crypto.ll | 58 ++++++++++++++++----------------
 2 files changed, 59 insertions(+), 62 deletions(-)

(limited to 'test')

diff --git a/test/CodeGen/AArch64/neon-crypto.ll b/test/CodeGen/AArch64/neon-crypto.ll
index 0283e0e7ca..c0014fa387 100644
--- a/test/CodeGen/AArch64/neon-crypto.ll
+++ b/test/CodeGen/AArch64/neon-crypto.ll
@@ -1,40 +1,40 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s
 ; RUN: not llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s

-declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>) #1

-declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+declare <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32>, <4 x i32>, <4 x i32>) #1

-declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+declare <4 x i32> @llvm.arm.neon.sha256h(<4 x i32>, <4 x i32>, <4 x i32>) #1

-declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+declare <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32>, <4 x i32>, <4 x i32>) #1

-declare <4 x i32> @llvm.aarch64.neon.sha1m(<4 x i32>, <1 x i32>, <4 x i32>) #1
+declare <4 x i32> @llvm.arm.neon.sha1m(<4 x i32>, i32, <4 x i32>) #1

-declare <4 x i32> @llvm.aarch64.neon.sha1p(<4 x i32>, <1 x i32>, <4 x i32>) #1
+declare <4 x i32> @llvm.arm.neon.sha1p(<4 x i32>, i32, <4 x i32>) #1

-declare <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32>, <1 x i32>, <4 x i32>) #1
+declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>) #1

-declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>) #1
+declare <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32>, <4 x i32>) #1

-declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>) #1
+declare <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32>, <4 x i32>) #1

-declare <1 x i32> @llvm.arm.neon.sha1h.v1i32(<1 x i32>) #1
+declare i32 @llvm.arm.neon.sha1h(i32) #1

-declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>) #1
+declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>) #1

-declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>) #1
+declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>) #1

-declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>) #1
+declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #1

-declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>) #1
+declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #1

 define <16 x i8> @test_vaeseq_u8(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK: test_vaeseq_u8:
 ; CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 ; CHECK-NO-CRYPTO: Cannot select: intrinsic %llvm.arm.neon.aese
 entry:
-  %aese.i = tail call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %data, <16 x i8> %key)
+  %aese.i = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key)
   ret <16 x i8> %aese.i
 }

@@ -42,7 +42,7 @@ define <16 x i8> @test_vaesdq_u8(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK: test_vaesdq_u8:
 ; CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 entry:
-  %aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %data, <16 x i8> %key)
+  %aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key)
   ret <16 x i8> %aesd.i
 }

@@ -50,7 +50,7 @@ define <16 x i8> @test_vaesmcq_u8(<16 x i8> %data) {
 ; CHECK: test_vaesmcq_u8:
 ; CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 entry:
-  %aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %data)
+  %aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %data)
   ret <16 x i8> %aesmc.i
 }

@@ -58,7 +58,7 @@ define <16 x i8> @test_vaesimcq_u8(<16 x i8> %data) {
 ; CHECK: test_vaesimcq_u8:
 ; CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
 entry:
-  %aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %data)
+  %aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %data)
   ret <16 x i8> %aesimc.i
 }

@@ -66,17 +66,15 @@ define i32 @test_vsha1h_u32(i32 %hash_e) {
 ; CHECK: test_vsha1h_u32:
 ; CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}}
 entry:
-  %sha1h.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
-  %sha1h1.i = tail call <1 x i32> @llvm.arm.neon.sha1h.v1i32(<1 x i32> %sha1h.i)
-  %0 = extractelement <1 x i32> %sha1h1.i, i32 0
-  ret i32 %0
+  %sha1h1.i = tail call i32 @llvm.arm.neon.sha1h(i32 %hash_e)
+  ret i32 %sha1h1.i
 }

 define <4 x i32> @test_vsha1su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w12_15) {
 ; CHECK: test_vsha1su1q_u32:
 ; CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 entry:
-  %sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %tw0_3, <4 x i32> %w12_15)
+  %sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %tw0_3, <4 x i32> %w12_15)
   ret <4 x i32> %sha1su12.i
 }

@@ -84,7 +82,7 @@ define <4 x i32> @test_vsha256su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7) {
 ; CHECK: test_vsha256su0q_u32:
 ; CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 entry:
-  %sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %w0_3, <4 x i32> %w4_7)
+  %sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7)
   ret <4 x i32> %sha256su02.i
 }

@@ -92,8 +90,7 @@ define <4 x i32> @test_vsha1cq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32>
 ; CHECK: test_vsha1cq_u32:
 ; CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
 entry:
-  %sha1c.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
-  %sha1c1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32> %hash_abcd, <1 x i32> %sha1c.i, <4 x i32> %wk)
+  %sha1c1.i = tail call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
   ret <4 x i32> %sha1c1.i
 }

@@ -101,8 +98,7 @@ define <4 x i32> @test_vsha1pq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32>
 ; CHECK: test_vsha1pq_u32:
 ; CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
 entry:
-  %sha1p.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
-  %sha1p1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1p(<4 x i32> %hash_abcd, <1 x i32> %sha1p.i, <4 x i32> %wk)
+  %sha1p1.i = tail call <4 x i32> @llvm.arm.neon.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
   ret <4 x i32> %sha1p1.i
 }

@@ -110,8 +106,7 @@ define <4 x i32> @test_vsha1mq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32>
 ; CHECK: test_vsha1mq_u32:
 ; CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
 entry:
-  %sha1m.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
-  %sha1m1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1m(<4 x i32> %hash_abcd, <1 x i32> %sha1m.i, <4 x i32> %wk)
+  %sha1m1.i = tail call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
   ret <4 x i32> %sha1m1.i
 }

@@ -119,7 +114,7 @@ define <4 x i32> @test_vsha1su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32>
 ; CHECK: test_vsha1su0q_u32:
 ; CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 entry:
-  %sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11)
+  %sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11)
   ret <4 x i32> %sha1su03.i
 }

@@ -127,7 +122,7 @@ define <4 x i32> @test_vsha256hq_u32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh,
 ; CHECK: test_vsha256hq_u32:
 ; CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
 entry:
-  %sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
+  %sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
   ret <4 x i32> %sha256h3.i
 }

@@ -135,7 +130,7 @@ define <4 x i32> @test_vsha256h2q_u32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd
 ; CHECK: test_vsha256h2q_u32:
 ; CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
 entry:
-  %sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
+  %sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
   ret <4 x i32> %sha256h23.i
 }

@@ -143,7 +138,7 @@ define <4 x i32> @test_vsha256su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x
 ; CHECK: test_vsha256su1q_u32:
 ; CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 entry:
-  %sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
+  %sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
   ret <4 x i32> %sha256su13.i
 }

diff --git a/test/CodeGen/ARM/intrinsics-crypto.ll b/test/CodeGen/ARM/intrinsics-crypto.ll
index c038fe6da8..96413d341e 100644
--- a/test/CodeGen/ARM/intrinsics-crypto.ll
+++ b/test/CodeGen/ARM/intrinsics-crypto.ll
@@ -3,13 +3,13 @@
 define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) {
   %tmp = load <16 x i8>* %a
   %tmp2 = load <16 x i8>* %b
-  %tmp3 = call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %tmp, <16 x i8> %tmp2)
+  %tmp3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %tmp, <16 x i8> %tmp2)
   ; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp4 = call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %tmp3, <16 x i8> %tmp2)
+  %tmp4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %tmp3, <16 x i8> %tmp2)
   ; CHECK: aese.8 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp5 = call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %tmp4)
+  %tmp5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %tmp4)
   ; CHECK: aesimc.8 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp6 = call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %tmp5)
+  %tmp6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %tmp5)
   ; CHECK: aesmc.8 q{{[0-9]+}}, q{{[0-9]+}}
   ret <16 x i8> %tmp6
 }

@@ -18,40 +18,42 @@ define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i3
   %tmp = load <4 x i32>* %a
   %tmp2 = load <4 x i32>* %b
   %tmp3 = load <4 x i32>* %c
-  %res1 = call <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32> %tmp)
+  %scalar = extractelement <4 x i32> %tmp, i32 0
+  %resscalar = call i32 @llvm.arm.neon.sha1h(i32 %scalar)
+  %res1 = insertelement <4 x i32> undef, i32 %resscalar, i32 0
   ; CHECK: sha1h.32 q{{[0-9]+}}, q{{[0-9]+}}
-  %res2 = call <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res2 = call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %tmp2, i32 %scalar, <4 x i32> %res1)
   ; CHECK: sha1c.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res3 = call <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32> %res2, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res3 = call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %res2, i32 %scalar, <4 x i32> %res1)
   ; CHECK: sha1m.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res4 = call <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32> %res3, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res4 = call <4 x i32> @llvm.arm.neon.sha1p(<4 x i32> %res3, i32 %scalar, <4 x i32> %res1)
   ; CHECK: sha1p.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res5 = call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res5 = call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1)
   ; CHECK: sha1su0.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res6 = call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %res5, <4 x i32> %res1)
+  %res6 = call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %res5, <4 x i32> %res1)
   ; CHECK: sha1su1.32 q{{[0-9]+}}, q{{[0-9]+}}
-  %res7 = call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res7 = call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1)
   ; CHECK: sha256h.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res8 = call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res8 = call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1)
   ; CHECK: sha256h2.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res9 = call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res9 = call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1)
   ; CHECK: sha256su1.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res10 = call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %res9, <4 x i32> %tmp3)
+  %res10 = call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %res9, <4 x i32> %tmp3)
   ; CHECK: sha256su0.32 q{{[0-9]+}}, q{{[0-9]+}}
   ret <4 x i32> %res10
 }

-declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>)
-declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>)
-declare <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>)
+declare i32 @llvm.arm.neon.sha1h(i32)
+declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1m(<4 x i32>, i32, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1p(<4 x i32>, i32, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256h(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32>, <4 x i32>)
--
cgit v1.2.3