From 07786c2f091c5179719b56e74a51dfa8ba5e3dc4 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 3 Feb 2014 17:27:49 +0000 Subject: AArch64 & ARM: refactor crypto intrinsics to take scalars Some of the SHA instructions take a scalar i32 as one argument (largely because they work on 160-bit hash fragments). This wasn't reflected in the IR previously, with ARM and AArch64 choosing different types (<4 x i32> and <1 x i32> respectively) which was ugly. This makes all the affected intrinsics take a uniform "i32", allowing them to become non-polymorphic at the same time. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200706 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrNEON.td | 34 ++++++++++++++++++------------ lib/Target/ARM/ARMInstrNEON.td | 38 +++++++++++++++++++++++++++++----- 2 files changed, 54 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 2cf27b861b..edb619e1e7 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -8863,13 +8863,15 @@ class NeonI_Cryptosha_ss size, bits<5> opcode, : NeonI_Crypto_SHA { + [], NoItinerary> { let Predicates = [HasNEON, HasCrypto]; + let hasSideEffects = 0; } def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>; +def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)), + (COPY_TO_REGCLASS (SHA1H (COPY_TO_REGCLASS i32:$Rn, FPR32)), GPR32)>; + class NeonI_Cryptosha3_vvv size, bits<3> opcode, string asmop, SDPatternOperator opnode> @@ -8911,24 +8913,30 @@ def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h", def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2", int_arm_neon_sha256h2>; -class NeonI_Cryptosha3_qsv size, bits<3> opcode, string asmop, - SDPatternOperator opnode> +class NeonI_Cryptosha3_qsv size, bits<3> opcode, string asmop> : NeonI_Crypto_3VSHA { + [], NoItinerary> { let Constraints = "$src = $Rd"; + let hasSideEffects = 0; let Predicates = [HasNEON, HasCrypto]; } -def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>; -def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>; -def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>; +def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c">; +def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p">; +def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m">; + +def : Pat<(int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), + (SHA1C v4i32:$hash_abcd, + (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; +def : Pat<(int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), + (SHA1M v4i32:$hash_abcd, + (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; +def : Pat<(int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), + (SHA1P v4i32:$hash_abcd, + (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; // Additional patterns to match shl to USHL. def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3d993eb964..bbf8590935 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5876,7 +5876,7 @@ defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; // Cryptography instructions let PostEncoderMethod = "NEONThumb2DataIPostEncoder", - DecoderNamespace = "v8Crypto" in { + DecoderNamespace = "v8Crypto", hasSideEffects = 0 in { class AES : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary, !strconcat("aes", op), "8", v16i8, v16i8, Int>, @@ -5906,17 +5906,45 @@ def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; -def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>; +def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>; def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; -def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>; -def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>; -def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>; +def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>; +def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>; +def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>; def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; +def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)), + (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG + (SHA1H (SUBREG_TO_REG (i64 0), + (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)), + ssub_0)), + ssub_0)), GPR)>; + +def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), + (SHA1C v4i32:$hash_abcd, + (SUBREG_TO_REG (i64 0), + (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), + ssub_0), + v4i32:$wk)>; + +def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), + (SHA1M v4i32:$hash_abcd, + (SUBREG_TO_REG (i64 0), + (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), + ssub_0), + v4i32:$wk)>; + +def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), + (SHA1P v4i32:$hash_abcd, + (SUBREG_TO_REG (i64 0), + (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), + ssub_0), + v4i32:$wk)>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// -- cgit v1.2.3