diff options
-rw-r--r-- | include/llvm/IR/IntrinsicsAArch64.td | 23 |
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 27 |
-rw-r--r-- | test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll | 87 |
3 files changed, 61 insertions, 76 deletions
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index 68af8c1164..bb1025e3c1 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -107,9 +107,6 @@ def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic; class Neon_Across_Intrinsic : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; -class Neon_2Arg_Across_Float_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_aarch64_neon_saddlv : Neon_Across_Intrinsic; def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic; def int_aarch64_neon_smaxv : Neon_Across_Intrinsic; @@ -233,29 +230,19 @@ def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic; def int_aarch64_neon_vpadd : Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>; def int_aarch64_neon_vpfadd : - Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; -def int_aarch64_neon_vpfaddq : - Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; // Scalar Reduce Pairwise Floating Point Max/Min. def int_aarch64_neon_vpmax : - Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; -def int_aarch64_neon_vpmaxq : - Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; def int_aarch64_neon_vpmin : - Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; -def int_aarch64_neon_vpminq : - Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; // Scalar Reduce Pairwise Floating Point Maxnm/Minnm. 
def int_aarch64_neon_vpfmaxnm : - Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; -def int_aarch64_neon_vpfmaxnmq : - Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; def int_aarch64_neon_vpfminnm : - Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; -def int_aarch64_neon_vpfminnmq : - Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; // Scalar Signed Integer Convert To Floating-point def int_aarch64_neon_vcvtf32_s32 : diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 04167a14bb..99328c81a0 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -5307,35 +5307,34 @@ defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>; // Scalar Reduce minNum Pairwise (Floating Point) defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>; -multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS, - SDPatternOperator opnodeD, +multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode, Instruction INSTS, Instruction INSTD> { - def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))), + def : Pat<(v1f32 (opnode (v2f32 VPR64:$Rn))), (INSTS VPR64:$Rn)>; - def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))), + def : Pat<(v1f64 (opnode (v2f64 VPR128:$Rn))), (INSTD VPR128:$Rn)>; } // Patterns to match llvm.aarch64.* intrinsic for // Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point) defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd, - int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>; + FADDPvv_S_2S, FADDPvv_D_2D>; defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax, - int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>; + FMAXPvv_S_2S, FMAXPvv_D_2D>; defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin, - 
int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>; + FMINPvv_S_2S, FMINPvv_D_2D>; defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm, - int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>; + FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>; defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm, - int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>; + FMINNMPvv_S_2S, FMINNMPvv_D_2D>; defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vaddv, - int_aarch64_neon_vaddv, FADDPvv_S_2S, FADDPvv_D_2D>; + FADDPvv_S_2S, FADDPvv_D_2D>; def : Pat<(v1f32 (int_aarch64_neon_vaddv (v4f32 VPR128:$Rn))), (FADDPvv_S_2S (v2f32 @@ -5344,16 +5343,16 @@ def : Pat<(v1f32 (int_aarch64_neon_vaddv (v4f32 VPR128:$Rn))), sub_64)))>; defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxv, - int_aarch64_neon_vmaxv, FMAXPvv_S_2S, FMAXPvv_D_2D>; + FMAXPvv_S_2S, FMAXPvv_D_2D>; defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminv, - int_aarch64_neon_vminv, FMINPvv_S_2S, FMINPvv_D_2D>; + FMINPvv_S_2S, FMINPvv_D_2D>; defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxnmv, - int_aarch64_neon_vmaxnmv, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>; + FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>; defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminnmv, - int_aarch64_neon_vminnmv, FMINNMPvv_S_2S, FMINNMPvv_D_2D>; + FMINNMPvv_S_2S, FMINNMPvv_D_2D>; // Scalar by element Arithmetic diff --git a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll index 80e8dc339d..401ceec255 100644 --- a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll +++ b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll @@ -4,101 +4,100 @@ declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>) define <1 x i64> @test_addp_v1i64(<2 x i64> %a) { ; CHECK: test_addp_v1i64: - %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) -; CHECK: addp d0, v0.2d - ret <1 x i64> %val +; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call <1 x 
i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) + ret <1 x i64> %val } -declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>) +declare <1 x float> @llvm.aarch64.neon.vpfadd.v1f32.v2f32(<2 x float>) define <1 x float> @test_faddp_v1f32(<2 x float> %a) { ; CHECK: test_faddp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a) -; CHECK: faddp s0, v0.2s - ret <1 x float> %val +; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call <1 x float> @llvm.aarch64.neon.vpfadd.v1f32.v2f32(<2 x float> %a) + ret <1 x float> %val } -declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>) +declare <1 x double> @llvm.aarch64.neon.vpfadd.v1f64.v2f64(<2 x double>) define <1 x double> @test_faddp_v1f64(<2 x double> %a) { ; CHECK: test_faddp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %a) -; CHECK: faddp d0, v0.2d - ret <1 x double> %val +; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call <1 x double> @llvm.aarch64.neon.vpfadd.v1f64.v2f64(<2 x double> %a) + ret <1 x double> %val } -declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>) +declare <1 x float> @llvm.aarch64.neon.vpmax.v1f32.v2f32(<2 x float>) define <1 x float> @test_fmaxp_v1f32(<2 x float> %a) { ; CHECK: test_fmaxp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpmax(<2 x float> %a) -; CHECK: fmaxp s0, v0.2s - ret <1 x float> %val +; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call <1 x float> @llvm.aarch64.neon.vpmax.v1f32.v2f32(<2 x float> %a) + ret <1 x float> %val } -declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>) +declare <1 x double> @llvm.aarch64.neon.vpmax.v1f64.v2f64(<2 x double>) define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) { ; CHECK: test_fmaxp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %a) -; CHECK: fmaxp d0, v0.2d - ret <1 x double> %val +; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call <1 x double> @llvm.aarch64.neon.vpmax.v1f64.v2f64(<2 x double> %a) + ret <1 x double> 
%val } - -declare <1 x float> @llvm.aarch64.neon.vpmin(<2 x float>) +declare <1 x float> @llvm.aarch64.neon.vpmin.v1f32.v2f32(<2 x float>) define <1 x float> @test_fminp_v1f32(<2 x float> %a) { ; CHECK: test_fminp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %a) -; CHECK: fminp s0, v0.2s - ret <1 x float> %val +; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call <1 x float> @llvm.aarch64.neon.vpmin.v1f32.v2f32(<2 x float> %a) + ret <1 x float> %val } -declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>) +declare <1 x double> @llvm.aarch64.neon.vpmin.v1f64.v2f64(<2 x double>) define <1 x double> @test_fminp_v1f64(<2 x double> %a) { ; CHECK: test_fminp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %a) -; CHECK: fminp d0, v0.2d - ret <1 x double> %val +; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call <1 x double> @llvm.aarch64.neon.vpmin.v1f64.v2f64(<2 x double> %a) + ret <1 x double> %val } -declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>) +declare <1 x float> @llvm.aarch64.neon.vpfmaxnm.v1f32.v2f32(<2 x float>) define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) { ; CHECK: test_fmaxnmp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %a) -; CHECK: fmaxnmp s0, v0.2s - ret <1 x float> %val +; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm.v1f32.v2f32(<2 x float> %a) + ret <1 x float> %val } -declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>) +declare <1 x double> @llvm.aarch64.neon.vpfmaxnm.v1f64.v2f64(<2 x double>) define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) { ; CHECK: test_fmaxnmp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %a) -; CHECK: fmaxnmp d0, v0.2d - ret <1 x double> %val +; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnm.v1f64.v2f64(<2 x double> %a) + ret <1 x double> %val } -declare <1 x 
float> @llvm.aarch64.neon.vpfminnm(<2 x float>) +declare <1 x float> @llvm.aarch64.neon.vpfminnm.v1f32.v2f32(<2 x float>) define <1 x float> @test_fminnmp_v1f32(<2 x float> %a) { ; CHECK: test_fminnmp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %a) -; CHECK: fminnmp s0, v0.2s - ret <1 x float> %val +; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call <1 x float> @llvm.aarch64.neon.vpfminnm.v1f32.v2f32(<2 x float> %a) + ret <1 x float> %val } -declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>) +declare <1 x double> @llvm.aarch64.neon.vpfminnm.v1f64.v2f64(<2 x double>) define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) { ; CHECK: test_fminnmp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %a) -; CHECK: fminnmp d0, v0.2d - ret <1 x double> %val +; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call <1 x double> @llvm.aarch64.neon.vpfminnm.v1f64.v2f64(<2 x double> %a) + ret <1 x double> %val } define float @test_vaddv_f32(<2 x float> %a) { |