diff options
-rw-r--r-- | include/llvm/IR/IntrinsicsAArch64.td | 30 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 43 | ||||
-rw-r--r-- | test/CodeGen/AArch64/neon-misc.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/AArch64/neon-scalar-fcvt.ll | 108 |
4 files changed, 103 insertions, 84 deletions
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index 71296e11c0..f4e7b01c0e 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -36,11 +36,11 @@ def int_aarch64_neon_xtn : // Vector floating-point convert def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic; def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic; -def int_aarch64_neon_fcvtxn : +def int_aarch64_neon_vcvtxn : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtzs : +def int_aarch64_neon_vcvtzs : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; -def int_aarch64_neon_fcvtzu : +def int_aarch64_neon_vcvtzu : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; // Vector maxNum (Floating Point) @@ -240,6 +240,30 @@ def int_aarch64_neon_vcvtf32_u32 : def int_aarch64_neon_vcvtf64_u64 : Intrinsic<[llvm_double_ty], [llvm_v1i64_ty], [IntrNoMem]>; +// Scalar Floating-point Convert +def int_aarch64_neon_fcvtxn : + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtns : + Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtnu : + Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtps : + Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtpu : + Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtms : + Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtmu : + Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtas : + Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtau : + Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtzs : + Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtzu : + Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>; + // Scalar Floating-point Reciprocal Exponent def int_aarch64_neon_vrecpx : Neon_1Arg_Intrinsic; diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 84e72a2173..aa241ebc5f 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -4307,18 +4307,23 @@ multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode, class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode, Instruction INSTD> - : Pat<(v1f32 (opnode (v1f64 FPR64:$Rn))), + : Pat<(f32 (opnode (f64 FPR64:$Rn))), (INSTD FPR64:$Rn)>; multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode, Instruction INSTS, Instruction INSTD> { - def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn))), + def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))), (INSTS FPR32:$Rn)>; - def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), + def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))), (INSTD FPR64:$Rn)>; } +class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode, + Instruction INSTD> + : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), + (INSTD FPR64:$Rn)>; + multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode, SDPatternOperator Dopnode, Instruction INSTS, @@ -4982,44 +4987,56 @@ def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn, FCVTXN>; defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtns, +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns, FCVTNSss, FCVTNSdd>; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>; defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtnu, +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu, FCVTNUss, FCVTNUdd>; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>; defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtms, +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms, FCVTMSss, FCVTMSdd>; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>; defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtmu, +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu, FCVTMUss, FCVTMUdd>; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>; defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtas, +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas, FCVTASss, FCVTASdd>; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>; defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtau, +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau, FCVTAUss, FCVTAUdd>; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>; defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtps, +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps, FCVTPSss, FCVTPSdd>; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>; defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">; -defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtpu, +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu, FCVTPUss, FCVTPUdd>; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>; defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs, FCVTZSss, FCVTZSdd>; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs, + FCVTZSdd>; defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu, FCVTZUss, FCVTZUdd>; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu, + FCVTZUdd>; // Patterns For Convert Instructions Between v1f64 and v1i64 class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode, @@ -8297,12 +8314,12 @@ multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U, let Constraints = "$src = $Rd"; } - def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))), + def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))), (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>; def : Pat<(v4f32 (concat_vectors (v2f32 VPR64:$src), - (v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))), + (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))), (!cast<Instruction>(prefix # "2d4s") (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), VPR128:$Rn)>; diff --git a/test/CodeGen/AArch64/neon-misc.ll b/test/CodeGen/AArch64/neon-misc.ll index 8510076482..ed0cc0fc41 100644 --- a/test/CodeGen/AArch64/neon-misc.ll +++ b/test/CodeGen/AArch64/neon-misc.ll @@ -894,13 +894,13 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 { ; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #4 + %vcvtx_f32_f641.i = call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %a) #4 ret <2 x float> %vcvtx_f32_f641.i } define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { ; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #4 + %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %b) #4 %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x float> %shuffle.i } @@ -1462,7 +1462,7 @@ declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2 declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2 -declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) #2 +declare <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double>) #2 declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2 diff --git a/test/CodeGen/AArch64/neon-scalar-fcvt.ll b/test/CodeGen/AArch64/neon-scalar-fcvt.ll index 460be2c2c3..6cf30a7df3 100644 --- a/test/CodeGen/AArch64/neon-scalar-fcvt.ll +++ b/test/CodeGen/AArch64/neon-scalar-fcvt.ll @@ -6,250 +6,228 @@ define float @test_vcvtxn(double %a) { ; CHECK: test_vcvtxn ; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}} entry: - %vcvtf.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtf1.i = tail call <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double> %vcvtf.i) - %0 = extractelement <1 x float> %vcvtf1.i, i32 0 - ret float %0 + %vcvtf = call float @llvm.aarch64.neon.fcvtxn(double %a) + ret float %vcvtf } -declare <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double>) +declare float @llvm.aarch64.neon.fcvtxn(double) define i32 @test_vcvtass(float %a) { ; CHECK: test_vcvtass ; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtas.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtas1.i = call <1 x i32> @llvm.arm.neon.vcvtas.v1i32.v1f32(<1 x float> %vcvtas.i) + %vcvtas1.i = call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtas1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.arm.neon.vcvtas.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float) define i64 @test_test_vcvtasd(double %a) { ; CHECK: test_test_vcvtasd ; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtas.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtas1.i = call <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double> %vcvtas.i) + %vcvtas1.i = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtas1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double) define i32 @test_vcvtaus(float %a) { ; CHECK: test_vcvtaus ; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtau.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtau1.i = call <1 x i32> @llvm.arm.neon.vcvtau.v1i32.v1f32(<1 x float> %vcvtau.i) + %vcvtau1.i = call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtau1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.arm.neon.vcvtau.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float) define i64 @test_vcvtaud(double %a) { ; CHECK: test_vcvtaud ; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtau.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtau1.i = call <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double> %vcvtau.i) + %vcvtau1.i = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtau1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double) define i32 @test_vcvtmss(float %a) { ; CHECK: test_vcvtmss ; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtms.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtms1.i = call <1 x i32> @llvm.arm.neon.vcvtms.v1i32.v1f32(<1 x float> %vcvtms.i) + %vcvtms1.i = call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtms1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.arm.neon.vcvtms.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float) define i64 @test_vcvtmd_s64_f64(double %a) { ; CHECK: test_vcvtmd_s64_f64 ; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtms.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtms1.i = call <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double> %vcvtms.i) + %vcvtms1.i = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtms1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double) define i32 @test_vcvtmus(float %a) { ; CHECK: test_vcvtmus ; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtmu.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtmu1.i = call <1 x i32> @llvm.arm.neon.vcvtmu.v1i32.v1f32(<1 x float> %vcvtmu.i) + %vcvtmu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.arm.neon.vcvtmu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float) define i64 @test_vcvtmud(double %a) { ; CHECK: test_vcvtmud ; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtmu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtmu1.i = call <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double> %vcvtmu.i) + %vcvtmu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double) define i32 @test_vcvtnss(float %a) { ; CHECK: test_vcvtnss ; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtns.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtns1.i = call <1 x i32> @llvm.arm.neon.vcvtns.v1i32.v1f32(<1 x float> %vcvtns.i) + %vcvtns1.i = call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtns1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.arm.neon.vcvtns.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float) define i64 @test_vcvtnd_s64_f64(double %a) { ; CHECK: test_vcvtnd_s64_f64 ; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtns.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtns1.i = call <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double> %vcvtns.i) + %vcvtns1.i = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtns1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double) define i32 @test_vcvtnus(float %a) { ; CHECK: test_vcvtnus ; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtnu.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtnu1.i = call <1 x i32> @llvm.arm.neon.vcvtnu.v1i32.v1f32(<1 x float> %vcvtnu.i) + %vcvtnu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.arm.neon.vcvtnu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float) define i64 @test_vcvtnud(double %a) { ; CHECK: test_vcvtnud ; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtnu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtnu1.i = call <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double> %vcvtnu.i) + %vcvtnu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double) define i32 @test_vcvtpss(float %a) { ; CHECK: test_vcvtpss ; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtps.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtps1.i = call <1 x i32> @llvm.arm.neon.vcvtps.v1i32.v1f32(<1 x float> %vcvtps.i) + %vcvtps1.i = call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtps1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.arm.neon.vcvtps.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float) define i64 @test_vcvtpd_s64_f64(double %a) { ; CHECK: test_vcvtpd_s64_f64 ; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtps.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtps1.i = call <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double> %vcvtps.i) + %vcvtps1.i = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtps1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double) define i32 @test_vcvtpus(float %a) { ; CHECK: test_vcvtpus ; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtpu.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtpu1.i = call <1 x i32> @llvm.arm.neon.vcvtpu.v1i32.v1f32(<1 x float> %vcvtpu.i) + %vcvtpu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.arm.neon.vcvtpu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float) define i64 @test_vcvtpud(double %a) { ; CHECK: test_vcvtpud ; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtpu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtpu1.i = tail call <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double> %vcvtpu.i) + %vcvtpu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double) define i32 @test_vcvtss(float %a) { ; CHECK: test_vcvtss ; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtzs.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtzs1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float> %vcvtzs.i) + %vcvtzs1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtzs1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float) define i64 @test_vcvtd_s64_f64(double %a) { ; CHECK: test_vcvtd_s64_f64 ; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}} entry: - %vcvzs.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvzs1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %vcvzs.i) + %vcvzs1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvzs1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double) define i32 @test_vcvtus(float %a) { ; CHECK: test_vcvtus ; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtzu.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtzu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float> %vcvtzu.i) + %vcvtzu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtzu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float) define i64 @test_vcvtud(double %a) { ; CHECK: test_vcvtud ; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtzu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtzu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %vcvtzu.i) + %vcvtzu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtzu1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double) |