diff options
-rw-r--r--  include/llvm/IR/IntrinsicsX86.td       |  9
-rw-r--r--  lib/Target/X86/X86InstrSSE.td          | 32
-rw-r--r--  test/CodeGen/X86/avx-intrinsics-x86.ll | 24
3 files changed, 17 insertions, 48 deletions
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 36d93fee8a..88ee1cec2d 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -1304,15 +1304,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 
 // Vector load with broadcast
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
-  def int_x86_avx_vbroadcast_ss :
-        GCCBuiltin<"__builtin_ia32_vbroadcastss">,
-        Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
-  def int_x86_avx_vbroadcast_sd_256 :
-        GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
-        Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
-  def int_x86_avx_vbroadcast_ss_256 :
-        GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
-        Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
   def int_x86_avx_vbroadcastf128_pd_256 :
         GCCBuiltin<"__builtin_ia32_vbroadcastf128_pd256">,
         Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1eb04851b7..043b2f32c6 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7969,6 +7969,16 @@ class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
         [(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX;
 
+class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+                           X86MemOperand x86memop, ValueType VT,
+                           PatFrag ld_frag, SchedWrite Sched> :
+  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+        [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
+        Sched<[Sched]>, VEX {
+    let mayLoad = 1;
+}
+
 // AVX2 adds register forms
 class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
                          Intrinsic Int, SchedWrite Sched> :
@@ -7977,16 +7987,15 @@ class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
          [(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX;
 
 let ExeDomain = SSEPackedSingle in {
-  def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
-                                     int_x86_avx_vbroadcast_ss, WriteLoad>;
-  def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
-                                      int_x86_avx_vbroadcast_ss_256,
-                                      WriteFShuffleLd>, VEX_L;
+  def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128,
+                                            f32mem, v4f32, loadf32, WriteLoad>;
+  def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256,
+                                             f32mem, v8f32, loadf32,
+                                             WriteFShuffleLd>, VEX_L;
 }
 let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
-                                    int_x86_avx_vbroadcast_sd_256,
-                                    WriteFShuffleLd>, VEX_L;
+def VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem,
+                                           v4f64, loadf64, WriteFShuffleLd>, VEX_L;
 def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
                                    int_x86_avx_vbroadcastf128_pd_256,
                                    WriteFShuffleLd>, VEX_L;
@@ -8543,13 +8552,6 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
 }
 
 let Predicates = [HasAVX] in {
-def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
-          (VBROADCASTSSYrm addr:$src)>;
-def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
-          (VBROADCASTSDYrm addr:$src)>;
-def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
-          (VBROADCASTSSrm addr:$src)>;
-
   // Provide fallback in case the load node that is used in the patterns above
   // is used by additional users, which prevents the pattern selection.
   let AddedComplexity = 20 in {
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 0be83f648d..ce31161dbb 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -2219,14 +2219,6 @@ define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
 
 
-define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
-  ; CHECK: vbroadcastsd
-  %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1]
-  ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly
-
-
 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
   ; CHECK: vbroadcastf128
   %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
@@ -2243,22 +2235,6 @@ define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
 
 
-define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
-  ; CHECK: vbroadcastss
-  %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
-
-
-define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
-  ; CHECK: vbroadcastss
-  %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
-  ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
-
-
 define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
   ; CHECK: vextractf128
   %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]