summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/IntrinsicsX86.td13
-rw-r--r--lib/Target/X86/X86InstrSSE.td39
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll12
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86.ll23
4 files changed, 66 insertions, 21 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 04a119f055..09f958c131 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1281,13 +1281,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vbroadcastss :
+ def int_x86_avx_vbroadcast_ss :
GCCBuiltin<"__builtin_ia32_vbroadcastss">,
Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
def int_x86_avx_vbroadcast_sd_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>;
- def int_x86_avx_vbroadcastss_256 :
+ def int_x86_avx_vbroadcast_ss_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
def int_x86_avx_vbroadcastf128_pd_256 :
@@ -1672,6 +1672,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_vbroadcast_ss_ps : // AVX2 broadcast whose source is a v4f32 value, not a pointer
+ GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; // vector operand only: no memory is read, so IntrNoMem (was IntrReadMem)
+ def int_x86_avx2_vbroadcast_sd_pd_256 : // v2f64 value in, v4f64 result out
+ GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; // no pointer operand, hence no memory access
+ def int_x86_avx2_vbroadcast_ss_ps_256 : // v4f32 value in, v8f32 result out
+ GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; // no pointer operand, hence no memory access
def int_x86_avx2_vbroadcasti128 :
GCCBuiltin<"__builtin_ia32_vbroadcastsi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 77a9031090..8f7e27bab5 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7083,35 +7083,48 @@ class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (Int addr:$src))]>, VEX;
-def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
- int_x86_avx_vbroadcastss>;
-def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
- int_x86_avx_vbroadcastss_256>;
-def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
- int_x86_avx_vbroadcast_sd_256>;
+class avx_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC, // register-source counterpart of avx_broadcast (which matches addr:$src)
+ Intrinsic Int> : // Int: the intrinsic matched by the selection pattern below
+ AVX8I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), // source is always VR128; RC only chooses the destination register class
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), // asm string: two syntax variants with opposite operand order
+ [(set RC:$dst, (Int VR128:$src))]>, VEX; // select (Int <VR128 value>) directly to this instruction
+
+def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, // memory-source (rm) form: f32mem operand, VR128 destination
+ int_x86_avx_vbroadcast_ss>;
+def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, // same opcode and mnemonic as above, VR256 destination
+ int_x86_avx_vbroadcast_ss_256>;
+def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, // f64mem operand, VR256 destination
+ int_x86_avx_vbroadcast_sd_256>;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256>;
-let Predicates = [HasAVX2] in
+let Predicates = [HasAVX2] in {
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
int_x86_avx2_vbroadcasti128>;
+def VBROADCASTSSrr : avx_broadcast_reg<0x18, "vbroadcastss", VR128, // register-source (rr) form; same opcode 0x18 as VBROADCASTSSrm
+ int_x86_avx2_vbroadcast_ss_ps>;
+def VBROADCASTSSYrr : avx_broadcast_reg<0x18, "vbroadcastss", VR256, // VR256 destination; source is still VR128 (fixed by avx_broadcast_reg)
+ int_x86_avx2_vbroadcast_ss_ps_256>;
+def VBROADCASTSDrr : avx_broadcast_reg<0x19, "vbroadcastsd", VR256, // same opcode 0x19 as VBROADCASTSDrm; no VR128-destination variant is defined here
+ int_x86_avx2_vbroadcast_sd_pd_256>;
+}
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSSY addr:$src)>;
+ (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
- (VBROADCASTSD addr:$src)>;
+ (VBROADCASTSDrm addr:$src)>;
def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSY addr:$src)>;
+ (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSD addr:$src)>;
+ (VBROADCASTSDrm addr:$src)>;
def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSS addr:$src)>;
+ (VBROADCASTSSrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSS addr:$src)>;
+ (VBROADCASTSSrm addr:$src)>;
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 2aad672d2c..276209ea75 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -2292,20 +2292,20 @@ define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
-define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
+define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
; CHECK: vbroadcastss
- %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly
+declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
-define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
+define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
; CHECK: vbroadcastss
- %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1]
+ %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
-declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly
+declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 8aef255cae..81ffdea827 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -743,3 +743,26 @@ define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly
+
+define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) { ; register-source broadcast: operand is a <2 x double> value, not an i8* pointer
+ ; CHECK: vbroadcastsd
+ %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
+
+
+define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) { ; register-source broadcast: operand is a <4 x float> value, not an i8* pointer
+ ; CHECK: vbroadcastss
+ %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
+
+
+define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) { ; register-source broadcast: <4 x float> value in, <8 x float> result out
+ ; CHECK: vbroadcastss
+ %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly