summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp2
-rw-r--r--lib/Target/X86/X86InstrSSE.td4
-rw-r--r--test/CodeGen/X86/avx2-gather.ll18
3 files changed, 23 insertions, 1 deletions
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 02f5fe4952..14ec2ddb4e 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2041,6 +2041,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::x86_avx2_gather_d_d_256:
case Intrinsic::x86_avx2_gather_q_d:
case Intrinsic::x86_avx2_gather_q_d_256: {
+ if (!Subtarget->hasAVX2())
+ break;
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic");
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index cce7788fb3..a1d3e81311 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -8372,7 +8372,9 @@ multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
[]>, VEX_4VOp3, VEX_L;
}
-let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in {
+let mayLoad = 1, Constraints
+ = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
+ in {
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
diff --git a/test/CodeGen/X86/avx2-gather.ll b/test/CodeGen/X86/avx2-gather.ll
new file mode 100644
index 0000000000..ee50c457fe
--- /dev/null
+++ b/test/CodeGen/X86/avx2-gather.ll
@@ -0,0 +1,18 @@
+; RUN: not llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
+
+declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
+ <4 x i32>, <4 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1,
+ <4 x i32> %idx, <4 x float> %mask) {
+ %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef,
+ i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+
+; CHECK: test_x86_avx2_gather_d_ps
+; CHECK: vgatherdps
+; CHECK-NOT: [[DST]]
+; CHECK: [[DST:%xmm[0-9]+]]{{$}}
+; CHECK: ret