summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManman Ren <mren@apple.com>2012-06-29 00:54:20 +0000
committerManman Ren <mren@apple.com>2012-06-29 00:54:20 +0000
commit40307c7dbe2d104784763c28697d7926793674af (patch)
tree726fca55dc107f1d5cc6e47f15bec61fe252a0c5
parentcfc49bfd3f7f4033b51979aca3fa7f998652d146 (diff)
downloadllvm-40307c7dbe2d104784763c28697d7926793674af.tar.gz
llvm-40307c7dbe2d104784763c28697d7926793674af.tar.bz2
llvm-40307c7dbe2d104784763c28697d7926793674af.tar.xz
X86: add more GATHER intrinsics in LLVM
Corrected type for index of llvm.x86.avx2.gather.d.pd.256 from 256-bit to 128-bit. Corrected types for src|dst|mask of llvm.x86.avx2.gather.q.ps.256 from 256-bit to 128-bit. Support the following intrinsics: llvm.x86.avx2.gather.d.q, llvm.x86.avx2.gather.q.q llvm.x86.avx2.gather.d.q.256, llvm.x86.avx2.gather.q.q.256 llvm.x86.avx2.gather.d.d, llvm.x86.avx2.gather.q.d llvm.x86.avx2.gather.d.d.256, llvm.x86.avx2.gather.q.d.256 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159402 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/IntrinsicsX86.td39
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp28
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp16
-rw-r--r--lib/Target/X86/X86InstrInfo.td6
-rw-r--r--lib/Target/X86/X86InstrSSE.td33
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86.ll100
-rw-r--r--test/MC/Disassembler/X86/simple-tests.txt20
-rw-r--r--test/MC/X86/x86_64-avx-encoding.s28
8 files changed, 231 insertions, 39 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index ba38c3ce13..14fd76d213 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1752,7 +1752,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[IntrReadMem]>;
def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
Intrinsic<[llvm_v2f64_ty],
@@ -1775,8 +1775,41 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v8f32_ty, llvm_i8_ty],
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+
+ def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrReadMem]>;
}
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index b13e1ca41c..d58e36c803 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -506,18 +506,26 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
// We can tell whether it is VSIB or SIB after instruction ID is decoded,
// but instruction ID may not be decoded yet when calling readSIB.
uint32_t Opcode = mcInst.getOpcode();
- bool IsGather = (Opcode == X86::VGATHERDPDrm ||
- Opcode == X86::VGATHERQPDrm ||
- Opcode == X86::VGATHERDPSrm ||
- Opcode == X86::VGATHERQPSrm);
- bool IsGatherY = (Opcode == X86::VGATHERDPDYrm ||
- Opcode == X86::VGATHERQPDYrm ||
- Opcode == X86::VGATHERDPSYrm ||
- Opcode == X86::VGATHERQPSYrm);
- if (IsGather || IsGatherY) {
+ bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
+ Opcode == X86::VGATHERDPDYrm ||
+ Opcode == X86::VGATHERQPDrm ||
+ Opcode == X86::VGATHERDPSrm ||
+ Opcode == X86::VGATHERQPSrm ||
+ Opcode == X86::VPGATHERDQrm ||
+ Opcode == X86::VPGATHERDQYrm ||
+ Opcode == X86::VPGATHERQQrm ||
+ Opcode == X86::VPGATHERDDrm ||
+ Opcode == X86::VPGATHERQDrm);
+ bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
+ Opcode == X86::VGATHERDPSYrm ||
+ Opcode == X86::VGATHERQPSYrm ||
+ Opcode == X86::VPGATHERQQYrm ||
+ Opcode == X86::VPGATHERDDYrm ||
+ Opcode == X86::VPGATHERQDYrm);
+ if (IndexIs128 || IndexIs256) {
unsigned IndexOffset = insn.sibIndex -
(insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
- SIBIndex IndexBase = IsGatherY ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
+ SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
insn.sibIndex = (SIBIndex)(IndexBase +
(insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index ea9e5bcf18..cad90f48cb 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2011,6 +2011,22 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return SelectGather(Node, X86::VGATHERQPSrm);
case Intrinsic::x86_avx2_gather_q_ps_256:
return SelectGather(Node, X86::VGATHERQPSYrm);
+ case Intrinsic::x86_avx2_gather_d_q:
+ return SelectGather(Node, X86::VPGATHERDQrm);
+ case Intrinsic::x86_avx2_gather_d_q_256:
+ return SelectGather(Node, X86::VPGATHERDQYrm);
+ case Intrinsic::x86_avx2_gather_q_q:
+ return SelectGather(Node, X86::VPGATHERQQrm);
+ case Intrinsic::x86_avx2_gather_q_q_256:
+ return SelectGather(Node, X86::VPGATHERQQYrm);
+ case Intrinsic::x86_avx2_gather_d_d:
+ return SelectGather(Node, X86::VPGATHERDDrm);
+ case Intrinsic::x86_avx2_gather_d_d_256:
+ return SelectGather(Node, X86::VPGATHERDDYrm);
+ case Intrinsic::x86_avx2_gather_q_d:
+ return SelectGather(Node, X86::VPGATHERQDrm);
+ case Intrinsic::x86_avx2_gather_q_d_256:
+ return SelectGather(Node, X86::VPGATHERQDYrm);
}
break;
}
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0023424e66..0f7b787c1b 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -325,12 +325,10 @@ def f128mem : X86MemOperand<"printf128mem"> {
let ParserMatchClass = X86Mem128AsmOperand; }
def f256mem : X86MemOperand<"printf256mem">{
let ParserMatchClass = X86Mem256AsmOperand; }
-def v128mem : Operand<iPTR> {
- let PrintMethod = "printf128mem";
+def v128mem : X86MemOperand<"printf128mem"> {
let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
let ParserMatchClass = X86Mem128AsmOperand; }
-def v256mem : Operand<iPTR> {
- let PrintMethod = "printf256mem";
+def v256mem : X86MemOperand<"printf256mem"> {
let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
let ParserMatchClass = X86Mem256AsmOperand; }
}
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 8974d45352..ad8d15dab3 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7997,37 +7997,52 @@ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations
-//
-// [(set VR128:$dst, (IntGather128 VR128:$src1, addr:$src2, VR128:$idx,
-// VR128:$mask, (i8 imm:$sc)))]>, VEX_4VOp3;
-// [(set VR256:$dst, (IntGather256 VR256:$src1, addr:$src2, VR256:$idx,
-// VR256:$mask, (i8 imm:$sc)))]>, VEX_4VOp3;
multiclass avx2_gather<bits<8> opc, string OpcodeStr,
+ RegisterClass RC256, X86MemOperand memop256,
Intrinsic IntGather128, Intrinsic IntGather256> {
def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, v128mem:$src2, VR128:$mask),
!strconcat(OpcodeStr,
"\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
[]>, VEX_4VOp3;
- def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, v256mem:$src2, VR256:$mask),
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst),
+ (ins RC256:$src1, memop256:$src2, RC256:$mask),
!strconcat(OpcodeStr,
"\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
- []>, VEX_4VOp3;
+ []>, VEX_4VOp3, VEX_L;
}
-//let Constraints = "$src1 = $dst, $mask = $mask_wb" in {
let Constraints = "$src1 = $dst" in {
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
+ VR256, v128mem,
int_x86_avx2_gather_d_pd,
int_x86_avx2_gather_d_pd_256>, VEX_W;
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
+ VR256, v256mem,
int_x86_avx2_gather_q_pd,
int_x86_avx2_gather_q_pd_256>, VEX_W;
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
+ VR256, v256mem,
int_x86_avx2_gather_d_ps,
int_x86_avx2_gather_d_ps_256>;
defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
+ VR128, v256mem,
int_x86_avx2_gather_q_ps,
int_x86_avx2_gather_q_ps_256>;
+ defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
+ VR256, v128mem,
+ int_x86_avx2_gather_d_q,
+ int_x86_avx2_gather_d_q_256>, VEX_W;
+ defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
+ VR256, v256mem,
+ int_x86_avx2_gather_q_q,
+ int_x86_avx2_gather_q_q_256>, VEX_W;
+ defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
+ VR256, v256mem,
+ int_x86_avx2_gather_d_d,
+ int_x86_avx2_gather_d_d_256>;
+ defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
+ VR128, v256mem,
+ int_x86_avx2_gather_q_d,
+ int_x86_avx2_gather_q_d_256>;
}
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 3fb3497f35..459dbb235a 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -988,14 +988,14 @@ declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
<4 x i32>, <2 x double>, i8) nounwind readonly
define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1,
- <8 x i32> %idx, <4 x double> %mask) {
+ <4 x i32> %idx, <4 x double> %mask) {
; CHECK: vgatherdpd
%res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
- i8* %a1, <8 x i32> %idx, <4 x double> %mask, i8 2) ;
+ i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
- <8 x i32>, <4 x double>, i8) nounwind readonly
+ <4 x i32>, <4 x double>, i8) nounwind readonly
define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1,
<2 x i64> %idx, <2 x double> %mask) {
@@ -1047,12 +1047,92 @@ define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1,
declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*,
<2 x i64>, <4 x float>, i8) nounwind readonly
-define <8 x float> @test_x86_avx2_gather_q_ps_256(<8 x float> %a0, i8* %a1,
- <4 x i64> %idx, <8 x float> %mask) {
+define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1,
+ <4 x i64> %idx, <4 x float> %mask) {
; CHECK: vgatherqps
- %res = call <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float> %a0,
- i8* %a1, <4 x i64> %idx, <8 x float> %mask, i8 2) ;
- ret <8 x float> %res
+ %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
+ i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*,
+ <4 x i64>, <4 x float>, i8) nounwind readonly
+
+define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1,
+ <4 x i32> %idx, <2 x i64> %mask) {
+ ; CHECK: vpgatherdq
+ %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
+ i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*,
+ <4 x i32>, <2 x i64>, i8) nounwind readonly
+
+define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1,
+ <4 x i32> %idx, <4 x i64> %mask) {
+ ; CHECK: vpgatherdq
+ %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
+ i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*,
+ <4 x i32>, <4 x i64>, i8) nounwind readonly
+
+define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1,
+ <2 x i64> %idx, <2 x i64> %mask) {
+ ; CHECK: vpgatherqq
+ %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
+ i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*,
+ <2 x i64>, <2 x i64>, i8) nounwind readonly
+
+define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1,
+ <4 x i64> %idx, <4 x i64> %mask) {
+ ; CHECK: vpgatherqq
+ %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
+ i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*,
+ <4 x i64>, <4 x i64>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1,
+ <4 x i32> %idx, <4 x i32> %mask) {
+ ; CHECK: vpgatherdd
+ %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
+ i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*,
+ <4 x i32>, <4 x i32>, i8) nounwind readonly
+
+define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1,
+ <8 x i32> %idx, <8 x i32> %mask) {
+ ; CHECK: vpgatherdd
+ %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
+ i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*,
+ <8 x i32>, <8 x i32>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1,
+ <2 x i64> %idx, <4 x i32> %mask) {
+ ; CHECK: vpgatherqd
+ %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
+ i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*,
+ <2 x i64>, <4 x i32>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1,
+ <4 x i64> %idx, <4 x i32> %mask) {
+ ; CHECK: vpgatherqd
+ %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
+ i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
}
-declare <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float>, i8*,
- <4 x i64>, <8 x float>, i8) nounwind readonly
+declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
+ <4 x i64>, <4 x i32>, i8) nounwind readonly
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index c543e46173..712c95aa72 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -728,9 +728,27 @@
# CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
0xc4 0xe2 0xe9 0x92 0x04 0x4f
-# CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
+# CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
+0xc4 0xe2 0xed 0x92 0x04 0x4f
+
+# CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
+0xc4 0x02 0x29 0x93 0x04 0x4f
+
+# CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
0xc4 0x02 0x2d 0x93 0x04 0x4f
+# CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
+0xc4 0xe2 0xe9 0x90 0x04 0x4f
+
+# CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
+0xc4 0xe2 0xed 0x90 0x04 0x4f
+
+# CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
+0xc4 0x02 0x29 0x91 0x04 0x4f
+
+# CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
+0xc4 0x02 0x2d 0x91 0x04 0x4f
+
# rdar://8812056 lldb doesn't print the x86 lock prefix when disassembling
# CHECK: lock
# CHECK-NEXT: xaddq %rcx, %rbx
diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s
index 77c0ff358e..b9943bafe7 100644
--- a/test/MC/X86/x86_64-avx-encoding.s
+++ b/test/MC/X86/x86_64-avx-encoding.s
@@ -4126,6 +4126,30 @@ _foo2:
// CHECK: encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f]
vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
-// CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
+// CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xed,0x92,0x04,0x4f]
+ vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
+
+// CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
+// CHECK: encoding: [0xc4,0x02,0x29,0x93,0x04,0x4f]
+ vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
+
+// CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
// CHECK: encoding: [0xc4,0x02,0x2d,0x93,0x04,0x4f]
- vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
+ vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
+
+// CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x4f]
+ vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
+
+// CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xed,0x90,0x04,0x4f]
+ vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
+
+// CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
+// CHECK: encoding: [0xc4,0x02,0x29,0x91,0x04,0x4f]
+ vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
+
+// CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
+// CHECK: encoding: [0xc4,0x02,0x2d,0x91,0x04,0x4f]
+ vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10