diff options
author | Manman Ren <mren@apple.com> | 2012-06-29 00:54:20 +0000 |
---|---|---|
committer | Manman Ren <mren@apple.com> | 2012-06-29 00:54:20 +0000 |
commit | 40307c7dbe2d104784763c28697d7926793674af (patch) | |
tree | 726fca55dc107f1d5cc6e47f15bec61fe252a0c5 | |
parent | cfc49bfd3f7f4033b51979aca3fa7f998652d146 (diff) | |
download | llvm-40307c7dbe2d104784763c28697d7926793674af.tar.gz llvm-40307c7dbe2d104784763c28697d7926793674af.tar.bz2 llvm-40307c7dbe2d104784763c28697d7926793674af.tar.xz |
X86: add more GATHER intrinsics in LLVM
Corrected the type of the index operand of llvm.x86.avx2.gather.d.pd.256
from 256-bit to 128-bit.
Corrected the types of the source, destination, and mask operands of
llvm.x86.avx2.gather.q.ps.256 from 256-bit to 128-bit.
Added support for the following intrinsics:
llvm.x86.avx2.gather.d.q, llvm.x86.avx2.gather.q.q,
llvm.x86.avx2.gather.d.q.256, llvm.x86.avx2.gather.q.q.256,
llvm.x86.avx2.gather.d.d, llvm.x86.avx2.gather.q.d,
llvm.x86.avx2.gather.d.d.256, and llvm.x86.avx2.gather.q.d.256.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159402 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/IntrinsicsX86.td | 39 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86Disassembler.cpp | 28 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 16 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 33 | ||||
-rw-r--r-- | test/CodeGen/X86/avx2-intrinsics-x86.ll | 100 | ||||
-rw-r--r-- | test/MC/Disassembler/X86/simple-tests.txt | 20 | ||||
-rw-r--r-- | test/MC/X86/x86_64-avx-encoding.s | 28 |
8 files changed, 231 insertions, 39 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index ba38c3ce13..14fd76d213 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1752,7 +1752,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [IntrReadMem]>; def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">, Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadMem]>; def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">, Intrinsic<[llvm_v2f64_ty], @@ -1775,8 +1775,41 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadMem]>; def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v8f32_ty, llvm_i8_ty], + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrReadMem]>; + + def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_d_d : 
GCCBuiltin<"__builtin_ia32_gatherd_d">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrReadMem]>; } diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index b13e1ca41c..d58e36c803 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -506,18 +506,26 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, // We can tell whether it is VSIB or SIB after instruction ID is decoded, // but instruction ID may not be decoded yet when calling readSIB. 
uint32_t Opcode = mcInst.getOpcode(); - bool IsGather = (Opcode == X86::VGATHERDPDrm || - Opcode == X86::VGATHERQPDrm || - Opcode == X86::VGATHERDPSrm || - Opcode == X86::VGATHERQPSrm); - bool IsGatherY = (Opcode == X86::VGATHERDPDYrm || - Opcode == X86::VGATHERQPDYrm || - Opcode == X86::VGATHERDPSYrm || - Opcode == X86::VGATHERQPSYrm); - if (IsGather || IsGatherY) { + bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || + Opcode == X86::VGATHERDPDYrm || + Opcode == X86::VGATHERQPDrm || + Opcode == X86::VGATHERDPSrm || + Opcode == X86::VGATHERQPSrm || + Opcode == X86::VPGATHERDQrm || + Opcode == X86::VPGATHERDQYrm || + Opcode == X86::VPGATHERQQrm || + Opcode == X86::VPGATHERDDrm || + Opcode == X86::VPGATHERQDrm); + bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || + Opcode == X86::VGATHERDPSYrm || + Opcode == X86::VGATHERQPSYrm || + Opcode == X86::VPGATHERQQYrm || + Opcode == X86::VPGATHERDDYrm || + Opcode == X86::VPGATHERQDYrm); + if (IndexIs128 || IndexIs256) { unsigned IndexOffset = insn.sibIndex - (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); - SIBIndex IndexBase = IsGatherY ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; + SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; insn.sibIndex = (SIBIndex)(IndexBase + (insn.sibIndex == SIB_INDEX_NONE ? 
4 : IndexOffset)); } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index ea9e5bcf18..cad90f48cb 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2011,6 +2011,22 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return SelectGather(Node, X86::VGATHERQPSrm); case Intrinsic::x86_avx2_gather_q_ps_256: return SelectGather(Node, X86::VGATHERQPSYrm); + case Intrinsic::x86_avx2_gather_d_q: + return SelectGather(Node, X86::VPGATHERDQrm); + case Intrinsic::x86_avx2_gather_d_q_256: + return SelectGather(Node, X86::VPGATHERDQYrm); + case Intrinsic::x86_avx2_gather_q_q: + return SelectGather(Node, X86::VPGATHERQQrm); + case Intrinsic::x86_avx2_gather_q_q_256: + return SelectGather(Node, X86::VPGATHERQQYrm); + case Intrinsic::x86_avx2_gather_d_d: + return SelectGather(Node, X86::VPGATHERDDrm); + case Intrinsic::x86_avx2_gather_d_d_256: + return SelectGather(Node, X86::VPGATHERDDYrm); + case Intrinsic::x86_avx2_gather_q_d: + return SelectGather(Node, X86::VPGATHERQDrm); + case Intrinsic::x86_avx2_gather_q_d_256: + return SelectGather(Node, X86::VPGATHERQDYrm); } break; } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0023424e66..0f7b787c1b 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -325,12 +325,10 @@ def f128mem : X86MemOperand<"printf128mem"> { let ParserMatchClass = X86Mem128AsmOperand; } def f256mem : X86MemOperand<"printf256mem">{ let ParserMatchClass = X86Mem256AsmOperand; } -def v128mem : Operand<iPTR> { - let PrintMethod = "printf128mem"; +def v128mem : X86MemOperand<"printf128mem"> { let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm); let ParserMatchClass = X86Mem128AsmOperand; } -def v256mem : Operand<iPTR> { - let PrintMethod = "printf256mem"; +def v256mem : X86MemOperand<"printf256mem"> { let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm); let ParserMatchClass = X86Mem256AsmOperand; } } 
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8974d45352..ad8d15dab3 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7997,37 +7997,52 @@ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; //===----------------------------------------------------------------------===// // VGATHER - GATHER Operations -// -// [(set VR128:$dst, (IntGather128 VR128:$src1, addr:$src2, VR128:$idx, -// VR128:$mask, (i8 imm:$sc)))]>, VEX_4VOp3; -// [(set VR256:$dst, (IntGather256 VR256:$src1, addr:$src2, VR256:$idx, -// VR256:$mask, (i8 imm:$sc)))]>, VEX_4VOp3; multiclass avx2_gather<bits<8> opc, string OpcodeStr, + RegisterClass RC256, X86MemOperand memop256, Intrinsic IntGather128, Intrinsic IntGather256> { def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, v128mem:$src2, VR128:$mask), !strconcat(OpcodeStr, "\t{$src1, $src2, $mask|$mask, $src2, $src1}"), []>, VEX_4VOp3; - def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, v256mem:$src2, VR256:$mask), + def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst), + (ins RC256:$src1, memop256:$src2, RC256:$mask), !strconcat(OpcodeStr, "\t{$src1, $src2, $mask|$mask, $src2, $src1}"), - []>, VEX_4VOp3; + []>, VEX_4VOp3, VEX_L; } -//let Constraints = "$src1 = $dst, $mask = $mask_wb" in { let Constraints = "$src1 = $dst" in { defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", + VR256, v128mem, int_x86_avx2_gather_d_pd, int_x86_avx2_gather_d_pd_256>, VEX_W; defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", + VR256, v256mem, int_x86_avx2_gather_q_pd, int_x86_avx2_gather_q_pd_256>, VEX_W; defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", + VR256, v256mem, int_x86_avx2_gather_d_ps, int_x86_avx2_gather_d_ps_256>; defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", + VR128, v256mem, int_x86_avx2_gather_q_ps, int_x86_avx2_gather_q_ps_256>; + defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", + VR256, v128mem, + int_x86_avx2_gather_d_q, + 
int_x86_avx2_gather_d_q_256>, VEX_W; + defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", + VR256, v256mem, + int_x86_avx2_gather_q_q, + int_x86_avx2_gather_q_q_256>, VEX_W; + defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", + VR256, v256mem, + int_x86_avx2_gather_d_d, + int_x86_avx2_gather_d_d_256>; + defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", + VR128, v256mem, + int_x86_avx2_gather_q_d, + int_x86_avx2_gather_q_d_256>; } diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index 3fb3497f35..459dbb235a 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -988,14 +988,14 @@ declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1, - <8 x i32> %idx, <4 x double> %mask) { + <4 x i32> %idx, <4 x double> %mask) { ; CHECK: vgatherdpd %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, - i8* %a1, <8 x i32> %idx, <4 x double> %mask, i8 2) ; + i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ; ret <4 x double> %res } declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, - <8 x i32>, <4 x double>, i8) nounwind readonly + <4 x i32>, <4 x double>, i8) nounwind readonly define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1, <2 x i64> %idx, <2 x double> %mask) { @@ -1047,12 +1047,92 @@ define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1, declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly -define <8 x float> @test_x86_avx2_gather_q_ps_256(<8 x float> %a0, i8* %a1, - <4 x i64> %idx, <8 x float> %mask) { +define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1, + <4 x i64> %idx, <4 x float> %mask) { ; CHECK: vgatherqps - %res = call <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float> %a0, - i8* 
%a1, <4 x i64> %idx, <8 x float> %mask, i8 2) ; - ret <8 x float> %res + %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, + i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, + <4 x i64>, <4 x float>, i8) nounwind readonly + +define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1, + <4 x i32> %idx, <2 x i64> %mask) { + ; CHECK: vpgatherdq + %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, + i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, + <4 x i32>, <2 x i64>, i8) nounwind readonly + +define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1, + <4 x i32> %idx, <4 x i64> %mask) { + ; CHECK: vpgatherdq + %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, + i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ; + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, + <4 x i32>, <4 x i64>, i8) nounwind readonly + +define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1, + <2 x i64> %idx, <2 x i64> %mask) { + ; CHECK: vpgatherqq + %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, + i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, + <2 x i64>, <2 x i64>, i8) nounwind readonly + +define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1, + <4 x i64> %idx, <4 x i64> %mask) { + ; CHECK: vpgatherqq + %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, + i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ; + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, + <4 x i64>, <4 x i64>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1, + <4 x i32> %idx, <4 x i32> %mask) { + ; CHECK: 
vpgatherdd + %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, + i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, + <4 x i32>, <4 x i32>, i8) nounwind readonly + +define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1, + <8 x i32> %idx, <8 x i32> %mask) { + ; CHECK: vpgatherdd + %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, + i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ; + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, + <8 x i32>, <8 x i32>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1, + <2 x i64> %idx, <4 x i32> %mask) { + ; CHECK: vpgatherqd + %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, + i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, + <2 x i64>, <4 x i32>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1, + <4 x i64> %idx, <4 x i32> %mask) { + ; CHECK: vpgatherqd + %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, + i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res } -declare <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float>, i8*, - <4 x i64>, <8 x float>, i8) nounwind readonly +declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, + <4 x i64>, <4 x i32>, i8) nounwind readonly diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt index c543e46173..712c95aa72 100644 --- a/test/MC/Disassembler/X86/simple-tests.txt +++ b/test/MC/Disassembler/X86/simple-tests.txt @@ -728,9 +728,27 @@ # CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2 0xc4 0xe2 0xe9 0x92 0x04 0x4f -# CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10 +# CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2 +0xc4 0xe2 0xed 0x92 0x04 0x4f + +# 
CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10 +0xc4 0x02 0x29 0x93 0x04 0x4f + +# CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10 0xc4 0x02 0x2d 0x93 0x04 0x4f +# CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2 +0xc4 0xe2 0xe9 0x90 0x04 0x4f + +# CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2 +0xc4 0xe2 0xed 0x90 0x04 0x4f + +# CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10 +0xc4 0x02 0x29 0x91 0x04 0x4f + +# CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10 +0xc4 0x02 0x2d 0x91 0x04 0x4f + # rdar://8812056 lldb doesn't print the x86 lock prefix when disassembling # CHECK: lock # CHECK-NEXT: xaddq %rcx, %rbx diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s index 77c0ff358e..b9943bafe7 100644 --- a/test/MC/X86/x86_64-avx-encoding.s +++ b/test/MC/X86/x86_64-avx-encoding.s @@ -4126,6 +4126,30 @@ _foo2: // CHECK: encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f] vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2 -// CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10 +// CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0xed,0x92,0x04,0x4f] + vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2 + +// CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10 +// CHECK: encoding: [0xc4,0x02,0x29,0x93,0x04,0x4f] + vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10 + +// CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10 // CHECK: encoding: [0xc4,0x02,0x2d,0x93,0x04,0x4f] - vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10 + vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10 + +// CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x4f] + vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2 + +// CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0xed,0x90,0x04,0x4f] + vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2 + +// CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10 +// CHECK: encoding: [0xc4,0x02,0x29,0x91,0x04,0x4f] + vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10 + +// CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10 +// CHECK: encoding: 
[0xc4,0x02,0x2d,0x91,0x04,0x4f] + vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10 |