summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/IntrinsicsX86.td11
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp5
-rw-r--r--lib/Target/X86/X86InstrSSE.td25
-rw-r--r--test/CodeGen/X86/sse4a.ll41
-rw-r--r--test/MC/Disassembler/X86/x86-32.txt18
-rw-r--r--test/MC/Disassembler/X86/x86-64.txt18
-rw-r--r--test/MC/X86/x86_64-sse4a.s25
-rw-r--r--utils/TableGen/X86RecognizableInstr.cpp8
8 files changed, 145 insertions, 6 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 805d3667d1..afba3a0667 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1008,6 +1008,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// SSE4A
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse4a_extrqi : GCCBuiltin<"__builtin_ia32_extrqi">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty], []>;
+ def int_x86_sse4a_extrq : GCCBuiltin<"__builtin_ia32_extrq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], []>;
+
+ def int_x86_sse4a_insertqi : GCCBuiltin<"__builtin_ia32_insertqi">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_i8_ty, llvm_i8_ty], []>;
+ def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], []>;
+
def int_x86_sse4a_movnt_ss : GCCBuiltin<"__builtin_ia32_movntss">,
Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty], []>;
def int_x86_sse4a_movnt_sd : GCCBuiltin<"__builtin_ia32_movntsd">,
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 24ac52d8e4..f79073ff58 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -1150,8 +1150,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
}
// If there is a remaining operand, it must be a trailing immediate. Emit it
- // according to the right size for the instruction.
- if (CurOp != NumOps) {
+ // according to the right size for the instruction. Some instructions
+ // (SSE4a extrq and insertq) have two trailing immediates.
+ while (CurOp != NumOps && NumOps - CurOp <= 2) {
// The last source register of a 4 operand instruction in AVX is encoded
// in bits[7:4] of a immediate byte.
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index a518997b78..e9545cdbe6 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7268,6 +7268,31 @@ defm : pclmul_alias<"lqlq", 0x00>;
//===----------------------------------------------------------------------===//
let Predicates = [HasSSE4A] in {
+
+let Constraints = "$src = $dst" in {
+def EXTRQI : Ii8<0x78, MRM0r, (outs VR128:$dst),
+ (ins VR128:$src, i8imm:$len, i8imm:$idx),
+ "extrq\t{$idx, $len, $src|$src, $len, $idx}",
+ [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len,
+ imm:$idx))]>, TB, OpSize;
+def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src, VR128:$mask),
+ "extrq\t{$mask, $src|$src, $mask}",
+ [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
+ VR128:$mask))]>, TB, OpSize;
+
+def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx),
+ "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
+ [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src,
+ VR128:$src2, imm:$len, imm:$idx))]>, XD;
+def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src, VR128:$mask),
+ "insertq\t{$mask, $src|$src, $mask}",
+ [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
+ VR128:$mask))]>, XD;
+}
+
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
"movntss\t{$src, $dst|$dst, $src}",
[(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS;
diff --git a/test/CodeGen/X86/sse4a.ll b/test/CodeGen/X86/sse4a.ll
index 14c0fb3df2..076e213364 100644
--- a/test/CodeGen/X86/sse4a.ll
+++ b/test/CodeGen/X86/sse4a.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4a | FileCheck %s
define void @test1(i8* %p, <4 x float> %a) nounwind optsize ssp {
+; CHECK: test1:
; CHECK: movntss
-entry:
tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a) nounwind
ret void
}
@@ -10,10 +10,47 @@ entry:
declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>)
define void @test2(i8* %p, <2 x double> %a) nounwind optsize ssp {
+; CHECK: test2:
; CHECK: movntsd
-entry:
tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a) nounwind
ret void
}
declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>)
+
+define <2 x i64> @test3(<2 x i64> %x) nounwind uwtable ssp {
+; CHECK: test3:
+; CHECK: extrq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
+ ret <2 x i64> %1
+}
+
+declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
+
+define <2 x i64> @test4(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
+; CHECK: test4:
+; CHECK: extrq
+ %1 = bitcast <2 x i64> %y to <16 x i8>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
+ ret <2 x i64> %2
+}
+
+declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
+
+define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
+; CHECK: test5:
+; CHECK: insertq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 5, i8 6)
+ ret <2 x i64> %1
+}
+
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
+
+define <2 x i64> @test6(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
+; CHECK: test6:
+; CHECK: insertq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
+ ret <2 x i64> %1
+}
+
+declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index 739fa6a843..3ec55f9689 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -612,3 +612,21 @@
# CHECK: shrxl %esi, %ebx, %edx
0xc4 0xe2 0x0b 0xf7 0xd3
+
+# CHECK: extrq $2, $3, %xmm0
+0x66 0x0f 0x78 0xc0 0x03 0x02
+
+# CHECK: extrq %xmm1, %xmm0
+0x66 0x0f 0x79 0xc1
+
+# CHECK: insertq $6, $5, %xmm1, %xmm0
+0xf2 0x0f 0x78 0xc1 0x05 0x06
+
+# CHECK: insertq %xmm1, %xmm0
+0xf2 0x0f 0x79 0xc1
+
+# CHECK: movntsd %xmm0, (%edi)
+0xf2 0x0f 0x2b 0x07
+
+# CHECK: movntss %xmm0, (%edi)
+0xf3 0x0f 0x2b 0x07
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index f4b8f46fa2..df449a403b 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -61,3 +61,21 @@
# CHECK: cmpordsd
0xf2 0x0f 0xc2 0xc7 0x07
+
+# CHECK: extrq $2, $3, %xmm0
+0x66 0x0f 0x78 0xc0 0x03 0x02
+
+# CHECK: extrq %xmm1, %xmm0
+0x66 0x0f 0x79 0xc1
+
+# CHECK: insertq $6, $5, %xmm1, %xmm0
+0xf2 0x0f 0x78 0xc1 0x05 0x06
+
+# CHECK: insertq %xmm1, %xmm0
+0xf2 0x0f 0x79 0xc1
+
+# CHECK: movntsd %xmm0, (%rdi)
+0xf2 0x0f 0x2b 0x07
+
+# CHECK: movntss %xmm0, (%rdi)
+0xf3 0x0f 0x2b 0x07
diff --git a/test/MC/X86/x86_64-sse4a.s b/test/MC/X86/x86_64-sse4a.s
new file mode 100644
index 0000000000..e5ed69e5b2
--- /dev/null
+++ b/test/MC/X86/x86_64-sse4a.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+extrq $2, $3, %xmm0
+# CHECK: extrq $2, $3, %xmm0
+# CHECK: encoding: [0x66,0x0f,0x78,0xc0,0x03,0x02]
+
+extrq %xmm1, %xmm0
+# CHECK: extrq %xmm1, %xmm0
+# CHECK: encoding: [0x66,0x0f,0x79,0xc1]
+
+insertq $6, $5, %xmm1, %xmm0
+# CHECK: insertq $6, $5, %xmm1, %xmm0
+# CHECK: encoding: [0xf2,0x0f,0x78,0xc1,0x05,0x06]
+
+insertq %xmm1, %xmm0
+# CHECK: insertq %xmm1, %xmm0
+# CHECK: encoding: [0xf2,0x0f,0x79,0xc1]
+
+movntsd %xmm0, (%rdi)
+# CHECK: movntsd %xmm0, (%rdi)
+# CHECK: encoding: [0xf2,0x0f,0x2b,0x07]
+
+movntss %xmm0, (%rdi)
+# CHECK: movntss %xmm0, (%rdi)
+# CHECK: encoding: [0xf3,0x0f,0x2b,0x07]
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index 6a01cce637..afb25be7ff 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -690,12 +690,13 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
// Operand 2 is a register operand in the R/M field.
// - In AVX, there is a register operand in the VEX.vvvv field here -
// Operand 3 (optional) is an immediate.
+ // Operand 4 (optional) is an immediate.
if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix)
assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 5 &&
"Unexpected number of operands for MRMSrcRegFrm with VEX_4V");
else
- assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
+ assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 4 &&
"Unexpected number of operands for MRMSrcRegFrm");
HANDLE_OPERAND(roRegister)
@@ -716,6 +717,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
if (!HasMemOp4Prefix)
HANDLE_OPTIONAL(immediate)
HANDLE_OPTIONAL(immediate) // above might be a register in 7:4
+ HANDLE_OPTIONAL(immediate)
break;
case X86Local::MRMSrcMem:
// Operand 1 is a register operand in the Reg/Opcode field.
@@ -759,16 +761,18 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
case X86Local::MRM7r:
// Operand 1 is a register operand in the R/M field.
// Operand 2 (optional) is an immediate or relocation.
+ // Operand 3 (optional) is an immediate.
if (HasVEX_4VPrefix)
assert(numPhysicalOperands <= 3 &&
"Unexpected number of operands for MRMnRFrm with VEX_4V");
else
- assert(numPhysicalOperands <= 2 &&
+ assert(numPhysicalOperands <= 3 &&
"Unexpected number of operands for MRMnRFrm");
if (HasVEX_4VPrefix)
HANDLE_OPERAND(vvvvRegister)
HANDLE_OPTIONAL(rmRegister)
HANDLE_OPTIONAL(relocation)
+ HANDLE_OPTIONAL(immediate)
break;
case X86Local::MRM0m:
case X86Local::MRM1m: