author     Evan Cheng <evan.cheng@apple.com>  2006-09-08 06:48:29 +0000
committer  Evan Cheng <evan.cheng@apple.com>  2006-09-08 06:48:29 +0000
commit     25ab690a43cbbb591b76d49e3595b019c32f4b3f (patch)
tree       fe952a3e394b9f01b6ce8ed8691cee8c507ed094 /lib/Target
parent     1e5fb6928c510bc945dbcd23d99022288ad7e863 (diff)
Committing X86-64 support.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30177 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/X86/README-X86-64.txt        269
-rw-r--r--  lib/Target/X86/X86.td                    14
-rwxr-xr-x  lib/Target/X86/X86ATTAsmPrinter.cpp     107
-rwxr-xr-x  lib/Target/X86/X86ATTAsmPrinter.h         6
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp          6
-rwxr-xr-x  lib/Target/X86/X86AsmPrinter.h           11
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp       350
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp      166
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp      849
-rw-r--r--  lib/Target/X86/X86ISelLowering.h         10
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp          18
-rw-r--r--  lib/Target/X86/X86InstrInfo.h            33
-rw-r--r--  lib/Target/X86/X86InstrInfo.td           94
-rw-r--r--  lib/Target/X86/X86InstrX86-64.td       1084
-rwxr-xr-x  lib/Target/X86/X86IntelAsmPrinter.cpp    30
-rwxr-xr-x  lib/Target/X86/X86IntelAsmPrinter.h       7
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp            79
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp      388
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h         18
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td       377
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp          47
-rw-r--r--  lib/Target/X86/X86Subtarget.h            13
-rw-r--r--  lib/Target/X86/X86TargetAsmInfo.cpp       5
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp      60
-rw-r--r--  lib/Target/X86/X86TargetMachine.h        24
25 files changed, 3603 insertions, 462 deletions
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
new file mode 100644
index 0000000000..af3e273237
--- /dev/null
+++ b/lib/Target/X86/README-X86-64.txt
@@ -0,0 +1,269 @@
+//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===//
+
+Implement different PIC models? Right now we only support Mac OS X with the
+small PIC code model.
+
+//===---------------------------------------------------------------------===//
+
+Make use of "Red Zone".
+
+//===---------------------------------------------------------------------===//
+
+Implement __int128 and long double support.
+
+//===---------------------------------------------------------------------===//
+
+For this:
+
+extern void xx(void);
+void bar(void) {
+ xx();
+}
+
+gcc compiles to:
+
+.globl _bar
+_bar:
+ jmp _xx
+
+We need to do the tailcall optimization as well.
+
+//===---------------------------------------------------------------------===//
+
+For this:
+
+int test(int a)
+{
+ return a * 3;
+}
+
+We currently generate
+ leal (%edi,%edi,2), %eax
+
+We should be generating
+ leal (%rdi,%rdi,2), %eax
+
+instead. The latter form does not require the 67H address-size prefix.
+
+It's probably ok to simply emit the corresponding 64-bit super class registers
+in this case?
+
+
+//===---------------------------------------------------------------------===//
+
+AMD64 Optimization Manual 8.2 has some nice information about optimizing integer
+multiplication by a constant. How much of it applies to Intel's X86-64
+implementation? There are definite trade-offs to consider: latency vs. register
+pressure vs. code size.
+
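+The usual trick for small constants is to fold the multiply into an lea. A
+small C++ sketch (illustrative only; the function names are ours, and the lea
+comments assume the argument arrives in %rdi and the result goes in %rax):
+
+unsigned long long times3(unsigned long long x) { return x + x * 2; } // leaq (%rdi,%rdi,2), %rax
+unsigned long long times5(unsigned long long x) { return x + x * 4; } // leaq (%rdi,%rdi,4), %rax
+unsigned long long times9(unsigned long long x) { return x + x * 8; } // leaq (%rdi,%rdi,8), %rax
+
+Larger constants decompose into shift + lea/add sequences, which is where the
+latency vs. register pressure vs. code size trade-off shows up.
+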
+//===---------------------------------------------------------------------===//
+
+Are we better off using branches instead of cmov to implement FP to
+unsigned i64 conversion?
+
+_conv:
+ ucomiss LC0(%rip), %xmm0
+ cvttss2siq %xmm0, %rdx
+ jb L3
+ subss LC0(%rip), %xmm0
+ movabsq $-9223372036854775808, %rax
+ cvttss2siq %xmm0, %rdx
+ xorq %rax, %rdx
+L3:
+ movq %rdx, %rax
+ ret
+
+instead of
+
+_conv:
+ movss LCPI1_0(%rip), %xmm1
+ cvttss2siq %xmm0, %rcx
+ movaps %xmm0, %xmm2
+ subss %xmm1, %xmm2
+ cvttss2siq %xmm2, %rax
+ movabsq $-9223372036854775808, %rdx
+ xorq %rdx, %rax
+ ucomiss %xmm1, %xmm0
+ cmovb %rcx, %rax
+ ret
+
+It seems the jb branch has a high likelihood of being taken, which would save a
+few instructions.
+
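+In C++ terms, both sequences compute roughly the following (a sketch of the
+conversion only, not the actual lowering code; the function and constant names
+are ours):
+
+unsigned long long conv(float x) {
+  const float Two63 = 9223372036854775808.0f;   // 2^63, the LC0 constant above
+  if (x < Two63)                                // value fits in a signed i64
+    return (long long)x;
+  // Otherwise convert x - 2^63 and put the sign bit back with an xor.
+  return (unsigned long long)(long long)(x - Two63) ^ 0x8000000000000000ULL;
+}
+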
+//===---------------------------------------------------------------------===//
+
+Poor codegen:
+
+int X[2];
+int b;
+void test(void) {
+ memset(X, b, 2*sizeof(X[0]));
+}
+
+llc:
+ movq _b@GOTPCREL(%rip), %rax
+ movzbq (%rax), %rax
+ movq %rax, %rcx
+ shlq $8, %rcx
+ orq %rax, %rcx
+ movq %rcx, %rax
+ shlq $16, %rax
+ orq %rcx, %rax
+ movq %rax, %rcx
+ shlq $32, %rcx
+ movq _X@GOTPCREL(%rip), %rdx
+ orq %rax, %rcx
+ movq %rcx, (%rdx)
+ ret
+
+gcc:
+ movq _b@GOTPCREL(%rip), %rax
+ movabsq $72340172838076673, %rdx
+ movzbq (%rax), %rax
+ imulq %rdx, %rax
+ movq _X@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+ ret
+
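+gcc's trick is the byte-splat multiply (72340172838076673 == 0x0101010101010101).
+In C++ it is simply the following (a sketch; the function name is ours):
+
+unsigned long long splat8(unsigned char b) {
+  // Multiplying by 0x0101010101010101 replicates the byte into all 8 byte lanes.
+  return (unsigned long long)b * 0x0101010101010101ULL;
+}
+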
+//===---------------------------------------------------------------------===//
+
+The vararg function prologue can be further optimized. Currently all XMM
+registers are stored into the register save area. Most of these stores can be
+eliminated since the upper bound on the number of XMM registers used is passed
+in %al. gcc produces something like the following:
+
+ movzbl %al, %edx
+ leaq 0(,%rdx,4), %rax
+ leaq 4+L2(%rip), %rdx
+ leaq 239(%rsp), %rax
+ jmp *%rdx
+ movaps %xmm7, -15(%rax)
+ movaps %xmm6, -31(%rax)
+ movaps %xmm5, -47(%rax)
+ movaps %xmm4, -63(%rax)
+ movaps %xmm3, -79(%rax)
+ movaps %xmm2, -95(%rax)
+ movaps %xmm1, -111(%rax)
+ movaps %xmm0, -127(%rax)
+L2:
+
+It jumps over the movaps stores that are not needed. It is hard to see this
+being significant, as it adds 5 instructions (including an indirect branch) to
+avoid executing 0 to 8 stores in the function prologue.
+
+Perhaps we can optimize for the common case where no XMM registers are used for
+parameter passing, i.e. if %al == 0, jump over all of the stores. Or, in the
+case of a leaf function where we can determine that no XMM input parameter is
+needed, avoid emitting the stores at all.
+
+//===---------------------------------------------------------------------===//
+
+AMD64 has a complex calling convention for aggregate passing by value (a short
+C++ sketch of the merge rules follows the list):
+
+1. If the size of an object is larger than two eightbytes, or in C++, is a non-
+ POD structure or union type, or contains unaligned fields, it has class
+ MEMORY.
+2. Both eightbytes get initialized to class NO_CLASS.
+3. Each field of an object is classified recursively so that always two fields
+ are considered. The resulting class is calculated according to the classes
+ of the fields in the eightbyte:
+ (a) If both classes are equal, this is the resulting class.
+ (b) If one of the classes is NO_CLASS, the resulting class is the other
+ class.
+ (c) If one of the classes is MEMORY, the result is the MEMORY class.
+ (d) If one of the classes is INTEGER, the result is the INTEGER.
+ (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as
+ class.
+ (f) Otherwise class SSE is used.
+4. Then a post merger cleanup is done:
+   (a) If one of the classes is MEMORY, the whole argument is passed in memory.
+   (b) If SSEUP is not preceded by SSE, it is converted to SSE.
+
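+A C++ sketch of the per-eightbyte merge step, restating rules 3(a)-3(f) above
+(the names are ours; this is not the eventual implementation):
+
+enum ArgClass { NO_CLASS, INTEGER, SSE, SSEUP, X87, X87UP, COMPLEX_X87, MEMORY };
+
+// Merge the class of one field into the running class of its eightbyte.
+static ArgClass merge(ArgClass Accum, ArgClass Field) {
+  if (Accum == Field) return Accum;                          // 3(a)
+  if (Accum == NO_CLASS) return Field;                       // 3(b)
+  if (Field == NO_CLASS) return Accum;
+  if (Accum == MEMORY || Field == MEMORY) return MEMORY;     // 3(c)
+  if (Accum == INTEGER || Field == INTEGER) return INTEGER;  // 3(d)
+  if (Accum == X87 || Accum == X87UP || Accum == COMPLEX_X87 ||
+      Field == X87 || Field == X87UP || Field == COMPLEX_X87)
+    return MEMORY;                                           // 3(e)
+  return SSE;                                                // 3(f)
+}
+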
+Currently the llvm frontend does not handle this correctly.
+
+Problem 1:
+ typedef struct { int i; double d; } QuadWordS;
+It is currently passed in two i64 integer registers. However, a gcc-compiled
+callee expects the second element 'd' to be passed in XMM0.
+
+Problem 2:
+ typedef struct { int32_t i; float j; double d; } QuadWordS;
+The first two fields together have the size of an i64, so they will be combined
+and passed in the integer register RDI. The third field is still passed in XMM0.
+
+Problem 3:
+ typedef struct { int64_t i; int8_t j; int64_t d; } S;
+ void test(S s)
+The size of this aggregate is greater than two i64s, so it should be passed in
+memory. Currently llvm breaks it down and passes it in three integer registers.
+
+Problem 4:
+Taking problem 3 one step further, a function expects an aggregate value in
+memory followed by more parameter(s) passed in register(s).
+ void test(S s, int b)
+
+LLVM IR does not allow passing aggregates as parameters, therefore the frontend
+must break the aggregate value (in problems 3 and 4) into a number of scalar
+values:
+ void %test(long %s.i, byte %s.j, long %s.d);
+
+However, if the backend were to lower this code literally, it would pass the 3
+values in integer registers. To force them to be passed in memory, the frontend
+should change the function signature to:
+ void %test(long %undef1, long %undef2, long %undef3, long %undef4,
+ long %undef5, long %undef6,
+ long %s.i, byte %s.j, long %s.d);
+And the call site would look something like this:
+ call void %test( undef, undef, undef, undef, undef, undef,
+ %tmp.s.i, %tmp.s.j, %tmp.s.d );
+The first 6 undef parameters would exhaust the 6 integer registers used for
+parameter passing. The following three integer values would then be forced into
+memory.
+
+For problem 4, the parameter 'b' would be moved to the front of the parameter
+list so it will be passed in a register:
+  void %test(int %b,
+ long %undef1, long %undef2, long %undef3, long %undef4,
+ long %undef5, long %undef6,
+ long %s.i, byte %s.j, long %s.d);
+
+//===---------------------------------------------------------------------===//
+
+For this:
+
+extern int dst[];
+extern int* ptr;
+
+void test(void) {
+ ptr = dst;
+}
+
+We generate this code for static relocation model:
+
+_test:
+ leaq _dst(%rip), %rax
+ movq %rax, _ptr(%rip)
+ ret
+
+If we are in the small code model, then we can treat _dst as a 32-bit constant:
+ movq $_dst, _ptr(%rip)
+
+Note, however, that we should continue to use RIP-relative addressing as much as
+possible. The above is actually one byte shorter than
+ movq $_dst, _ptr
+
+//===---------------------------------------------------------------------===//
+
+Right now the asm printer assumes GlobalAddresses are accessed via RIP-relative
+addressing. Therefore, it is not possible to generate this:
+ movabsq $__ZTV10polynomialIdE+16, %rax
+
+That is ok for now since we currently only support the small code model, so the
+above is selected as:
+ leaq __ZTV10polynomialIdE+16(%rip), %rax
+
+This is probably slightly slower, but it is much shorter than movabsq. However,
+if we were to support medium or larger code models, we would need to use the
+movabs instruction. We should probably introduce something like AbsoluteAddress
+to distinguish it from GlobalAddress so the asm printer and JIT code emitter can
+do the right thing.
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index e15512db23..c4b3d8635f 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -20,8 +20,8 @@ include "../Target.td"
// X86 Subtarget features.
//
-def Feature64Bit : SubtargetFeature<"64bit", "Is64Bit", "true",
- "Enable 64-bit instructions">;
+def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
+ "Support 64-bit instructions">;
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
"Enable MMX instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
@@ -61,6 +61,8 @@ def : Proc<"prescott", [FeatureMMX, FeatureSSE1, FeatureSSE2,
FeatureSSE3]>;
def : Proc<"nocona", [FeatureMMX, FeatureSSE1, FeatureSSE2,
FeatureSSE3, Feature64Bit]>;
+def : Proc<"core2", [FeatureMMX, FeatureSSE1, FeatureSSE2,
+ FeatureSSE3, Feature64Bit]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
@@ -105,16 +107,20 @@ def X86InstrInfo : InstrInfo {
// should be kept up-to-date with the fields in the X86InstrInfo.h file.
let TSFlagsFields = ["FormBits",
"hasOpSizePrefix",
+ "hasAdSizePrefix",
"Prefix",
+ "hasREX_WPrefix",
"ImmTypeBits",
"FPFormBits",
"Opcode"];
let TSFlagsShifts = [0,
6,
7,
- 11,
+ 8,
+ 12,
13,
- 16];
+ 16,
+ 24];
}
// The X86 target supports two different syntaxes for emitting machine code.
diff --git a/lib/Target/X86/X86ATTAsmPrinter.cpp b/lib/Target/X86/X86ATTAsmPrinter.cpp
index e3653e4920..b17cde18de 100755
--- a/lib/Target/X86/X86ATTAsmPrinter.cpp
+++ b/lib/Target/X86/X86ATTAsmPrinter.cpp
@@ -126,8 +126,9 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
O << '%';
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
- MVT::ValueType VT = (strcmp(Modifier,"subreg16") == 0)
- ? MVT::i16 : MVT::i8;
+ MVT::ValueType VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
Reg = getX86SubSuperRegister(Reg, VT);
}
for (const char *Name = RI.get(Reg).Name; *Name; ++Name)
@@ -148,9 +149,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
if (!isMemOp) O << '$';
O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << "_"
<< MO.getJumpTableIndex();
- if (Subtarget->isTargetDarwin() &&
+ if (X86PICStyle == PICStyle::Stub &&
TM.getRelocationModel() == Reloc::PIC_)
O << "-\"L" << getFunctionNumber() << "$pb\"";
+ if (Subtarget->is64Bit())
+ O << "(%rip)";
return;
}
case MachineOperand::MO_ConstantPoolIndex: {
@@ -158,7 +161,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
if (!isMemOp) O << '$';
O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
<< MO.getConstantPoolIndex();
- if (Subtarget->isTargetDarwin() &&
+ if (X86PICStyle == PICStyle::Stub &&
TM.getRelocationModel() == Reloc::PIC_)
O << "-\"L" << getFunctionNumber() << "$pb\"";
int Offset = MO.getOffset();
@@ -166,47 +169,59 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
O << "+" << Offset;
else if (Offset < 0)
O << Offset;
+
+ if (Subtarget->is64Bit())
+ O << "(%rip)";
return;
}
case MachineOperand::MO_GlobalAddress: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
if (!isMemOp && !isCallOp) O << '$';
- // Darwin block shameless ripped from PPCAsmPrinter.cpp
- if (Subtarget->isTargetDarwin() &&
+
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+ bool isExt = (GV->isExternal() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage());
+ if (X86PICStyle == PICStyle::Stub &&
TM.getRelocationModel() != Reloc::Static) {
- GlobalValue *GV = MO.getGlobal();
- std::string Name = Mang->getValueName(GV);
// Link-once, External, or Weakly-linked global variables need
// non-lazily-resolved stubs
- if (GV->isExternal() || GV->hasWeakLinkage() ||
- GV->hasLinkOnceLinkage()) {
+ if (isExt) {
// Dynamically-resolved functions need a stub for the function.
- if (isCallOp && isa<Function>(GV) && cast<Function>(GV)->isExternal()) {
+ if (isCallOp && isa<Function>(GV)) {
FnStubs.insert(Name);
O << "L" << Name << "$stub";
} else {
GVStubs.insert(Name);
O << "L" << Name << "$non_lazy_ptr";
}
- } else {
- O << Mang->getValueName(GV);
- }
+ } else
+ O << Name;
if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_)
O << "-\"L" << getFunctionNumber() << "$pb\"";
- } else
- O << Mang->getValueName(MO.getGlobal());
+ } else
+ O << Name;
+
int Offset = MO.getOffset();
if (Offset > 0)
O << "+" << Offset;
else if (Offset < 0)
O << Offset;
+
+ if (!isCallOp &&
+ Subtarget->is64Bit()) {
+ if (isExt && TM.getRelocationModel() != Reloc::Static)
+ O << "@GOTPCREL";
+ O << "(%rip)";
+ }
+
return;
}
case MachineOperand::MO_ExternalSymbol: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
if (isCallOp &&
- Subtarget->isTargetDarwin() &&
+ X86PICStyle == PICStyle::Stub &&
TM.getRelocationModel() != Reloc::Static) {
std::string Name(TAI->getGlobalPrefix());
Name += MO.getSymbolName();
@@ -216,6 +231,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
}
if (!isCallOp) O << '$';
O << TAI->getGlobalPrefix() << MO.getSymbolName();
+
+ if (!isCallOp &&
+ Subtarget->is64Bit())
+ O << "(%rip)";
+
return;
}
default:
@@ -238,7 +258,8 @@ void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
}
}
-void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
+void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier){
assert(isMem(MI, Op) && "Invalid memory reference!");
const MachineOperand &BaseReg = MI->getOperand(Op);
@@ -266,12 +287,13 @@ void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
if (IndexReg.getReg() || BaseReg.getReg()) {
O << "(";
- if (BaseReg.getReg())
- printOperand(MI, Op);
+ if (BaseReg.getReg()) {
+ printOperand(MI, Op, Modifier);
+ }
if (IndexReg.getReg()) {
O << ",";
- printOperand(MI, Op+2);
+ printOperand(MI, Op+2, Modifier);
if (ScaleVal != 1)
O << "," << ScaleVal;
}
@@ -350,43 +372,25 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
///
void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
++EmittedInsts;
- // This works around some Darwin assembler bugs.
- if (Subtarget->isTargetDarwin()) {
- switch (MI->getOpcode()) {
- case X86::REP_MOVSB:
- O << "rep/movsb (%esi),(%edi)\n";
- return;
- case X86::REP_MOVSD:
- O << "rep/movsl (%esi),(%edi)\n";
- return;
- case X86::REP_MOVSW:
- O << "rep/movsw (%esi),(%edi)\n";
- return;
- case X86::REP_STOSB:
- O << "rep/stosb\n";
- return;
- case X86::REP_STOSD:
- O << "rep/stosl\n";
- return;
- case X86::REP_STOSW:
- O << "rep/stosw\n";
- return;
- default:
- break;
- }
- }
// See if a truncate instruction can be turned into a nop.
switch (MI->getOpcode()) {
default: break;
- case X86::TRUNC_GR32_GR16:
- case X86::TRUNC_GR32_GR8:
- case X86::TRUNC_GR16_GR8: {
+ case X86::TRUNC_64to32:
+ case X86::TRUNC_64to16:
+ case X86::TRUNC_32to16:
+ case X86::TRUNC_32to8:
+ case X86::TRUNC_16to8:
+ case X86::TRUNC_32_to8:
+ case X86::TRUNC_16_to8: {
const MachineOperand &MO0 = MI->getOperand(0);
const MachineOperand &MO1 = MI->getOperand(1);
unsigned Reg0 = MO0.getReg();
unsigned Reg1 = MO1.getReg();
- if (MI->getOpcode() == X86::TRUNC_GR32_GR16)
+ unsigned Opc = MI->getOpcode();
+ if (Opc == X86::TRUNC_64to32)
+ Reg1 = getX86SubSuperRegister(Reg1, MVT::i32);
+ else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16)
Reg1 = getX86SubSuperRegister(Reg1, MVT::i16);
else
Reg1 = getX86SubSuperRegister(Reg1, MVT::i8);
@@ -395,6 +399,9 @@ void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
O << "\n\t";
break;
}
+ case X86::PsMOVZX64rr32:
+ O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
+ break;
}
// Call the autogenerated instruction printer routines.
diff --git a/lib/Target/X86/X86ATTAsmPrinter.h b/lib/Target/X86/X86ATTAsmPrinter.h
index ff707caee6..167e812f4d 100755
--- a/lib/Target/X86/X86ATTAsmPrinter.h
+++ b/lib/Target/X86/X86ATTAsmPrinter.h
@@ -60,6 +60,9 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter {
void printf128mem(const MachineInstr *MI, unsigned OpNo) {
printMemReference(MI, OpNo);
}
+ void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo, "subreg64");
+ }
bool printAsmMRegister(const MachineOperand &MO, const char Mode);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -69,7 +72,8 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter {
void printMachineInstruction(const MachineInstr *MI);
void printSSECC(const MachineInstr *MI, unsigned Op);
- void printMemReference(const MachineInstr *MI, unsigned Op);
+ void printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
void printPICLabel(const MachineInstr *MI, unsigned Op);
bool runOnMachineFunction(MachineFunction &F);
};
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 4a54e5914d..b634d13ea4 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -30,8 +30,12 @@ Statistic<> llvm::EmittedInsts("asm-printer",
"Number of machine instrs printed");
/// doInitialization
-bool X86SharedAsmPrinter::doInitialization(Module &M) {
+bool X86SharedAsmPrinter::doInitialization(Module &M) {
if (Subtarget->isTargetDarwin()) {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget->is64Bit())
+ X86PICStyle = PICStyle::Stub;
+
// Emit initial debug information.
DW.BeginModule(&M);
}
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 8d32f59d8b..6db9e45dc3 100755
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -29,12 +29,19 @@ namespace llvm {
extern Statistic<> EmittedInsts;
+// FIXME: Move this to CodeGen/AsmPrinter.h
+namespace PICStyle {
+ enum X86AsmPICStyle {
+ Stub, GOT
+ };
+}
+
struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter {
DwarfWriter DW;
X86SharedAsmPrinter(std::ostream &O, X86TargetMachine &TM,
const TargetAsmInfo *T)
- : AsmPrinter(O, TM, T), DW(O, this, T) {
+ : AsmPrinter(O, TM, T), DW(O, this, T), X86PICStyle(PICStyle::GOT) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
}
@@ -49,6 +56,8 @@ struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter {
MachineFunctionPass::getAnalysisUsage(AU);
}
+ PICStyle::X86AsmPICStyle X86PICStyle;
+
const X86Subtarget *Subtarget;
  // Necessary for Darwin to print out the appropriate types of linker stubs
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index f7d53caed4..0ac8bc5f32 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -12,6 +12,8 @@
//
//===----------------------------------------------------------------------===//
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "X86Relocations.h"
#include "X86.h"
@@ -35,14 +37,16 @@ namespace {
namespace {
class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
const X86InstrInfo *II;
- TargetMachine &TM;
+ const TargetData *TD;
+ TargetMachine &TM;
MachineCodeEmitter &MCE;
+ bool Is64BitMode;
public:
explicit Emitter(TargetMachine &tm, MachineCodeEmitter &mce)
- : II(0), TM(tm), MCE(mce) {}
+ : II(0), TD(0), TM(tm), MCE(mce), Is64BitMode(false) {}
Emitter(TargetMachine &tm, MachineCodeEmitter &mce,
- const X86InstrInfo& ii)
- : II(&ii), TM(tm), MCE(mce) {}
+ const X86InstrInfo &ii, const TargetData &td, bool is64)
+ : II(&ii), TD(&td), TM(tm), MCE(mce), Is64BitMode(is64) {}
bool runOnMachineFunction(MachineFunction &MF);
@@ -54,20 +58,29 @@ namespace {
private:
void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
- void emitPCRelativeValue(unsigned Address);
- void emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall);
- void emitGlobalAddressForPtr(GlobalValue *GV, int Disp = 0);
+ void emitPCRelativeValue(intptr_t Address);
+ void emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub);
+ void emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative,
+ int Disp = 0, unsigned PCAdj = 0);
void emitExternalSymbolAddress(const char *ES, bool isPCRelative);
+ void emitPCRelativeConstPoolAddress(unsigned CPI, int Disp = 0,
+ unsigned PCAdj = 0);
+ void emitPCRelativeJumpTableAddress(unsigned JTI, unsigned PCAdj = 0);
- void emitDisplacementField(const MachineOperand *RelocOp, int DispVal);
+ void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
+ unsigned PCAdj = 0);
void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
- void emitConstant(unsigned Val, unsigned Size);
+ void emitConstant(uint64_t Val, unsigned Size);
void emitMemModRMByte(const MachineInstr &MI,
- unsigned Op, unsigned RegOpcodeField);
+ unsigned Op, unsigned RegOpcodeField,
+ unsigned PCAdj = 0);
+ unsigned getX86RegNum(unsigned RegNo);
+ bool isX86_64ExtendedReg(const MachineOperand &MO);
+ unsigned determineREX(const MachineInstr &MI);
};
}
@@ -83,6 +96,9 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) {
MF.getTarget().getRelocationModel() != Reloc::Static) &&
"JIT relocation model must be set to static or default!");
II = ((X86TargetMachine&)MF.getTarget()).getInstrInfo();
+ TD = ((X86TargetMachine&)MF.getTarget()).getTargetData();
+ Is64BitMode =
+ ((X86TargetMachine&)MF.getTarget()).getSubtarget<X86Subtarget>().is64Bit();
do {
MCE.startFunction(MF);
@@ -98,9 +114,9 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) {
return false;
}
-/// emitPCRelativeValue - Emit a 32-bit PC relative address.
+/// emitPCRelativeValue - Emit a PC relative address.
///
-void Emitter::emitPCRelativeValue(unsigned Address) {
+void Emitter::emitPCRelativeValue(intptr_t Address) {
MCE.emitWordLE(Address-MCE.getCurrentPCValue()-4);
}
@@ -119,20 +135,22 @@ void Emitter::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
/// emitGlobalAddressForCall - Emit the specified address to the code stream
/// assuming this is part of a function call, which is PC relative.
///
-void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall) {
+void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub) {
MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
X86::reloc_pcrel_word, GV, 0,
- !isTailCall /*Doesn'tNeedStub*/));
+ DoesntNeedStub));
MCE.emitWordLE(0);
}
/// emitGlobalAddress - Emit the specified address to the code stream assuming
-/// this is part of a "take the address of a global" instruction, which is not
-/// PC relative.
+/// this is part of a "take the address of a global" instruction.
///
-void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, int Disp /* = 0 */) {
- MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
- X86::reloc_absolute_word, GV));
+void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative,
+ int Disp /* = 0 */,
+ unsigned PCAdj /* = 0 */) {
+ unsigned rt = isPCRelative ? X86::reloc_pcrel_word : X86::reloc_absolute_word;
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), rt,
+ GV, PCAdj));
MCE.emitWordLE(Disp); // The relocated value will be added to the displacement
}
@@ -145,6 +163,26 @@ void Emitter::emitExternalSymbolAddress(const char *ES, bool isPCRelative) {
MCE.emitWordLE(0);
}
+/// emitPCRelativeConstPoolAddress - Arrange for the address of a constant pool
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void Emitter::emitPCRelativeConstPoolAddress(unsigned CPI, int Disp /* = 0 */,
+ unsigned PCAdj /* = 0 */) {
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ X86::reloc_pcrel_word, CPI, PCAdj));
+ MCE.emitWordLE(Disp); // The relocated value will be added to the displacement
+}
+
+/// emitPCRelativeJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void Emitter::emitPCRelativeJumpTableAddress(unsigned JTI,
+ unsigned PCAdj /* = 0 */) {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ X86::reloc_pcrel_word, JTI, PCAdj));
+ MCE.emitWordLE(0); // The relocated value will be added to the displacement
+}
+
/// N86 namespace - Native X86 Register numbers... used by X86 backend.
///
namespace N86 {
@@ -153,28 +191,53 @@ namespace N86 {
};
}
-
// getX86RegNum - This function maps LLVM register identifiers to their X86
// specific numbering, which is used in various places encoding instructions.
//
-static unsigned getX86RegNum(unsigned RegNo) {
+unsigned Emitter::getX86RegNum(unsigned RegNo) {
switch(RegNo) {
- case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
- case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
- case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
- case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
- case X86::ESP: case X86::SP: case X86::AH: return N86::ESP;
- case X86::EBP: case X86::BP: case X86::CH: return N86::EBP;
- case X86::ESI: case X86::SI: case X86::DH: return N86::ESI;
- case X86::EDI: case X86::DI: case X86::BH: return N86::EDI;
+ case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
+ case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
+ case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
+ case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
+ case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
+ return N86::ESP;
+ case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
+ return N86::EBP;
+ case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
+ return N86::ESI;
+ case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
+ return N86::EDI;
+
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ return N86::EAX;
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ return N86::ECX;
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ return N86::EDX;
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ return N86::EBX;
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ return N86::ESP;
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ return N86::EBP;
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ return N86::ESI;
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ return N86::EDI;
case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
return RegNo-X86::ST0;
- case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
- case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7:
- return RegNo-X86::XMM0;
+ case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
+ case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7:
+ return II->getRegisterInfo().getDwarfRegNum(RegNo) -
+ II->getRegisterInfo().getDwarfRegNum(X86::XMM0);
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ return II->getRegisterInfo().getDwarfRegNum(RegNo) -
+ II->getRegisterInfo().getDwarfRegNum(X86::XMM8);
default:
assert(MRegisterInfo::isVirtualRegister(RegNo) &&
@@ -199,7 +262,7 @@ void Emitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base) {
MCE.emitByte(ModRMByte(SS, Index, Base));
}
-void Emitter::emitConstant(unsigned Val, unsigned Size) {
+void Emitter::emitConstant(uint64_t Val, unsigned Size) {
// Output the constant in little endian byte order...
for (unsigned i = 0; i != Size; ++i) {
MCE.emitByte(Val & 255);
@@ -214,7 +277,7 @@ static bool isDisp8(int Value) {
}
void Emitter::emitDisplacementField(const MachineOperand *RelocOp,
- int DispVal) {
+ int DispVal, unsigned PCAdj) {
// If this is a simple integer displacement that doesn't require a relocation,
// emit it now.
if (!RelocOp) {
@@ -225,14 +288,27 @@ void Emitter::emitDisplacementField(const MachineOperand *RelocOp,
// Otherwise, this is something that requires a relocation. Emit it as such
// now.
if (RelocOp->isGlobalAddress()) {
- emitGlobalAddressForPtr(RelocOp->getGlobal(), RelocOp->getOffset());
+ // In 64-bit static small code model, we could potentially emit absolute.
+ // But it's probably not beneficial.
+ // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
+ // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
+ emitGlobalAddressForPtr(RelocOp->getGlobal(), Is64BitMode,
+ RelocOp->getOffset(), PCAdj);
+ } else if (RelocOp->isConstantPoolIndex()) {
+ // Must be in 64-bit mode.
+ emitPCRelativeConstPoolAddress(RelocOp->getConstantPoolIndex(),
+ RelocOp->getOffset(), PCAdj);
+ } else if (RelocOp->isJumpTableIndex()) {
+ // Must be in 64-bit mode.
+ emitPCRelativeJumpTableAddress(RelocOp->getJumpTableIndex(), PCAdj);
} else {
assert(0 && "Unknown value to relocate!");
}
}
void Emitter::emitMemModRMByte(const MachineInstr &MI,
- unsigned Op, unsigned RegOpcodeField) {
+ unsigned Op, unsigned RegOpcodeField,
+ unsigned PCAdj) {
const MachineOperand &Op3 = MI.getOperand(Op+3);
int DispVal = 0;
const MachineOperand *DispForReloc = 0;
@@ -241,10 +317,18 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
if (Op3.isGlobalAddress()) {
DispForReloc = &Op3;
} else if (Op3.isConstantPoolIndex()) {
- DispVal += MCE.getConstantPoolEntryAddress(Op3.getConstantPoolIndex());
- DispVal += Op3.getOffset();
+ if (Is64BitMode) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getConstantPoolEntryAddress(Op3.getConstantPoolIndex());
+ DispVal += Op3.getOffset();
+ }
} else if (Op3.isJumpTableIndex()) {
- DispVal += MCE.getJumpTableEntryAddress(Op3.getJumpTableIndex());
+ if (Is64BitMode) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getJumpTableEntryAddress(Op3.getJumpTableIndex());
+ }
} else {
DispVal = Op3.getImm();
}
@@ -256,12 +340,13 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
unsigned BaseReg = Base.getReg();
// Is a SIB byte needed?
- if (IndexReg.getReg() == 0 && BaseReg != X86::ESP) {
+ if (IndexReg.getReg() == 0 &&
+ (BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) {
if (BaseReg == 0) { // Just a displacement?
// Emit special case [disp32] encoding
MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
- emitDisplacementField(DispForReloc, DispVal);
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
} else {
unsigned BaseRegNo = getX86RegNum(BaseReg);
if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
@@ -274,12 +359,13 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
} else {
// Emit the most general non-SIB encoding: [REG+disp32]
MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
- emitDisplacementField(DispForReloc, DispVal);
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
}
}
} else { // We need a SIB byte, so start by outputting the ModR/M byte first
- assert(IndexReg.getReg() != X86::ESP && "Cannot use ESP as index reg!");
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
bool ForceDisp32 = false;
bool ForceDisp8 = false;
@@ -292,7 +378,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
// Emit the normal disp32 encoding.
MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
ForceDisp32 = true;
- } else if (DispVal == 0 && BaseReg != X86::EBP) {
+ } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) {
// Emit no displacement ModR/M byte
MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
} else if (isDisp8(DispVal)) {
@@ -327,7 +413,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
if (ForceDisp8) {
emitConstant(DispVal, 1);
} else if (DispVal != 0 || ForceDisp32) {
- emitDisplacementField(DispForReloc, DispVal);
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
}
}
}
@@ -337,11 +423,131 @@ static unsigned sizeOfImm(const TargetInstrDescriptor &Desc) {
case X86II::Imm8: return 1;
case X86II::Imm16: return 2;
case X86II::Imm32: return 4;
+ case X86II::Imm64: return 8;
default: assert(0 && "Immediate size not set!");
return 0;
}
}
+/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register?
+/// e.g. r8, xmm8, etc.
+bool Emitter::isX86_64ExtendedReg(const MachineOperand &MO) {
+ if (!MO.isRegister()) return false;
+ unsigned RegNo = MO.getReg();
+ int DWNum = II->getRegisterInfo().getDwarfRegNum(RegNo);
+ if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::R8) &&
+ DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::R15))
+ return true;
+ if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::XMM8) &&
+ DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::XMM15))
+ return true;
+ return false;
+}
+
+inline static bool isX86_64TruncToByte(unsigned oc) {
+ return (oc == X86::TRUNC_64to8 || oc == X86::TRUNC_32to8 ||
+ oc == X86::TRUNC_16to8);
+}
+
+
+inline static bool isX86_64NonExtLowByteReg(unsigned reg) {
+ return (reg == X86::SPL || reg == X86::BPL ||
+ reg == X86::SIL || reg == X86::DIL);
+}
+
+/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
+/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
+/// size, and 3) use of X86-64 extended registers.
+unsigned Emitter::determineREX(const MachineInstr &MI) {
+ unsigned REX = 0;
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDescriptor &Desc = II->get(Opcode);
+
+ // Pseudo instructions do not need REX prefix byte.
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return 0;
+ if (Desc.TSFlags & X86II::REX_W)
+ REX |= 1 << 3;
+
+ if (MI.getNumOperands()) {
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ bool isTrunc8 = isX86_64TruncToByte(Opcode);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isRegister()) {
+ unsigned Reg = MO.getReg();
+ // Trunc to byte are actually movb. The real source operand is the low
+ // byte of the register.
+ if (isTrunc8 && i == 1)
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ if (isX86_64NonExtLowByteReg(Reg))
+ REX |= 0x40;
+ }
+ }
+
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= (1 << 0) | (1 << 2);
+ break;
+ case X86II::MRMSrcReg: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << 0;
+ }
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (unsigned i = 1; i != 5; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isRegister()) {
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ if (MI.getNumOperands() >= 5 &&
+ isX86_64ExtendedReg(MI.getOperand(4)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (unsigned i = 0; i != 4; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isRegister()) {
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ default: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 0;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << 2;
+ }
+ break;
+ }
+ }
+ }
+ return REX;
+}
+
void Emitter::emitInstruction(const MachineInstr &MI) {
NumEmitted++; // Keep track of the # of mi's emitted
@@ -354,18 +560,22 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
// Emit the operand size opcode prefix as needed.
if (Desc.TSFlags & X86II::OpSize) MCE.emitByte(0x66);
+ // Emit the address size opcode prefix as needed.
+ if (Desc.TSFlags & X86II::AdSize) MCE.emitByte(0x67);
+
+ bool Need0FPrefix = false;
switch (Desc.TSFlags & X86II::Op0Mask) {
case X86II::TB:
- MCE.emitByte(0x0F); // Two-byte opcode prefix
+ Need0FPrefix = true; // Two-byte opcode prefix
break;
case X86II::REP: break; // already handled.
case X86II::XS: // F3 0F
MCE.emitByte(0xF3);
- MCE.emitByte(0x0F);
+ Need0FPrefix = true;
break;
case X86II::XD: // F2 0F
MCE.emitByte(0xF2);
- MCE.emitByte(0x0F);
+ Need0FPrefix = true;
break;
case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
@@ -377,6 +587,17 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
case 0: break; // No prefix!
}
+ if (Is64BitMode) {
+ // REX prefix
+ unsigned REX = determineREX(MI);
+ if (REX)
+ MCE.emitByte(0x40 | REX);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ MCE.emitByte(0x0F);
+
// If this is a two-address instruction, skip one of the register operands.
unsigned CurOp = 0;
CurOp += (Desc.Flags & M_2_ADDR_FLAG) != 0;
@@ -397,6 +618,7 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
case X86::IMPLICIT_DEF_GR8:
case X86::IMPLICIT_DEF_GR16:
case X86::IMPLICIT_DEF_GR32:
+ case X86::IMPLICIT_DEF_GR64:
case X86::IMPLICIT_DEF_FR32:
case X86::IMPLICIT_DEF_FR64:
case X86::IMPLICIT_DEF_VR64:
@@ -417,7 +639,7 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
} else if (MO.isGlobalAddress()) {
bool isTailCall = Opcode == X86::TAILJMPd ||
Opcode == X86::TAILJMPr || Opcode == X86::TAILJMPm;
- emitGlobalAddressForCall(MO.getGlobal(), isTailCall);
+ emitGlobalAddressForCall(MO.getGlobal(), !isTailCall);
} else if (MO.isExternalSymbol()) {
emitExternalSymbolAddress(MO.getSymbolName(), true);
} else if (MO.isImmediate()) {
@@ -434,15 +656,15 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
if (CurOp != MI.getNumOperands()) {
const MachineOperand &MO1 = MI.getOperand(CurOp++);
if (MO1.isGlobalAddress()) {
- assert(sizeOfImm(Desc) == 4 &&
+ assert(sizeOfImm(Desc) == TD->getPointerSize() &&
"Don't know how to emit non-pointer values!");
- emitGlobalAddressForPtr(MO1.getGlobal(), MO1.getOffset());
+ emitGlobalAddressForPtr(MO1.getGlobal(), Is64BitMode, MO1.getOffset());
} else if (MO1.isExternalSymbol()) {
- assert(sizeOfImm(Desc) == 4 &&
+ assert(sizeOfImm(Desc) == TD->getPointerSize() &&
"Don't know how to emit non-pointer values!");
emitExternalSymbolAddress(MO1.getSymbolName(), false);
} else if (MO1.isJumpTableIndex()) {
- assert(sizeOfImm(Desc) == 4 &&
+ assert(sizeOfImm(Desc) == TD->getPointerSize() &&
"Don't know how to emit non-pointer values!");
emitConstant(MCE.getJumpTableEntryAddress(MO1.getJumpTableIndex()), 4);
} else {
@@ -460,13 +682,14 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
break;
}
- case X86II::MRMDestMem:
+ case X86II::MRMDestMem: {
MCE.emitByte(BaseOpcode);
emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(CurOp+4).getReg()));
CurOp += 5;
if (CurOp != MI.getNumOperands())
emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
break;
+ }
case X86II::MRMSrcReg:
MCE.emitByte(BaseOpcode);
@@ -477,13 +700,17 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
break;
- case X86II::MRMSrcMem:
+ case X86II::MRMSrcMem: {
+ unsigned PCAdj = (CurOp+5 != MI.getNumOperands()) ? sizeOfImm(Desc) : 0;
+
MCE.emitByte(BaseOpcode);
- emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()));
+ emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()),
+ PCAdj);
CurOp += 5;
if (CurOp != MI.getNumOperands())
emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
break;
+ }
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
@@ -500,9 +727,13 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ unsigned PCAdj = (CurOp+4 != MI.getNumOperands()) ?
+ (MI.getOperand(CurOp+4).isImmediate() ? sizeOfImm(Desc) : 4) : 0;
+
MCE.emitByte(BaseOpcode);
- emitMemModRMByte(MI, CurOp, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m);
+ emitMemModRMByte(MI, CurOp, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m,
+ PCAdj);
CurOp += 4;
if (CurOp != MI.getNumOperands()) {
@@ -510,13 +741,14 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
if (MO.isImmediate())
emitConstant(MO.getImm(), sizeOfImm(Desc));
else if (MO.isGlobalAddress())
- emitGlobalAddressForPtr(MO.getGlobal(), MO.getOffset());
+ emitGlobalAddressForPtr(MO.getGlobal(), Is64BitMode, MO.getOffset());
else if (MO.isJumpTableIndex())
emitConstant(MCE.getJumpTableEntryAddress(MO.getJumpTableIndex()), 4);
else
assert(0 && "Unknown operand!");
}
break;
+ }
case X86II::MRMInitReg:
MCE.emitByte(BaseOpcode);
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index c5ffb06fee..4287ab2791 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -30,8 +30,9 @@
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/Statistic.h"
#include <deque>
#include <iostream>
@@ -58,16 +59,19 @@ namespace {
int FrameIndex;
} Base;
+ bool isRIPRel; // RIP relative?
unsigned Scale;
SDOperand IndexReg;
unsigned Disp;
GlobalValue *GV;
Constant *CP;
+ const char *ES;
+ int JT;
unsigned Align; // CP alignment.
X86ISelAddressMode()
- : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), GV(0),
- CP(0), Align(0) {
+ : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
+ GV(0), CP(0), ES(0), JT(-1), Align(0) {
}
};
}
@@ -92,6 +96,10 @@ namespace {
///
bool FastISel;
+ /// TM - Keep a reference to X86TargetMachine.
+ ///
+ X86TargetMachine &TM;
+
/// X86Lowering - This object fully describes how to lower LLVM code to an
/// X86-specific SelectionDAG.
X86TargetLowering X86Lowering;
@@ -100,12 +108,14 @@ namespace {
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
+ /// GlobalBaseReg - keeps track of the virtual register mapped onto global
+ /// base register.
unsigned GlobalBaseReg;
public:
- X86DAGToDAGISel(X86TargetMachine &TM, bool fast)
+ X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
: SelectionDAGISel(X86Lowering),
- ContainsFPCode(false), FastISel(fast),
+ ContainsFPCode(false), FastISel(fast), TM(tm),
X86Lowering(*TM.getTargetLowering()),
Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
@@ -156,13 +166,22 @@ namespace {
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
- CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, MVT::i32) : AM.Base.Reg;
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+ AM.Base.Reg;
Scale = getI8Imm(AM.Scale);
Index = AM.IndexReg;
- Disp = AM.GV ? CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp)
- : (AM.CP ?
- CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp)
- : getI32Imm(AM.Disp));
+ // These are 32-bit even in 64-bit mode since RIP relative offset
+ // is 32-bit.
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
+ else
+ Disp = getI32Imm(AM.Disp);
}
/// getI8Imm - Return a target constant with the specified value, of type
@@ -476,26 +495,56 @@ void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
/// addressing mode
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
bool isRoot) {
+ // RIP relative addressing: %rip + 32-bit displacement!
+ if (AM.isRIPRel) {
+ if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
+ uint64_t Val = cast<ConstantSDNode>(N)->getValue();
+ if (isInt32(AM.Disp + Val)) {
+ AM.Disp += Val;
+ return false;
+ }
+ }
+ return true;
+ }
+
int id = N.Val->getNodeId();
bool Available = isSelected(id);
switch (N.getOpcode()) {
default: break;
- case ISD::Constant:
- AM.Disp += cast<ConstantSDNode>(N)->getValue();
- return false;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getValue();
+ if (isInt32(AM.Disp + Val)) {
+ AM.Disp += Val;
+ return false;
+ }
+ break;
+ }
case X86ISD::Wrapper:
- // If both base and index components have been picked, we can't fit
- // the result available in the register in the addressing mode. Duplicate
- // GlobalAddress or ConstantPool as displacement.
- if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
+ // If value is available in a register both base and index components have
+ // been picked, we can't fit the result available in the register in the
+ // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
+
+ // Can't fit GV or CP in addressing mode for X86-64 medium or large code
+ // model since the displacement field is 32-bit. Ok for small code model.
+
+ // For X86-64 PIC code, only allow GV / CP + displacement so we can use RIP
+ // relative addressing mode.
+ if ((!Subtarget->is64Bit() || TM.getCodeModel() == CodeModel::Small) &&
+ (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val))) {
+ bool isRIP = Subtarget->is64Bit();
+ if (isRIP && (AM.Base.Reg.Val || AM.Scale > 1 || AM.IndexReg.Val ||
+ AM.BaseType == X86ISelAddressMode::FrameIndexBase))
+ break;
if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(N.getOperand(0))) {
if (AM.CP == 0) {
AM.CP = CP->get();
AM.Align = CP->getAlignment();
AM.Disp += CP->getOffset();
+ if (isRIP)
+ AM.isRIPRel = true;
return false;
}
} else if (GlobalAddressSDNode *G =
@@ -503,6 +552,20 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
if (AM.GV == 0) {
AM.GV = G->getGlobal();
AM.Disp += G->getOffset();
+ if (isRIP)
+ AM.isRIPRel = true;
+ return false;
+ }
+ } else if (isRoot && isRIP) {
+ if (ExternalSymbolSDNode *S =
+ dyn_cast<ExternalSymbolSDNode>(N.getOperand(0))) {
+ AM.ES = S->getSymbol();
+ AM.isRIPRel = true;
+ return false;
+ } else if (JumpTableSDNode *J =
+ dyn_cast<JumpTableSDNode>(N.getOperand(0))) {
+ AM.JT = J->getIndex();
+ AM.isRIPRel = true;
return false;
}
}
@@ -533,7 +596,11 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
AM.IndexReg = ShVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.Val->getOperand(1));
- AM.Disp += AddVal->getValue() << Val;
+          uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
+ if (isInt32(Disp))
+ AM.Disp = Disp;
+ else
+ AM.IndexReg = ShVal;
} else {
AM.IndexReg = ShVal;
}
@@ -563,7 +630,11 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
Reg = MulVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.Val->getOperand(1));
- AM.Disp += AddVal->getValue() * CN->getValue();
+ uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
+ if (isInt32(Disp))
+ AM.Disp = Disp;
+ else
+ Reg = N.Val->getOperand(0);
} else {
Reg = N.Val->getOperand(0);
}
@@ -641,13 +712,14 @@ bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
if (MatchAddress(N, AM))
return false;
+ MVT::ValueType VT = N.getValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
if (!AM.Base.Reg.Val)
- AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
}
if (!AM.IndexReg.Val)
- AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
+ AM.IndexReg = CurDAG->getRegister(0, VT);
getAddressOperands(AM, Base, Scale, Index, Disp);
return true;
@@ -662,19 +734,20 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
if (MatchAddress(N, AM))
return false;
+ MVT::ValueType VT = N.getValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
if (AM.Base.Reg.Val)
Complexity = 1;
else
- AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
Complexity = 4;
if (AM.IndexReg.Val)
Complexity++;
else
- AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
+ AM.IndexReg = CurDAG->getRegister(0, VT);
if (AM.Scale > 2)
Complexity += 2;
@@ -687,8 +760,14 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
// optimal (especially for code size consideration). LEA is nice because of
// its three-address nature. Tweak the cost function again when we can run
// convertToThreeAddress() at register allocation time.
- if (AM.GV || AM.CP)
- Complexity += 2;
+ if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
+ // For X86-64, we should always use lea to materialize RIP relative
+ // addresses.
+ if (Subtarget->is64Bit())
+ Complexity = 4;
+ else
+ Complexity += 2;
+ }
if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
Complexity++;
@@ -721,6 +800,7 @@ static bool isRegister0(SDOperand Op) {
/// base address to use for accessing globals into a register.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
+ assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
if (!GlobalBaseReg) {
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = BB->getParent()->front();
@@ -732,7 +812,7 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0);
BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg);
}
- return CurDAG->getRegister(GlobalBaseReg, MVT::i32).Val;
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
}
static SDNode *FindCallStartFromCall(SDNode *Node) {
@@ -776,9 +856,11 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
// Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
// code and is matched first so to prevent it from being turned into
// LEA32r X+c.
+ // In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
+ MVT::ValueType PtrVT = TLI.getPointerTy();
SDOperand N0 = N.getOperand(0);
SDOperand N1 = N.getOperand(1);
- if (N.Val->getValueType(0) == MVT::i32 &&
+ if (N.Val->getValueType(0) == PtrVT &&
N0.getOpcode() == X86ISD::Wrapper &&
N1.getOpcode() == ISD::Constant) {
unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
@@ -786,17 +868,23 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
// TODO: handle ExternalSymbolSDNode.
if (GlobalAddressSDNode *G =
dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
- C = CurDAG->getTargetGlobalAddress(G->getGlobal(), MVT::i32,
+ C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
G->getOffset() + Offset);
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
- C = CurDAG->getTargetConstantPool(CP->get(), MVT::i32,
+ C = CurDAG->getTargetConstantPool(CP->get(), PtrVT,
CP->getAlignment(),
CP->getOffset()+Offset);
}
- if (C.Val)
- return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, MVT::i32, C);
+ if (C.Val) {
+ if (Subtarget->is64Bit()) {
+ SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
+ CurDAG->getRegister(0, PtrVT), C };
+ return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
+ } else
+ return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
+ }
}
// Other cases are handled by auto-generated code.
@@ -811,6 +899,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
}
else
switch (NVT) {
@@ -818,6 +907,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+ case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
}
unsigned LoReg, HiReg;
@@ -826,6 +916,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
+ case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
}
SDOperand N0 = Node->getOperand(0);
@@ -899,6 +990,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
else
switch (NVT) {
@@ -906,6 +998,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
}
unsigned LoReg, HiReg;
@@ -927,6 +1020,11 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
ClrOpcode = X86::MOV32r0;
SExtOpcode = X86::CDQ;
break;
+ case MVT::i64:
+ LoReg = X86::RAX; HiReg = X86::RDX;
+ ClrOpcode = X86::MOV64r0;
+ SExtOpcode = X86::CQO;
+ break;
}
SDOperand N0 = Node->getOperand(0);
@@ -994,7 +1092,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
}
case ISD::TRUNCATE: {
- if (NVT == MVT::i8) {
+ if (!Subtarget->is64Bit() && NVT == MVT::i8) {
unsigned Opc2;
MVT::ValueType VT;
switch (Node->getOperand(0).getValueType()) {
@@ -1002,12 +1100,12 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i16:
Opc = X86::MOV16to16_;
VT = MVT::i16;
- Opc2 = X86::TRUNC_GR16_GR8;
+ Opc2 = X86::TRUNC_16_to8;
break;
case MVT::i32:
Opc = X86::MOV32to32_;
VT = MVT::i32;
- Opc2 = X86::TRUNC_GR32_GR8;
+ Opc2 = X86::TRUNC_32_to8;
break;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 964da18035..63ac0e56d8 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -42,6 +42,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
: TargetLowering(TM) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSE = Subtarget->hasSSE2();
+ X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
// Set up the TargetLowering object.
@@ -51,7 +52,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setSetCCResultContents(ZeroOrOneSetCCResult);
setSchedulingPreference(SchedulingForRegPressure);
setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
- setStackPointerRegisterToSaveRestore(X86::ESP);
+ setStackPointerRegisterToSaveRestore(X86StackPtr);
if (!Subtarget->isTargetDarwin())
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
@@ -71,6 +72,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i8, X86::GR8RegisterClass);
addRegisterClass(MVT::i16, X86::GR16RegisterClass);
addRegisterClass(MVT::i32, X86::GR32RegisterClass);
+ if (Subtarget->is64Bit())
+ addRegisterClass(MVT::i64, X86::GR64RegisterClass);
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
@@ -78,11 +81,16 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
- if (X86ScalarSSE)
- // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
- else
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ } else {
+ if (X86ScalarSSE)
+ // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
+ else
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ }
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
@@ -96,10 +104,11 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
}
- // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
- // isn't legal.
- setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
- setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ if (!Subtarget->is64Bit()) {
+ // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
+ setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ }
// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
@@ -119,14 +128,19 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
- if (X86ScalarSSE && !Subtarget->hasSSE3())
- // Expand FP_TO_UINT into a select.
- // FIXME: We would like to use a Custom expander here eventually to do
- // the optimal thing for SSE vs. the default expansion in the legalizer.
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
- else
- // With SSE3 we can use fisttpll to convert to a signed i64.
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ } else {
+ if (X86ScalarSSE && !Subtarget->hasSSE3())
+ // Expand FP_TO_UINT into a select.
+ // FIXME: We would like to use a Custom expander here eventually to do
+ // the optimal thing for SSE vs. the default expansion in the legalizer.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
+ else
+ // With SSE3 we can use fisttpll to convert to a signed i64.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ }
setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
@@ -135,12 +149,15 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::BR_CC , MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
+
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
@@ -150,13 +167,18 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ }
+
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
// These should be promoted to a larger select which is supported.
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
setOperationAction(ISD::SELECT , MVT::i8 , Promote);
-
// X86 wants to expand cmov itself.
setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
@@ -167,6 +189,10 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SELECT , MVT::i64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i64 , Custom);
+ }
// X86 ret instruction may pop stack.
setOperationAction(ISD::RET , MVT::Other, Custom);
// Darwin ABI issue.
@@ -174,6 +200,12 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
+ }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
@@ -198,6 +230,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
@@ -441,7 +475,7 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG)
if (ObjXMMRegs) {
// Passed in a XMM register.
unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
- X86::VR128RegisterClass);
+ X86::VR128RegisterClass);
ArgValue= DAG.getCopyFromReg(Root, Reg, ObjectVT);
ArgValues.push_back(ArgValue);
NumXMMRegs += ObjXMMRegs;
@@ -466,8 +500,9 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG)
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
if (isVarArg)
VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
- ReturnAddrIndex = 0; // No return address slot generated yet.
- BytesToPopOnReturn = 0; // Callee pops nothing.
+ RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
+ ReturnAddrIndex = 0; // No return address slot generated yet.
+ BytesToPopOnReturn = 0; // Callee pops nothing.
BytesCallerReserves = ArgOffset;
// If this is a struct return on Darwin/X86, the callee pops the hidden struct
@@ -539,7 +574,7 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) {
NumXMMRegs = 0;
std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
std::vector<SDOperand> MemOpChains;
- SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy());
+ SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
for (unsigned i = 0; i != NumOps; ++i) {
SDOperand Arg = Op.getOperand(5+2*i);
@@ -751,6 +786,507 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) {
return Res.getValue(Op.ResNo);
}
+
+//===----------------------------------------------------------------------===//
+// X86-64 C Calling Convention implementation
+//===----------------------------------------------------------------------===//
+
+/// HowToPassX86_64CCCArgument - Returns how a formal argument of the specified
+/// type should be passed. If it is passed through the stack, returns the size
+/// of the stack slot; if it is passed through an integer or XMM register,
+/// returns the number of integer or XMM registers needed.
+static void
+HowToPassX86_64CCCArgument(MVT::ValueType ObjectVT,
+ unsigned NumIntRegs, unsigned NumXMMRegs,
+ unsigned &ObjSize, unsigned &ObjIntRegs,
+ unsigned &ObjXMMRegs) {
+ ObjSize = 0;
+ ObjIntRegs = 0;
+ ObjXMMRegs = 0;
+
+ switch (ObjectVT) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ if (NumIntRegs < 6)
+ ObjIntRegs = 1;
+ else {
+ switch (ObjectVT) {
+ default: break;
+ case MVT::i8: ObjSize = 1; break;
+ case MVT::i16: ObjSize = 2; break;
+ case MVT::i32: ObjSize = 4; break;
+ case MVT::i64: ObjSize = 8; break;
+ }
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ if (NumXMMRegs < 8)
+ ObjXMMRegs = 1;
+ else {
+ switch (ObjectVT) {
+ default: break;
+ case MVT::f32: ObjSize = 4; break;
+ case MVT::f64: ObjSize = 8; break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64: ObjSize = 16; break;
+ }
+ break;
+ }
+ }
+}
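A minimal standalone restatement of the rule the helper above implements, assuming the same limits of six integer and eight XMM argument registers; the enum, struct, and function names are invented for illustration:

enum class ArgHome { IntReg, XMMReg, Stack };

struct ArgSlot {
  ArgHome  Home;
  unsigned StackBytes;  // 0 unless the argument spills to the stack
};

// IsIntClass covers i8/i16/i32/i64; everything else handled here (f32, f64
// and the 128-bit vector types) travels in XMM registers.
ArgSlot classifyX86_64Arg(bool IsIntClass, unsigned SizeInBytes,
                          unsigned IntRegsUsed, unsigned XMMRegsUsed) {
  if (IsIntClass)
    return IntRegsUsed < 6 ? ArgSlot{ArgHome::IntReg, 0}
                           : ArgSlot{ArgHome::Stack, SizeInBytes};
  return XMMRegsUsed < 8 ? ArgSlot{ArgHome::XMMReg, 0}
                         : ArgSlot{ArgHome::Stack, SizeInBytes};
}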
+
+SDOperand
+X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
+ unsigned NumArgs = Op.Val->getNumValues() - 1;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ SDOperand Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ std::vector<SDOperand> ArgValues;
+
+ // Add DAG nodes to load the arguments... On entry to a function on the X86,
+ // the stack frame looks like this:
+ //
+ // [RSP] -- return address
+ // [RSP + 8] -- first nonreg argument (leftmost lexically)
+  // [RSP + 16] -- second nonreg argument, if the first argument is <= 8 bytes in size
+ // ...
+ //
+ unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
+ unsigned NumIntRegs = 0; // Int regs used for parameter passing.
+ unsigned NumXMMRegs = 0; // XMM regs used for parameter passing.
+
+ static const unsigned GPR8ArgRegs[] = {
+ X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B
+ };
+ static const unsigned GPR16ArgRegs[] = {
+ X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W
+ };
+ static const unsigned GPR32ArgRegs[] = {
+ X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
+ };
+ static const unsigned GPR64ArgRegs[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+
+ for (unsigned i = 0; i < NumArgs; ++i) {
+ MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
+ unsigned ArgIncrement = 8;
+ unsigned ObjSize = 0;
+ unsigned ObjIntRegs = 0;
+ unsigned ObjXMMRegs = 0;
+
+ // FIXME: __int128 and long double support?
+ HowToPassX86_64CCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
+ ObjSize, ObjIntRegs, ObjXMMRegs);
+ if (ObjSize > 8)
+ ArgIncrement = ObjSize;
+
+ unsigned Reg = 0;
+ SDOperand ArgValue;
+ if (ObjIntRegs || ObjXMMRegs) {
+ switch (ObjectVT) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64: {
+ TargetRegisterClass *RC = NULL;
+ switch (ObjectVT) {
+ default: break;
+ case MVT::i8:
+ RC = X86::GR8RegisterClass;
+ Reg = GPR8ArgRegs[NumIntRegs];
+ break;
+ case MVT::i16:
+ RC = X86::GR16RegisterClass;
+ Reg = GPR16ArgRegs[NumIntRegs];
+ break;
+ case MVT::i32:
+ RC = X86::GR32RegisterClass;
+ Reg = GPR32ArgRegs[NumIntRegs];
+ break;
+ case MVT::i64:
+ RC = X86::GR64RegisterClass;
+ Reg = GPR64ArgRegs[NumIntRegs];
+ break;
+ }
+ Reg = AddLiveIn(MF, Reg, RC);
+ ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
+ break;
+ }
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64: {
+ TargetRegisterClass *RC= (ObjectVT == MVT::f32) ?
+ X86::FR32RegisterClass : ((ObjectVT == MVT::f64) ?
+ X86::FR64RegisterClass : X86::VR128RegisterClass);
+ Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], RC);
+ ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
+ break;
+ }
+ }
+ NumIntRegs += ObjIntRegs;
+ NumXMMRegs += ObjXMMRegs;
+ } else if (ObjSize) {
+ // XMM arguments have to be aligned on 16-byte boundary.
+ if (ObjSize == 16)
+ ArgOffset = ((ArgOffset + 15) / 16) * 16;
+ // Create the SelectionDAG nodes corresponding to a load from this
+ // parameter.
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+ SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
+ DAG.getSrcValue(NULL));
+ ArgOffset += ArgIncrement; // Move on to the next argument.
+ }
+
+ ArgValues.push_back(ArgValue);
+ }
+
+  // If the function takes a variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+    // may be loaded by dereferencing the result of va_next.
+ VarArgsGPOffset = NumIntRegs * 8;
+ VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
+ VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
+ RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
+
+ // Store the integer parameter registers.
+ std::vector<SDOperand> MemOps;
+ SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
+ DAG.getConstant(VarArgsGPOffset, getPointerTy()));
+ for (; NumIntRegs != 6; ++NumIntRegs) {
+ unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
+ X86::GR64RegisterClass);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
+ SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
+ Val, FIN, DAG.getSrcValue(NULL));
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(8, getPointerTy()));
+ }
+
+ // Now store the XMM (fp + vector) parameter registers.
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
+ DAG.getConstant(VarArgsFPOffset, getPointerTy()));
+ for (; NumXMMRegs != 8; ++NumXMMRegs) {
+ unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
+ SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
+ Val, FIN, DAG.getSrcValue(NULL));
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(16, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+
+ ArgValues.push_back(Root);
+
+ ReturnAddrIndex = 0; // No return address slot generated yet.
+ BytesToPopOnReturn = 0; // Callee pops nothing.
+ BytesCallerReserves = ArgOffset;
+
+ // Return the new list of results.
+ std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
+ Op.Val->value_end());
+ return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
+}
+
+SDOperand
+X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Chain = Op.getOperand(0);
+ unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
+ SDOperand Callee = Op.getOperand(4);
+ MVT::ValueType RetVT= Op.Val->getValueType(0);
+ unsigned NumOps = (Op.getNumOperands() - 5) / 2;
+
+ // Count how many bytes are to be pushed on the stack.
+ unsigned NumBytes = 0;
+ unsigned NumIntRegs = 0; // Int regs used for parameter passing.
+ unsigned NumXMMRegs = 0; // XMM regs used for parameter passing.
+
+ static const unsigned GPR8ArgRegs[] = {
+ X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B
+ };
+ static const unsigned GPR16ArgRegs[] = {
+ X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W
+ };
+ static const unsigned GPR32ArgRegs[] = {
+ X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
+ };
+ static const unsigned GPR64ArgRegs[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDOperand Arg = Op.getOperand(5+2*i);
+ MVT::ValueType ArgVT = Arg.getValueType();
+
+ switch (ArgVT) {
+ default: assert(0 && "Unknown value type!");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ if (NumIntRegs < 6)
+ ++NumIntRegs;
+ else
+ NumBytes += 8;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ if (NumXMMRegs < 8)
+ NumXMMRegs++;
+ else if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+ NumBytes += 8;
+ else {
+ // XMM arguments have to be aligned on 16-byte boundary.
+ NumBytes = ((NumBytes + 15) / 16) * 16;
+ NumBytes += 16;
+ }
+ break;
+ }
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
+
+ // Arguments go on the stack in reverse order, as specified by the ABI.
+ unsigned ArgOffset = 0;
+ NumIntRegs = 0;
+ NumXMMRegs = 0;
+ std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
+ std::vector<SDOperand> MemOpChains;
+ SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDOperand Arg = Op.getOperand(5+2*i);
+ MVT::ValueType ArgVT = Arg.getValueType();
+
+ switch (ArgVT) {
+ default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ if (NumIntRegs < 6) {
+ unsigned Reg = 0;
+ switch (ArgVT) {
+ default: break;
+ case MVT::i8: Reg = GPR8ArgRegs[NumIntRegs]; break;
+ case MVT::i16: Reg = GPR16ArgRegs[NumIntRegs]; break;
+ case MVT::i32: Reg = GPR32ArgRegs[NumIntRegs]; break;
+ case MVT::i64: Reg = GPR64ArgRegs[NumIntRegs]; break;
+ }
+ RegsToPass.push_back(std::make_pair(Reg, Arg));
+ ++NumIntRegs;
+ } else {
+ SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
+ PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
+ Arg, PtrOff, DAG.getSrcValue(NULL)));
+ ArgOffset += 8;
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ if (NumXMMRegs < 8) {
+ RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
+ NumXMMRegs++;
+ } else {
+ if (ArgVT != MVT::f32 && ArgVT != MVT::f64) {
+ // XMM arguments have to be aligned on 16-byte boundary.
+ ArgOffset = ((ArgOffset + 15) / 16) * 16;
+ }
+ SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
+ PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
+ Arg, PtrOff, DAG.getSrcValue(NULL)));
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+ ArgOffset += 8;
+ else
+ ArgOffset += 16;
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDOperand InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isVarArg) {
+ // From AMD64 ABI document:
+ // For calls that may call functions that use varargs or stdargs
+ // (prototype-less calls or calls to functions containing ellipsis (...) in
+    // the declaration), %al is used as a hidden argument to specify the number
+    // of SSE registers used. The contents of %al do not need to match exactly
+    // the number of registers, but must be an upper bound on the number of SSE
+    // registers used and must be in the range 0 - 8 inclusive.
+ Chain = DAG.getCopyToReg(Chain, X86::AL,
+ DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
+ InFlag = Chain.getValue(1);
+ }
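A tiny illustration of the %al rule quoted above; the helper name is hypothetical, and the lowering simply stores the exact count, which is itself a valid upper bound:

// What ends up in AL before a variadic call: any upper bound, in the range
// 0 - 8, on the number of SSE registers used for arguments. For example,
// printf("%f\n", x) passes one double in XMM0, so AL = 1 (any value from 1
// to 8 would also be legal).
unsigned alForVarArgCall(unsigned NumXMMRegsUsed) {
  return NumXMMRegsUsed <= 8 ? NumXMMRegsUsed : 8;
}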
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+
+ std::vector<MVT::ValueType> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ std::vector<SDOperand> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+
+ // FIXME: Do not generate X86ISD::TAILCALL for now.
+ Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
+ NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ if (RetVT != MVT::Other)
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
+ Ops.push_back(DAG.getConstant(0, getPointerTy()));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ if (RetVT != MVT::Other)
+ InFlag = Chain.getValue(1);
+
+ std::vector<SDOperand> ResultVals;
+ NodeTys.clear();
+ switch (RetVT) {
+ default: assert(0 && "Unknown value type to return!");
+ case MVT::Other: break;
+ case MVT::i8:
+ Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ NodeTys.push_back(MVT::i8);
+ break;
+ case MVT::i16:
+ Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ NodeTys.push_back(MVT::i16);
+ break;
+ case MVT::i32:
+ Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ NodeTys.push_back(MVT::i32);
+ break;
+ case MVT::i64:
+ if (Op.Val->getValueType(1) == MVT::i64) {
+ // FIXME: __int128 support?
+ Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ Chain = DAG.getCopyFromReg(Chain, X86::RDX, MVT::i64,
+ Chain.getValue(2)).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ NodeTys.push_back(MVT::i64);
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+ NodeTys.push_back(MVT::i64);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ // FIXME: long double support?
+ Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ NodeTys.push_back(RetVT);
+ break;
+ }
+
+ // If the function returns void, just return the chain.
+ if (ResultVals.empty())
+ return Chain;
+
+ // Otherwise, merge everything together with a MERGE_VALUES node.
+ NodeTys.push_back(MVT::Other);
+ ResultVals.push_back(Chain);
+ SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
+ &ResultVals[0], ResultVals.size());
+ return Res.getValue(Op.ResNo);
+}
+
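The result-register choices in the switch above reduce to a short type-to-register mapping; a hedged, non-LLVM restatement for reference (the helper below is hypothetical):

// Which register the X86-64 call lowering above reads a return value from.
// A second i64 result additionally comes back in RDX; __int128 and long
// double remain FIXMEs in the code above.
const char *x86_64ReturnRegSketch(unsigned BitWidth, bool IsFPOrVector) {
  if (IsFPOrVector) return "XMM0";   // f32/f64 and the 128-bit vector types
  switch (BitWidth) {
  case 8:  return "AL";
  case 16: return "AX";
  case 32: return "EAX";
  case 64: return "RAX";
  default: return "unhandled";
  }
}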
//===----------------------------------------------------------------------===//
// Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
@@ -949,6 +1485,7 @@ X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
ArgOffset += 4;
VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
+ RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
ReturnAddrIndex = 0; // No return address slot generated yet.
BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments.
BytesCallerReserves = 0;
@@ -1063,7 +1600,7 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG){
NumIntRegs = 0;
std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
std::vector<SDOperand> MemOpChains;
- SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy());
+ SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
for (unsigned i = 0; i != NumOps; ++i) {
SDOperand Arg = Op.getOperand(5+2*i);
@@ -1273,10 +1810,13 @@ SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
MachineFunction &MF = DAG.getMachineFunction();
- ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
+ if (Subtarget->is64Bit())
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
+ else
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
}
- return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}
@@ -1291,11 +1831,11 @@ LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
if (!isFrameAddress)
// Just load the return address
- Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
+ Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI,
DAG.getSrcValue(NULL));
else
- Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
- DAG.getConstant(4, MVT::i32));
+ Result = DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
+ DAG.getConstant(4, getPointerTy()));
}
return std::make_pair(Result, Chain);
}
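A one-line capture of the frame-slot change above, with an invented helper, just to make the 32- vs 64-bit difference explicit:

// Size and frame offset of the return-address fixed object.
struct RetAddrSlotSketch { unsigned Size; int Offset; };

RetAddrSlotSketch retAddrSlot(bool Is64Bit) {
  return Is64Bit ? RetAddrSlotSketch{8, -8} : RetAddrSlotSketch{4, -4};
}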
@@ -2184,7 +2724,7 @@ static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG, TargetLowering &TLI) {
if (NumNonZero > 8)
return SDOperand();
@@ -2217,7 +2757,7 @@ static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
if (ThisElt.Val)
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
- DAG.getConstant(i/2, MVT::i32));
+ DAG.getConstant(i/2, TLI.getPointerTy()));
}
}
@@ -2228,7 +2768,7 @@ static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG, TargetLowering &TLI) {
if (NumNonZero > 4)
return SDOperand();
@@ -2245,7 +2785,7 @@ static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
First = false;
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
- DAG.getConstant(i, MVT::i32));
+ DAG.getConstant(i, TLI.getPointerTy()));
}
}
@@ -2324,12 +2864,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8) {
- SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG);
+ SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
if (V.Val) return V;
}
if (EVTBits == 16) {
- SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG);
+ SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
if (V.Val) return V;
}
@@ -2791,7 +3333,8 @@ X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
CP->getAlignment()));
if (Subtarget->isTargetDarwin()) {
// With PIC, the address is actually $g + Offset.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ if (!Subtarget->is64Bit() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
}
@@ -2807,7 +3350,8 @@ X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
getPointerTy()));
if (Subtarget->isTargetDarwin()) {
// With PIC, the address is actually $g + Offset.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ if (!Subtarget->is64Bit() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
@@ -2818,7 +3362,7 @@ X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
// not the GV offset field.
if (getTargetMachine().getRelocationModel() != Reloc::Static &&
DarwinGVRequiresExtraLoad(GV))
- Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
+ Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(),
Result, DAG.getSrcValue(NULL));
}
@@ -2833,7 +3377,8 @@ X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
getPointerTy()));
if (Subtarget->isTargetDarwin()) {
// With PIC, the address is actually $g + Offset.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ if (!Subtarget->is64Bit() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
@@ -3234,7 +3779,8 @@ SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
getPointerTy()));
if (Subtarget->isTargetDarwin()) {
// With PIC, the address is actually $g + Offset.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ if (!Subtarget->is64Bit() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
@@ -3245,7 +3791,9 @@ SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
- if (CallingConv == CallingConv::Fast && EnableFastCC)
+ if (Subtarget->is64Bit())
+ return LowerX86_64CCCCallTo(Op, DAG);
+ else if (CallingConv == CallingConv::Fast && EnableFastCC)
return LowerFastCCCallTo(Op, DAG);
else
return LowerCCCCallTo(Op, DAG);
@@ -3264,18 +3812,25 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
case 3: {
MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
- if (MVT::isVector(ArgVT)) {
+ if (MVT::isVector(ArgVT) ||
+ (Subtarget->is64Bit() && MVT::isFloatingPoint(ArgVT))) {
// Integer or FP vector result -> XMM0.
if (DAG.getMachineFunction().liveout_empty())
DAG.getMachineFunction().addLiveOut(X86::XMM0);
Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
SDOperand());
} else if (MVT::isInteger(ArgVT)) {
- // Integer result -> EAX
+ // Integer result -> EAX / RAX.
+ // The C calling convention guarantees the return value has been
+ // promoted to at least MVT::i32. The X86-64 ABI doesn't require the
+      // value to be promoted to MVT::i64, so we don't have to extend it to
+      // 64 bits. Return the value in EAX, but mark RAX as liveout.
+ unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
if (DAG.getMachineFunction().liveout_empty())
- DAG.getMachineFunction().addLiveOut(X86::EAX);
+ DAG.getMachineFunction().addLiveOut(Reg);
- Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
+ Reg = (ArgVT == MVT::i64) ? X86::RAX : X86::EAX;
+ Copy = DAG.getCopyToReg(Op.getOperand(0), Reg, Op.getOperand(1),
SDOperand());
} else if (!X86ScalarSSE) {
// FP return with fp-stack value.
@@ -3329,19 +3884,22 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
}
break;
}
- case 5:
+ case 5: {
+ unsigned Reg1 = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ unsigned Reg2 = Subtarget->is64Bit() ? X86::RDX : X86::EDX;
if (DAG.getMachineFunction().liveout_empty()) {
- DAG.getMachineFunction().addLiveOut(X86::EAX);
- DAG.getMachineFunction().addLiveOut(X86::EDX);
+ DAG.getMachineFunction().addLiveOut(Reg1);
+ DAG.getMachineFunction().addLiveOut(Reg2);
}
- Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(3),
+ Copy = DAG.getCopyToReg(Op.getOperand(0), Reg2, Op.getOperand(3),
SDOperand());
- Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1));
+ Copy = DAG.getCopyToReg(Copy, Reg1, Op.getOperand(1), Copy.getValue(1));
break;
+ }
}
return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
- Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
+ Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
Copy.getValue(1));
}
@@ -3355,7 +3913,9 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true);
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
- if (CC == CallingConv::Fast && EnableFastCC)
+ if (Subtarget->is64Bit())
+ return LowerX86_64CCCArguments(Op, DAG);
+ else if (CC == CallingConv::Fast && EnableFastCC)
return LowerFastCCArguments(Op, DAG);
else
return LowerCCCArguments(Op, DAG);
@@ -3394,38 +3954,47 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
bool TwoRepStos = false;
if (ValC) {
unsigned ValReg;
- unsigned Val = ValC->getValue() & 255;
+ uint64_t Val = ValC->getValue() & 255;
// If the value is a constant, then we can potentially use larger sets.
switch (Align & 3) {
case 2: // WORD aligned
AVT = MVT::i16;
- Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
- BytesLeft = I->getValue() % 2;
- Val = (Val << 8) | Val;
ValReg = X86::AX;
+ Val = (Val << 8) | Val;
break;
- case 0: // DWORD aligned
+ case 0: // DWORD aligned
AVT = MVT::i32;
- if (I) {
- Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
- BytesLeft = I->getValue() % 4;
- } else {
- Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
- DAG.getConstant(2, MVT::i8));
- TwoRepStos = true;
- }
+ ValReg = X86::EAX;
Val = (Val << 8) | Val;
Val = (Val << 16) | Val;
- ValReg = X86::EAX;
+ if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned
+ AVT = MVT::i64;
+ ValReg = X86::RAX;
+ Val = (Val << 32) | Val;
+ }
break;
default: // Byte aligned
AVT = MVT::i8;
- Count = Op.getOperand(3);
ValReg = X86::AL;
+ Count = Op.getOperand(3);
break;
}
+ if (AVT > MVT::i8) {
+ if (I) {
+ unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
+ Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
+ BytesLeft = I->getValue() % UBytes;
+ } else {
+ assert(AVT >= MVT::i32 &&
+ "Do not use rep;stos if not at least DWORD aligned");
+ Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
+ Op.getOperand(3), DAG.getConstant(2, MVT::i8));
+ TwoRepStos = true;
+ }
+ }
+
Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
InFlag);
InFlag = Chain.getValue(1);
@@ -3436,9 +4005,11 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
InFlag = Chain.getValue(1);
}
- Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
+ Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
+ Count, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
+ Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
+ Op.getOperand(1), InFlag);
InFlag = Chain.getValue(1);
std::vector<MVT::ValueType> Tys;
@@ -3455,8 +4026,9 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
Count = Op.getOperand(3);
MVT::ValueType CVT = Count.getValueType();
SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
- DAG.getConstant(3, CVT));
- Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
+ Left, InFlag);
InFlag = Chain.getValue(1);
Tys.clear();
Tys.push_back(MVT::Other);
@@ -3467,12 +4039,23 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
} else if (BytesLeft) {
- // Issue stores for the last 1 - 3 bytes.
+ // Issue stores for the last 1 - 7 bytes.
SDOperand Value;
unsigned Val = ValC->getValue() & 255;
unsigned Offset = I->getValue() - BytesLeft;
SDOperand DstAddr = Op.getOperand(1);
MVT::ValueType AddrVT = DstAddr.getValueType();
+ if (BytesLeft >= 4) {
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ Value = DAG.getConstant(Val, MVT::i32);
+ Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
+ DAG.getNode(ISD::ADD, AddrVT, DstAddr,
+ DAG.getConstant(Offset, AddrVT)),
+ DAG.getSrcValue(NULL));
+ BytesLeft -= 4;
+ Offset += 4;
+ }
if (BytesLeft >= 2) {
Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
@@ -3482,7 +4065,6 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
BytesLeft -= 2;
Offset += 2;
}
-
if (BytesLeft == 1) {
Value = DAG.getConstant(Val, MVT::i8);
Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
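The element-size/count/remainder arithmetic used by the memset lowering above can be summarized in a few lines; a hedged sketch for the constant-size case, with illustrative names that are not part of the patch:

// How the lowering sizes the rep;stos loop and its trailing stores.
struct RepStosPlanSketch {
  unsigned ElementBytes;     // 1, 2, 4, or 8 (8 only in 64-bit mode, 16-byte aligned)
  unsigned long long Count;  // loaded into ECX/RCX for rep;stos
  unsigned TailBytes;        // 0..7, finished with 4-, 2-, then 1-byte stores
};

RepStosPlanSketch planConstantMemset(unsigned long long Size,
                                     unsigned ElementBytes) {
  return RepStosPlanSketch{ElementBytes, Size / ElementBytes,
                           static_cast<unsigned>(Size % ElementBytes)};
}
// Example: a 16-byte-aligned, 30-byte memset in 64-bit mode gives
// planConstantMemset(30, 8) -> 3 qwords via rep;stosq, then one 4-byte and
// one 2-byte store of the replicated fill value.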
@@ -3525,19 +4107,11 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
switch (Align & 3) {
case 2: // WORD aligned
AVT = MVT::i16;
- Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
- BytesLeft = I->getValue() % 2;
break;
- case 0: // DWORD aligned
+ case 0: // DWORD aligned
AVT = MVT::i32;
- if (I) {
- Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
- BytesLeft = I->getValue() % 4;
- } else {
- Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
- DAG.getConstant(2, MVT::i8));
- TwoRepMovs = true;
- }
+ if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned
+ AVT = MVT::i64;
break;
default: // Byte aligned
AVT = MVT::i8;
@@ -3545,12 +4119,29 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
break;
}
+ if (AVT > MVT::i8) {
+ if (I) {
+ unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
+ Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
+ BytesLeft = I->getValue() % UBytes;
+ } else {
+ assert(AVT >= MVT::i32 &&
+ "Do not use rep;movs if not at least DWORD aligned");
+ Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
+ Op.getOperand(3), DAG.getConstant(2, MVT::i8));
+ TwoRepMovs = true;
+ }
+ }
+
SDOperand InFlag(0, 0);
- Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
+ Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
+ Count, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
+ Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
+ Op.getOperand(1), InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
+ Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
+ Op.getOperand(2), InFlag);
InFlag = Chain.getValue(1);
std::vector<MVT::ValueType> Tys;
@@ -3567,8 +4158,9 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
Count = Op.getOperand(3);
MVT::ValueType CVT = Count.getValueType();
SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
- DAG.getConstant(3, CVT));
- Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
+ Left, InFlag);
InFlag = Chain.getValue(1);
Tys.clear();
Tys.push_back(MVT::Other);
@@ -3579,13 +4171,26 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
} else if (BytesLeft) {
- // Issue loads and stores for the last 1 - 3 bytes.
+ // Issue loads and stores for the last 1 - 7 bytes.
unsigned Offset = I->getValue() - BytesLeft;
SDOperand DstAddr = Op.getOperand(1);
MVT::ValueType DstVT = DstAddr.getValueType();
SDOperand SrcAddr = Op.getOperand(2);
MVT::ValueType SrcVT = SrcAddr.getValueType();
SDOperand Value;
+ if (BytesLeft >= 4) {
+ Value = DAG.getLoad(MVT::i32, Chain,
+ DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getSrcValue(NULL));
+ Chain = Value.getValue(1);
+ Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
+ DAG.getNode(ISD::ADD, DstVT, DstAddr,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getSrcValue(NULL));
+ BytesLeft -= 4;
+ Offset += 4;
+ }
if (BytesLeft >= 2) {
Value = DAG.getLoad(MVT::i16, Chain,
DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
@@ -3635,12 +4240,51 @@ X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
}
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
- // vastart just stores the address of the VarArgsFrameIndex slot into the
- // memory location argument.
- // FIXME: Replace MVT::i32 with PointerTy
- SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
- return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
- Op.getOperand(1), Op.getOperand(2));
+ if (!Subtarget->is64Bit()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // __va_list_tag:
+ // gp_offset (0 - 6 * 8)
+ // fp_offset (48 - 48 + 8 * 16)
+  // overflow_arg_area (points to parameters passed in memory).
+ // reg_save_area
+ std::vector<SDOperand> MemOps;
+ SDOperand FIN = Op.getOperand(1);
+ // Store gp_offset
+ SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(VarArgsGPOffset, MVT::i32),
+ FIN, Op.getOperand(2));
+ MemOps.push_back(Store);
+
+ // Store fp_offset
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(VarArgsFPOffset, MVT::i32),
+ FIN, Op.getOperand(2));
+ MemOps.push_back(Store);
+
+ // Store ptr to overflow_arg_area
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
+ OVFIN, FIN, Op.getOperand(2));
+ MemOps.push_back(Store);
+
+ // Store ptr to reg_save_area.
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(8, getPointerTy()));
+ SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
+ RSFIN, FIN, Op.getOperand(2));
+ MemOps.push_back(Store);
+ return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
}
SDOperand
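The four stores built above initialize the AMD64 __va_list_tag record in order, at offsets 0, 4, 8, and 16, matching the pointer increments in the code. A hedged C-level picture of that layout (this mirrors the ABI definition, not an LLVM type):

struct VaListTagSketch {
  unsigned gp_offset;          // 0 .. 6*8: next unconsumed GP register slot
  unsigned fp_offset;          // 48 .. 48 + 8*16: next unconsumed XMM slot
  void    *overflow_arg_area;  // parameters passed on the stack
  void    *reg_save_area;      // the 6*8 + 8*16 byte register save area
};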
@@ -4333,6 +4977,23 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Res.first = DestReg;
Res.second = Res.second = X86::GR32RegisterClass;
}
+ } else if (VT == MVT::i64) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::RAX; break;
+ case X86::DX: DestReg = X86::RDX; break;
+ case X86::CX: DestReg = X86::RCX; break;
+ case X86::BX: DestReg = X86::RBX; break;
+ case X86::SI: DestReg = X86::RSI; break;
+ case X86::DI: DestReg = X86::RDI; break;
+ case X86::BP: DestReg = X86::RBP; break;
+ case X86::SP: DestReg = X86::RSP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+      Res.second = X86::GR64RegisterClass;
+ }
}
return Res;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 0901b2a770..72a282b2de 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -267,6 +267,9 @@ namespace llvm {
// X86TargetLowering - X86 Implementation of the TargetLowering interface
class X86TargetLowering : public TargetLowering {
int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ int RegSaveFrameIndex; // X86-64 vararg func register save area.
+ unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset.
+ unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset.
int ReturnAddrIndex; // FrameIndex for return slot.
int BytesToPopOnReturn; // Number of arg bytes ret should pop.
int BytesCallerReserves; // Number of arg bytes caller makes.
@@ -347,6 +350,9 @@ namespace llvm {
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
+ /// X86StackPtr - X86 physical register used as stack ptr.
+ unsigned X86StackPtr;
+
/// X86ScalarSSE - Select between SSE2 or x87 floating point ops.
bool X86ScalarSSE;
@@ -354,6 +360,10 @@ namespace llvm {
SDOperand LowerCCCArguments(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG);
+ // X86-64 C Calling Convention implementation.
+ SDOperand LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG);
+
// Fast Calling Convention implementation.
SDOperand LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 9d8eab8ca5..81cd9e0831 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: TargetInstrInfo(X86Insts, sizeof(X86Insts)/sizeof(X86Insts[0])),
- TM(tm), RI(*this) {
+ TM(tm), RI(tm, *this) {
}
@@ -30,7 +30,8 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
unsigned& sourceReg,
unsigned& destReg) const {
MachineOpCode oc = MI.getOpcode();
- if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
+ if (oc == X86::MOV8rr || oc == X86::MOV16rr ||
+ oc == X86::MOV32rr || oc == X86::MOV64rr ||
oc == X86::MOV16to16_ || oc == X86::MOV32to32_ ||
oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
@@ -59,6 +60,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(MachineInstr *MI,
case X86::MOV16_rm:
case X86::MOV32rm:
case X86::MOV32_rm:
+ case X86::MOV64rm:
case X86::FpLD64m:
case X86::MOVSSrm:
case X86::MOVSDrm:
@@ -86,6 +88,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(MachineInstr *MI,
case X86::MOV16_mr:
case X86::MOV32mr:
case X86::MOV32_mr:
+ case X86::MOV64mr:
case X86::FpSTP64m:
case X86::MOVSSmr:
case X86::MOVSDmr:
@@ -145,16 +148,20 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr *MI) const {
switch (MI->getOpcode()) {
case X86::INC32r:
+ case X86::INC64_32r:
assert(MI->getNumOperands() == 2 && "Unknown inc instruction!");
return addRegOffset(BuildMI(X86::LEA32r, 5, Dest), Src, 1);
case X86::INC16r:
+ case X86::INC64_16r:
if (DisableLEA16) return 0;
assert(MI->getNumOperands() == 2 && "Unknown inc instruction!");
return addRegOffset(BuildMI(X86::LEA16r, 5, Dest), Src, 1);
case X86::DEC32r:
+ case X86::DEC64_32r:
assert(MI->getNumOperands() == 2 && "Unknown dec instruction!");
return addRegOffset(BuildMI(X86::LEA32r, 5, Dest), Src, -1);
case X86::DEC16r:
+ case X86::DEC64_16r:
if (DisableLEA16) return 0;
assert(MI->getNumOperands() == 2 && "Unknown dec instruction!");
return addRegOffset(BuildMI(X86::LEA16r, 5, Dest), Src, -1);
@@ -264,3 +271,10 @@ X86InstrInfo::reverseBranchCondition(MachineBasicBlock::iterator MI) const {
return BuildMI(*MBB, MBB->erase(MI), ROpcode, 1).addMBB(TMBB);
}
+const TargetRegisterClass *X86InstrInfo::getPointerRegClass() const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ if (Subtarget->is64Bit())
+ return &X86::GR64RegClass;
+ else
+ return &X86::GR32RegClass;
+}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 7691798646..01b4cfffcc 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -18,6 +18,7 @@
#include "X86RegisterInfo.h"
namespace llvm {
+ class X86RegisterInfo;
class X86TargetMachine;
/// X86II - This namespace holds all of the target specific flags that
@@ -90,12 +91,18 @@ namespace X86II {
// instead of 32 bit data.
OpSize = 1 << 6,
+    // AdSize - Set if this instruction requires an address-size prefix (0x67),
+    // which most often indicates that the instruction uses 16-bit addressing
+    // instead of 32-bit addressing (or 32-bit addressing in 64-bit mode).
+ AdSize = 1 << 7,
+
+ //===------------------------------------------------------------------===//
// Op0Mask - There are several prefix bytes that are used to form two byte
// opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
// used to obtain the setting of this field. If no bits in this field is
// set, there is no prefix byte for obtaining a multibyte opcode.
//
- Op0Shift = 7,
+ Op0Shift = 8,
Op0Mask = 0xF << Op0Shift,
// TB - TwoByte - Set if this instruction has a two byte opcode, which
@@ -118,19 +125,29 @@ namespace X86II {
XD = 11 << Op0Shift, XS = 12 << Op0Shift,
//===------------------------------------------------------------------===//
- // This two-bit field describes the size of an immediate operand. Zero is
+ // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+ // They are used to specify GPRs and SSE registers, 64-bit operand size,
+    // etc. We only care about the REX.W and REX.R bits, and only the former is
+ // statically determined.
+ //
+ REXShift = 12,
+ REX_W = 1 << REXShift,
+
+ //===------------------------------------------------------------------===//
+ // This three-bit field describes the size of an immediate operand. Zero is
// unused so that we can tell if we forgot to set a value.
- ImmShift = 11,
- ImmMask = 3 << ImmShift,
+ ImmShift = 13,
+ ImmMask = 7 << ImmShift,
Imm8 = 1 << ImmShift,
Imm16 = 2 << ImmShift,
Imm32 = 3 << ImmShift,
+ Imm64 = 4 << ImmShift,
//===------------------------------------------------------------------===//
// FP Instruction Classification... Zero is non-fp instruction.
// FPTypeMask - Mask for all of the FP types...
- FPTypeShift = 13,
+ FPTypeShift = 16,
FPTypeMask = 7 << FPTypeShift,
// NotFP - The default, set for instructions that do not use FP registers.
@@ -162,9 +179,9 @@ namespace X86II {
// SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
SpecialFP = 7 << FPTypeShift,
- OpcodeShift = 16,
+ // Bits 19 -> 23 are unused
+ OpcodeShift = 24,
OpcodeMask = 0xFF << OpcodeShift
- // Bits 25 -> 31 are unused
};
}
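With the shifts renumbered above, the per-instruction flag word lays out as summarized below. This is a hedged restatement: values are copied from the shifts in this hunk, names carry a Sketch suffix to avoid implying the real X86II definitions, and the low six format bits are assumed from the 6-bit FormBits field in the .td file:

enum X86IIFlagLayoutSketch : unsigned {
  FormMaskSketch   = 0x3F,        // bits 0-5  : instruction format
  OpSizeSketch     = 1u << 6,     // bit  6    : 0x66 operand-size prefix
  AdSizeSketch     = 1u << 7,     // bit  7    : 0x67 address-size prefix
  Op0MaskSketch    = 0xFu << 8,   // bits 8-11 : TB/REP/D8-DF/XD/XS prefixes
  REX_WSketch      = 1u << 12,    // bit  12   : REX.W (64-bit operand size)
  ImmMaskSketch    = 0x7u << 13,  // bits 13-15: NoImm/Imm8/Imm16/Imm32/Imm64
  FPTypeMaskSketch = 0x7u << 16,  // bits 16-18: FP classification
                                  // bits 19-23: unused
  OpcodeMaskSketch = 0xFFu << 24  // bits 24-31: base opcode byte
};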
@@ -216,6 +233,8 @@ public:
virtual MachineBasicBlock::iterator
reverseBranchCondition(MachineBasicBlock::iterator MI) const;
+ const TargetRegisterClass *getPointerRegClass() const;
+
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified opcode number.
//
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 206faa129c..0f38aea60a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -39,7 +39,7 @@ def SDT_X86CallSeqStart : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
def SDT_X86CallSeqEnd : SDTypeProfile<0, 2, [ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
-def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
@@ -95,7 +95,7 @@ def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
class X86MemOperand<string printMethod> : Operand<iPTR> {
let PrintMethod = printMethod;
let NumMIOperands = 4;
- let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
}
def i8mem : X86MemOperand<"printi8mem">;
@@ -107,6 +107,12 @@ def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f128mem : X86MemOperand<"printf128mem">;
+def lea32mem : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let NumMIOperands = 4;
+ let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+}
+
def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
}
@@ -129,9 +135,9 @@ def brtarget : Operand<OtherVT>;
//
// Define X86 specific addressing mode.
-def addr : ComplexPattern<iPTR, 4, "SelectAddr", []>;
-def leaaddr : ComplexPattern<iPTR, 4, "SelectLEAAddr",
- [add, mul, shl, or, frameindex]>;
+def addr : ComplexPattern<iPTR, 4, "SelectAddr", []>;
+def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
+ [add, mul, shl, or, frameindex]>;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -158,11 +164,13 @@ def MRMInitReg : Format<32>;
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
-def HasMMX : Predicate<"Subtarget->hasMMX()">;
-def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
-def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
-def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
-def FPStack : Predicate<"!Subtarget->hasSSE2()">;
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def FPStack : Predicate<"!Subtarget->hasSSE2()">;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">;
//===----------------------------------------------------------------------===//
// X86 specific pattern fragments.
@@ -171,13 +179,14 @@ def FPStack : Predicate<"!Subtarget->hasSSE2()">;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
// machine code emitter.
-class ImmType<bits<2> val> {
- bits<2> Value = val;
+class ImmType<bits<3> val> {
+ bits<3> Value = val;
}
def NoImm : ImmType<0>;
def Imm8 : ImmType<1>;
def Imm16 : ImmType<2>;
def Imm32 : ImmType<3>;
+def Imm64 : ImmType<4>;
// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
@@ -202,7 +211,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag ops, string AsmStr>
Format Form = f;
bits<6> FormBits = Form.Value;
ImmType ImmT = i;
- bits<2> ImmTypeBits = ImmT.Value;
+ bits<3> ImmTypeBits = ImmT.Value;
dag OperandList = ops;
string AsmString = AsmStr;
@@ -210,9 +219,11 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag ops, string AsmStr>
//
// Attributes specific to X86 instructions...
//
- bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
+ bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
+ bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
bits<4> Prefix = 0; // Which prefix byte does this inst have?
+ bit hasREX_WPrefix = 0; // Does this inst requires the REX.W prefix?
FPFormat FPForm; // What flavor of FP instruction is this?
bits<3> FPFormBits = 0;
}
@@ -226,6 +237,8 @@ class Imp<list<Register> uses, list<Register> defs> {
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize { bit hasOpSizePrefix = 1; }
+class AdSize { bit hasAdSizePrefix = 1; }
+class REX_W { bit hasREX_WPrefix = 1; }
class TB { bits<4> Prefix = 1; }
class REP { bits<4> Prefix = 2; }
class D8 { bits<4> Prefix = 3; }
@@ -276,8 +289,6 @@ def i32immSExt8 : PatLeaf<(i32 imm), [{
}]>;
// Helper fragments for loads.
-def loadiPTR : PatFrag<(ops node:$ptr), (iPTR (load node:$ptr))>;
-
def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
def loadi16 : PatFrag<(ops node:$ptr), (i16 (load node:$ptr))>;
def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>;
@@ -308,6 +319,7 @@ def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extload node:$ptr, i16))>;
//===----------------------------------------------------------------------===//
// Instruction templates...
+//
class I<bits<8> o, Format f, dag ops, string asm, list<dag> pattern>
: X86Inst<o, f, NoImm, ops, asm> {
@@ -355,13 +367,13 @@ def IMPLICIT_DEF_GR32 : I<0, Pseudo, (ops GR32:$dst),
def NOOP : I<0x90, RawFrm, (ops), "nop", []>;
// Truncate
-def TRUNC_GR32_GR8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32_:$src),
- "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}", []>;
-def TRUNC_GR16_GR8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16_:$src),
- "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
-def TRUNC_GR32_GR16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR32:$src),
- "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
- [(set GR16:$dst, (trunc GR32:$src))]>;
+def TRUNC_32_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32_:$src),
+ "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
+def TRUNC_16_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16_:$src),
+ "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
+def TRUNC_32to16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR32:$src),
+ "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
+ [(set GR16:$dst, (trunc GR32:$src))]>;
//===----------------------------------------------------------------------===//
// Control Flow Instructions...
@@ -388,7 +400,7 @@ let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in {
def JMP32r : I<0xFF, MRM4r, (ops GR32:$dst), "jmp{l} {*}$dst",
[(brind GR32:$dst)]>;
def JMP32m : I<0xFF, MRM4m, (ops i32mem:$dst), "jmp{l} {*}$dst",
- [(brind (loadiPTR addr:$dst))]>;
+ [(brind (loadi32 addr:$dst))]>;
}
// Conditional branches
@@ -510,9 +522,9 @@ def LEA16r : I<0x8D, MRMSrcMem,
(ops GR16:$dst, i32mem:$src),
"lea{w} {$src|$dst}, {$dst|$src}", []>, OpSize;
def LEA32r : I<0x8D, MRMSrcMem,
- (ops GR32:$dst, i32mem:$src),
+ (ops GR32:$dst, lea32mem:$src),
"lea{l} {$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, leaaddr:$src)]>;
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
def REP_MOVSB : I<0xA4, RawFrm, (ops), "{rep;movsb|rep movsb}",
[(X86rep_movs i8)]>,
@@ -1101,9 +1113,10 @@ def INC8r : I<0xFE, MRM0r, (ops GR8 :$dst, GR8 :$src), "inc{b} $dst",
[(set GR8:$dst, (add GR8:$src, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def INC16r : I<0x40, AddRegFrm, (ops GR16:$dst, GR16:$src), "inc{w} $dst",
- [(set GR16:$dst, (add GR16:$src, 1))]>, OpSize;
+ [(set GR16:$dst, (add GR16:$src, 1))]>,
+ OpSize, Requires<[In32BitMode]>;
def INC32r : I<0x40, AddRegFrm, (ops GR32:$dst, GR32:$src), "inc{l} $dst",
- [(set GR32:$dst, (add GR32:$src, 1))]>;
+ [(set GR32:$dst, (add GR32:$src, 1))]>, Requires<[In32BitMode]>;
}
let isTwoAddress = 0, CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (ops i8mem :$dst), "inc{b} $dst",
@@ -1119,9 +1132,10 @@ def DEC8r : I<0xFE, MRM1r, (ops GR8 :$dst, GR8 :$src), "dec{b} $dst",
[(set GR8:$dst, (add GR8:$src, -1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def DEC16r : I<0x48, AddRegFrm, (ops GR16:$dst, GR16:$src), "dec{w} $dst",
- [(set GR16:$dst, (add GR16:$src, -1))]>, OpSize;
+ [(set GR16:$dst, (add GR16:$src, -1))]>,
+ OpSize, Requires<[In32BitMode]>;
def DEC32r : I<0x48, AddRegFrm, (ops GR32:$dst, GR32:$src), "dec{l} $dst",
- [(set GR32:$dst, (add GR32:$src, -1))]>;
+ [(set GR32:$dst, (add GR32:$src, -1))]>, Requires<[In32BitMode]>;
}
let isTwoAddress = 0, CodeSize = 2 in {
@@ -2455,7 +2469,7 @@ def DWARF_LABEL : I<0, Pseudo, (ops i32imm:$id),
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
-// ConstantPool GlobalAddress, ExternalSymbol
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
@@ -2477,18 +2491,16 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
// Calls
def : Pat<(X86tailcall GR32:$dst),
- (CALL32r GR32:$dst)>;
+ (CALL32r GR32:$dst)>;
-def : Pat<(X86tailcall tglobaladdr:$dst),
+def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
(CALLpcrel32 tglobaladdr:$dst)>;
-def : Pat<(X86tailcall texternalsym:$dst),
+def : Pat<(X86tailcall (i32 texternalsym:$dst)),
(CALLpcrel32 texternalsym:$dst)>;
-
-
-def : Pat<(X86call tglobaladdr:$dst),
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
(CALLpcrel32 tglobaladdr:$dst)>;
-def : Pat<(X86call texternalsym:$dst),
+def : Pat<(X86call (i32 texternalsym:$dst)),
(CALLpcrel32 texternalsym:$dst)>;
// X86 specific add which produces a flag.
@@ -2611,3 +2623,9 @@ include "X86InstrMMX.td"
//===----------------------------------------------------------------------===//
include "X86InstrSSE.td"
+
+//===----------------------------------------------------------------------===//
+// X86-64 Support
+//===----------------------------------------------------------------------===//
+
+include "X86InstrX86-64.td"
diff --git a/lib/Target/X86/X86InstrX86-64.td b/lib/Target/X86/X86InstrX86-64.td
new file mode 100644
index 0000000000..e6bfbc4967
--- /dev/null
+++ b/lib/Target/X86/X86InstrX86-64.td
@@ -0,0 +1,1084 @@
+//===- X86InstrX86-64.td - Describe the X86 Instruction Set -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86-64 instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions...
+//
+
+// 64 bits, but only the low 32 bits are significant.
+def i64i32imm : Operand<i64>;
+// 64 bits, but only the low 8 bits are significant.
+def i64i8imm : Operand<i64>;
+
+def lea64mem : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let NumMIOperands = 4;
+ let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm);
+}
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printlea64_32mem";
+ let NumMIOperands = 4;
+ let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Complex Pattern Definitions...
+//
+def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
+ [add, mul, shl, or, frameindex, X86Wrapper]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction templates...
+//
+
+class RI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : I<o, F, ops, asm, pattern>, REX_W;
+class RIi8 <bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : Ii8<o, F, ops, asm, pattern>, REX_W;
+class RIi32 <bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : Ii32<o, F, ops, asm, pattern>, REX_W;
+
+class RIi64<bits<8> o, Format f, dag ops, string asm, list<dag> pattern>
+ : X86Inst<o, f, Imm64, ops, asm>, REX_W {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class RSSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : SSI<o, F, ops, asm, pattern>, REX_W;
+class RSDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+ : SDI<o, F, ops, asm, pattern>, REX_W;
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments...
+//
+
+def i64immSExt32 : PatLeaf<(i64 imm), [{
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ return (int64_t)N->getValue() == (int32_t)N->getValue();
+}]>;
+
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // zero extended field.
+ return (uint64_t)N->getValue() == (uint32_t)N->getValue();
+}]>;
+
+def i64immSExt8 : PatLeaf<(i64 imm), [{
+ // i64immSExt8 predicate - True if the 64-bit immediate fits in an 8-bit
+ // sign extended field.
+ return (int64_t)N->getValue() == (int8_t)N->getValue();
+}]>;
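
The three immediate predicates above all use the same trick: an immediate fits a
narrower sign- or zero-extended field exactly when truncating it and extending it
back reproduces the original value. A standalone C++ sketch of that check (the
helper names are illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// Illustrative restatement of the PatLeaf predicates: truncate to the narrow
// width, extend back, and compare against the original immediate.
static bool fitsSExt32(int64_t Imm) { return Imm == (int64_t)(int32_t)Imm; }
static bool fitsZExt32(uint64_t Imm) { return Imm == (uint64_t)(uint32_t)Imm; }
static bool fitsSExt8(int64_t Imm)   { return Imm == (int64_t)(int8_t)Imm; }

int main() {
  assert(fitsSExt32(-1));                      // all-ones sign-extends from 32 bits
  assert(!fitsZExt32((uint64_t)-1));           // ...but is not a zero-extended 32-bit value
  assert(fitsZExt32(0xFFFFFFFFull));           // high 32 bits already zero
  assert(fitsSExt8(-128) && !fitsSExt8(128));  // 8-bit signed range is [-128, 127]
  return 0;
}
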
+
+def sextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i1))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i8))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i16))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i32))>;
+
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i1))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i8))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i16))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i32))>;
+
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i1))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i8))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i16))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i32))>;
+
+//===----------------------------------------------------------------------===//
+// Instruction list...
+//
+
+def IMPLICIT_DEF_GR64 : I<0, Pseudo, (ops GR64:$dst),
+ "#IMPLICIT_DEF $dst",
+ [(set GR64:$dst, (undef))]>;
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1, noResults = 1 in
+ // All calls clobber the non-callee saved registers...
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15] in {
+ def CALL64pcrel32 : I<0xE8, RawFrm, (ops i64imm:$dst, variable_ops),
+ "call ${dst:call}", []>;
+ def CALL64r : I<0xFF, MRM2r, (ops GR64:$dst, variable_ops),
+ "call {*}$dst", [(X86call GR64:$dst)]>;
+ def CALL64m : I<0xFF, MRM2m, (ops i64mem:$dst, variable_ops),
+ "call {*}$dst", []>;
+ }
+
+// Branches
+let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in {
+ def JMP64r : I<0xFF, MRM4r, (ops GR64:$dst), "jmp{q} {*}$dst",
+ [(brind GR64:$dst)]>;
+ def JMP64m : I<0xFF, MRM4m, (ops i64mem:$dst), "jmp{q} {*}$dst",
+ [(brind (loadi64 addr:$dst))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+def LEAVE64 : I<0xC9, RawFrm,
+ (ops), "leave", []>, Imp<[RBP,RSP],[RBP,RSP]>;
+def POP64r : I<0x58, AddRegFrm,
+ (ops GR64:$reg), "pop{q} $reg", []>, Imp<[RSP],[RSP]>;
+
+def LEA64_32r : I<0x8D, MRMSrcMem,
+ (ops GR32:$dst, lea64_32mem:$src),
+ "lea{l} {$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+
+def LEA64r : RI<0x8D, MRMSrcMem, (ops GR64:$dst, lea64mem:$src),
+ "lea{q} {$src|$dst}, {$dst|$src}",
+ [(set GR64:$dst, lea64addr:$src)]>;
+
+let isTwoAddress = 1 in
+def BSWAP64r : RI<0xC8, AddRegFrm, (ops GR64:$dst, GR64:$src),
+ "bswap{q} $dst",
+ [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+// Exchange
+def XCHG64rr : RI<0x87, MRMDestReg, (ops GR64:$src1, GR64:$src2),
+ "xchg{q} {$src2|$src1}, {$src1|$src2}", []>;
+def XCHG64mr : RI<0x87, MRMDestMem, (ops i64mem:$src1, GR64:$src2),
+ "xchg{q} {$src2|$src1}, {$src1|$src2}", []>;
+def XCHG64rm : RI<0x87, MRMSrcMem, (ops GR64:$src1, i64mem:$src2),
+ "xchg{q} {$src2|$src1}, {$src1|$src2}", []>;
+
+// Repeat string ops
+def REP_MOVSQ : RI<0xA5, RawFrm, (ops), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)]>,
+ Imp<[RCX,RDI,RSI], [RCX,RDI,RSI]>, REP;
+def REP_STOSQ : RI<0xAB, RawFrm, (ops), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)]>,
+ Imp<[RAX,RCX,RDI], [RCX,RDI]>, REP;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions...
+//
+
+def MOV64rr : RI<0x89, MRMDestReg, (ops GR64:$dst, GR64:$src),
+ "mov{q} {$src, $dst|$dst, $src}", []>;
+
+def MOV64ri : RIi64<0xB8, AddRegFrm, (ops GR64:$dst, i64imm:$src),
+ "movabs{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (ops GR64:$dst, i64i32imm:$src),
+ "mov{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)]>;
+
+def MOV64rm : RI<0x8B, MRMSrcMem, (ops GR64:$dst, i64mem:$src),
+ "mov{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))]>;
+
+def MOV64mr : RI<0x89, MRMDestMem, (ops i64mem:$dst, GR64:$src),
+ "mov{q} {$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (ops i64mem:$dst, i64i32imm:$src),
+ "mov{q} {$src, $dst|$dst, $src}",
+ [(store i64immSExt32:$src, addr:$dst)]>;
+
+// Sign/Zero extenders
+
+def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (ops GR64:$dst, GR8 :$src),
+ "movs{bq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR8:$src))]>, TB;
+def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (ops GR64:$dst, i8mem :$src),
+ "movs{bq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+def MOVSX64rr16: RI<0xBF, MRMSrcReg, (ops GR64:$dst, GR16:$src),
+ "movs{wq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR16:$src))]>, TB;
+def MOVSX64rm16: RI<0xBF, MRMSrcMem, (ops GR64:$dst, i16mem:$src),
+ "movs{wq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+def MOVSX64rr32: RI<0x63, MRMSrcReg, (ops GR64:$dst, GR32:$src),
+ "movs{lq|xd} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVSX64rm32: RI<0x63, MRMSrcMem, (ops GR64:$dst, i32mem:$src),
+ "movs{lq|xd} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+
+def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (ops GR64:$dst, GR8 :$src),
+ "movz{bq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (ops GR64:$dst, i8mem :$src),
+ "movz{bq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+def MOVZX64rr16: RI<0xB7, MRMSrcReg, (ops GR64:$dst, GR16:$src),
+ "movz{wq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: RI<0xB7, MRMSrcMem, (ops GR64:$dst, i16mem:$src),
+ "movz{wq|x} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+
+def CDQE : RI<0x98, RawFrm, (ops),
+ "{cltq|cdqe}", []>, Imp<[EAX],[RAX]>; // RAX = signext(EAX)
+
+def CQO : RI<0x99, RawFrm, (ops),
+ "{cqto|cqo}", []>, Imp<[RAX],[RAX,RDX]>; // RDX:RAX = signext(RAX)
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions...
+//
+
+let isTwoAddress = 1 in {
+let isConvertibleToThreeAddress = 1 in {
+let isCommutable = 1 in
+def ADD64rr : RI<0x01, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, GR64:$src2))]>;
+
+def ADD64ri32 : RIi32<0x81, MRM0r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, i64immSExt32:$src2))]>;
+def ADD64ri8 : RIi8<0x83, MRM0r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, i64immSExt8:$src2))]>;
+} // isConvertibleToThreeAddress
+
+def ADD64rm : RI<0x03, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, (load addr:$src2)))]>;
+} // isTwoAddress
+
+def ADD64mr : RI<0x01, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def ADD64mi32 : RIi32<0x81, MRM0m, (ops i64mem:$dst, i64i32imm :$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+def ADD64mi8 : RIi8<0x83, MRM0m, (ops i64mem:$dst, i64i8imm :$src2),
+ "add{q} {$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def ADC64rr : RI<0x11, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
+
+def ADC64rm : RI<0x13, MRMSrcMem , (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>;
+
+def ADC64ri32 : RIi32<0x81, MRM2r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>;
+def ADC64ri8 : RIi8<0x83, MRM2r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def ADC64mr : RI<0x11, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def ADC64mi32 : RIi32<0x81, MRM2m, (ops i64mem:$dst, i64i32imm:$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+def ADC64mi8 : RIi8<0x83, MRM2m, (ops i64mem:$dst, i64i8imm :$src2),
+ "adc{q} {$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+def SUB64rr : RI<0x29, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>;
+
+def SUB64rm : RI<0x2B, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2)))]>;
+
+def SUB64ri32 : RIi32<0x81, MRM5r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2))]>;
+def SUB64ri8 : RIi8<0x83, MRM5r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def SUB64mr : RI<0x29, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def SUB64mi32 : RIi32<0x81, MRM5m, (ops i64mem:$dst, i64i32imm:$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+def SUB64mi8 : RIi8<0x83, MRM5m, (ops i64mem:$dst, i64i8imm :$src2),
+ "sub{q} {$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+def SBB64rr : RI<0x19, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
+
+def SBB64rm : RI<0x1B, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>;
+
+def SBB64ri32 : RIi32<0x81, MRM3r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
+def SBB64ri8 : RIi8<0x83, MRM3r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def SBB64mr : RI<0x19, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def SBB64mi32 : RIi32<0x81, MRM3m, (ops i64mem:$dst, i64i32imm:$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+def SBB64mi8 : RIi8<0x83, MRM3m, (ops i64mem:$dst, i64i8imm :$src2),
+ "sbb{q} {$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+
+// Unsigned multiplication
+def MUL64r : RI<0xF7, MRM4r, (ops GR64:$src),
+ "mul{q} $src", []>,
+ Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*GR64
+def MUL64m : RI<0xF7, MRM4m, (ops i64mem:$src),
+ "mul{q} $src", []>,
+ Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*[mem64]
+
+// Signed multiplication
+def IMUL64r : RI<0xF7, MRM5r, (ops GR64:$src),
+ "imul{q} $src", []>,
+ Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*GR64
+def IMUL64m : RI<0xF7, MRM5m, (ops i64mem:$src),
+ "imul{q} $src", []>,
+ Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*[mem64]
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def IMUL64rr : RI<0xAF, MRMSrcReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "imul{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>, TB;
+
+def IMUL64rm : RI<0xAF, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "imul{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2)))]>, TB;
+} // isTwoAddress
+
+// Surprisingly enough, these are not two-address instructions!
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
+ (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>;
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
+ (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2))]>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
+ (ops GR64:$dst, i64mem:$src1, i64i32imm:$src2),
+ "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul (load addr:$src1), i64immSExt32:$src2))]>;
+def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
+ (ops GR64:$dst, i64mem:$src1, i64i8imm: $src2),
+ "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul (load addr:$src1), i64immSExt8:$src2))]>;
+
+// Unsigned division / remainder
+def DIV64r : RI<0xF7, MRM6r, (ops GR64:$src), // RDX:RAX/r64 = RAX,RDX
+ "div{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
+def DIV64m : RI<0xF7, MRM6m, (ops i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX
+ "div{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
+
+// Signed division / remainder
+def IDIV64r: RI<0xF7, MRM7r, (ops GR64:$src), // RDX:RAX/r64 = RAX,RDX
+ "idiv{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
+def IDIV64m: RI<0xF7, MRM7m, (ops i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX
+ "idiv{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>;
+
+// Unary instructions
+let CodeSize = 2 in {
+let isTwoAddress = 1 in
+def NEG64r : RI<0xF7, MRM3r, (ops GR64:$dst, GR64:$src), "neg{q} $dst",
+ [(set GR64:$dst, (ineg GR64:$src))]>;
+def NEG64m : RI<0xF7, MRM3m, (ops i64mem:$dst), "neg{q} $dst",
+ [(store (ineg (loadi64 addr:$dst)), addr:$dst)]>;
+
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+def INC64r : RI<0xFF, MRM0r, (ops GR64:$dst, GR64:$src), "inc{q} $dst",
+ [(set GR64:$dst, (add GR64:$src, 1))]>;
+def INC64m : RI<0xFF, MRM0m, (ops i64mem:$dst), "inc{q} $dst",
+ [(store (add (loadi64 addr:$dst), 1), addr:$dst)]>;
+
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+def DEC64r : RI<0xFF, MRM1r, (ops GR64:$dst, GR64:$src), "dec{q} $dst",
+ [(set GR64:$dst, (add GR64:$src, -1))]>;
+def DEC64m : RI<0xFF, MRM1m, (ops i64mem:$dst), "dec{q} $dst",
+ [(store (add (loadi64 addr:$dst), -1), addr:$dst)]>;
+
+// In 64-bit mode, single byte INC and DEC cannot be encoded.
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in {
+// Can transform into LEA.
+def INC64_16r : I<0xFF, MRM0r, (ops GR16:$dst, GR16:$src), "inc{w} $dst",
+ [(set GR16:$dst, (add GR16:$src, 1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32r : I<0xFF, MRM0r, (ops GR32:$dst, GR32:$src), "inc{l} $dst",
+ [(set GR32:$dst, (add GR32:$src, 1))]>,
+ Requires<[In64BitMode]>;
+def DEC64_16r : I<0xFF, MRM1r, (ops GR16:$dst, GR16:$src), "dec{w} $dst",
+ [(set GR16:$dst, (add GR16:$src, -1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32r : I<0xFF, MRM1r, (ops GR32:$dst, GR32:$src), "dec{l} $dst",
+ [(set GR32:$dst, (add GR32:$src, -1))]>,
+ Requires<[In64BitMode]>;
+} // isConvertibleToThreeAddress
+} // CodeSize
+
+
+// Shift instructions
+let isTwoAddress = 1 in {
+def SHL64rCL : RI<0xD3, MRM4r, (ops GR64:$dst, GR64:$src),
+ "shl{q} {%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (shl GR64:$src, CL))]>,
+ Imp<[CL],[]>;
+def SHL64ri : RIi8<0xC1, MRM4r, (ops GR64:$dst, GR64:$src1, i8imm:$src2),
+ "shl{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+def SHL64r1 : RI<0xD1, MRM4r, (ops GR64:$dst, GR64:$src1),
+ "shl{q} $dst", []>;
+} // isTwoAddress
+
+def SHL64mCL : RI<0xD3, MRM4m, (ops i64mem:$dst),
+ "shl{q} {%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+def SHL64mi : RIi8<0xC1, MRM4m, (ops i64mem:$dst, i8imm:$src),
+ "shl{q} {$src, $dst|$dst, $src}",
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL64m1 : RI<0xD1, MRM4m, (ops i64mem:$dst),
+ "shl{q} $dst",
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+def SHR64rCL : RI<0xD3, MRM5r, (ops GR64:$dst, GR64:$src),
+ "shr{q} {%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (srl GR64:$src, CL))]>,
+ Imp<[CL],[]>;
+def SHR64ri : RIi8<0xC1, MRM5r, (ops GR64:$dst, GR64:$src1, i8imm:$src2),
+ "shr{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+def SHR64r1 : RI<0xD1, MRM5r, (ops GR64:$dst, GR64:$src1),
+ "shr{q} $dst",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+def SHR64mCL : RI<0xD3, MRM5m, (ops i64mem:$dst),
+ "shr{q} {%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+def SHR64mi : RIi8<0xC1, MRM5m, (ops i64mem:$dst, i8imm:$src),
+ "shr{q} {$src, $dst|$dst, $src}",
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR64m1 : RI<0xD1, MRM5m, (ops i64mem:$dst),
+ "shr{q} $dst",
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+def SAR64rCL : RI<0xD3, MRM7r, (ops GR64:$dst, GR64:$src),
+ "sar{q} {%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (sra GR64:$src, CL))]>, Imp<[CL],[]>;
+def SAR64ri : RIi8<0xC1, MRM7r, (ops GR64:$dst, GR64:$src1, i8imm:$src2),
+ "sar{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+def SAR64r1 : RI<0xD1, MRM7r, (ops GR64:$dst, GR64:$src1),
+ "sar{q} $dst",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+def SAR64mCL : RI<0xD3, MRM7m, (ops i64mem:$dst),
+ "sar{q} {%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+def SAR64mi : RIi8<0xC1, MRM7m, (ops i64mem:$dst, i8imm:$src),
+ "sar{q} {$src, $dst|$dst, $src}",
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR64m1 : RI<0xD1, MRM7m, (ops i64mem:$dst),
+ "sar{q} $dst",
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+// Rotate instructions
+let isTwoAddress = 1 in {
+def ROL64rCL : RI<0xD3, MRM0r, (ops GR64:$dst, GR64:$src),
+ "rol{q} {%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotl GR64:$src, CL))]>, Imp<[CL],[]>;
+def ROL64ri : RIi8<0xC1, MRM0r, (ops GR64:$dst, GR64:$src1, i8imm:$src2),
+ "rol{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+def ROL64r1 : RI<0xD1, MRM0r, (ops GR64:$dst, GR64:$src1),
+ "rol{q} $dst",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+def ROL64mCL : RI<0xD3, MRM0m, (ops i64mem:$dst),
+ "rol{q} {%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+def ROL64mi : RIi8<0xC1, MRM0m, (ops i64mem:$dst, i8imm:$src),
+ "rol{q} {$src, $dst|$dst, $src}",
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROL64m1 : RI<0xD1, MRM0m, (ops i64mem:$dst),
+ "rol{q} $dst",
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+def ROR64rCL : RI<0xD3, MRM1r, (ops GR64:$dst, GR64:$src),
+ "ror{q} {%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotr GR64:$src, CL))]>, Imp<[CL],[]>;
+def ROR64ri : RIi8<0xC1, MRM1r, (ops GR64:$dst, GR64:$src1, i8imm:$src2),
+ "ror{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+def ROR64r1 : RI<0xD1, MRM1r, (ops GR64:$dst, GR64:$src1),
+ "ror{q} $dst",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+def ROR64mCL : RI<0xD3, MRM1m, (ops i64mem:$dst),
+ "ror{q} {%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>,
+ Imp<[CL],[]>;
+def ROR64mi : RIi8<0xC1, MRM1m, (ops i64mem:$dst, i8imm:$src),
+ "ror{q} {$src, $dst|$dst, $src}",
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR64m1 : RI<0xD1, MRM1m, (ops i64mem:$dst),
+ "ror{q} $dst",
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+// Double shift instructions (generalizations of rotate)
+let isTwoAddress = 1 in {
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "shld{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+ Imp<[CL],[]>, TB;
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "shrd{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+ Imp<[CL],[]>, TB;
+
+let isCommutable = 1 in { // FIXME: Update X86InstrInfo::commuteInstruction
+def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
+ (ops GR64:$dst, GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shld{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ TB;
+def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
+ (ops GR64:$dst, GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shrd{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ TB;
+} // isCommutable
+} // isTwoAddress
+
+// Temporary hack: there are no patterns associated with these instructions
+// so we have to tell tblgen that these do not produce results.
+let noResults = 1 in {
+def SHLD64mrCL : RI<0xA5, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "shld{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+ Imp<[CL],[]>, TB;
+def SHRD64mrCL : RI<0xAD, MRMDestMem, (ops i64mem:$dst, GR64:$src2),
+ "shrd{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>,
+ Imp<[CL],[]>, TB;
+def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
+ (ops i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shld{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ TB;
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+ (ops i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shrd{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ TB;
+} // noResults
+
+//===----------------------------------------------------------------------===//
+// Logical Instructions...
+//
+
+let isTwoAddress = 1 in
+def NOT64r : RI<0xF7, MRM2r, (ops GR64:$dst, GR64:$src), "not{q} $dst",
+ [(set GR64:$dst, (not GR64:$src))]>;
+def NOT64m : RI<0xF7, MRM2m, (ops i64mem:$dst), "not{q} $dst",
+ [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def AND64rr : RI<0x21, MRMDestReg,
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "and{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>;
+def AND64rm : RI<0x23, MRMSrcMem,
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "and{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, (load addr:$src2)))]>;
+def AND64ri32 : RIi32<0x81, MRM4r,
+ (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "and{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64immSExt32:$src2))]>;
+def AND64ri8 : RIi8<0x83, MRM4r,
+ (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "and{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def AND64mr : RI<0x21, MRMDestMem,
+ (ops i64mem:$dst, GR64:$src),
+ "and{q} {$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR64:$src), addr:$dst)]>;
+def AND64mi32 : RIi32<0x81, MRM4m,
+ (ops i64mem:$dst, i64i32imm:$src),
+ "and{q} {$src, $dst|$dst, $src}",
+ [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>;
+def AND64mi8 : RIi8<0x83, MRM4m,
+ (ops i64mem:$dst, i64i8imm :$src),
+ "and{q} {$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def OR64rr : RI<0x09, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "or{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>;
+def OR64rm : RI<0x0B, MRMSrcMem , (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "or{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, (load addr:$src2)))]>;
+def OR64ri32 : RIi32<0x81, MRM1r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "or{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2))]>;
+def OR64ri8 : RIi8<0x83, MRM1r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "or{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def OR64mr : RI<0x09, MRMDestMem, (ops i64mem:$dst, GR64:$src),
+ "or{q} {$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR64:$src), addr:$dst)]>;
+def OR64mi32 : RIi32<0x81, MRM1m, (ops i64mem:$dst, i64i32imm:$src),
+ "or{q} {$src, $dst|$dst, $src}",
+ [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>;
+def OR64mi8 : RIi8<0x83, MRM1m, (ops i64mem:$dst, i64i8imm:$src),
+ "or{q} {$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def XOR64rr : RI<0x31, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "xor{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>;
+def XOR64rm : RI<0x33, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "xor{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, (load addr:$src2)))]>;
+def XOR64ri32 : RIi32<0x81, MRM6r,
+ (ops GR64:$dst, GR64:$src1, i64i32imm:$src2),
+ "xor{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, i64immSExt32:$src2))]>;
+def XOR64ri8 : RIi8<0x83, MRM6r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2),
+ "xor{q} {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, i64immSExt8:$src2))]>;
+} // isTwoAddress
+
+def XOR64mr : RI<0x31, MRMDestMem, (ops i64mem:$dst, GR64:$src),
+ "xor{q} {$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR64:$src), addr:$dst)]>;
+def XOR64mi32 : RIi32<0x81, MRM6m, (ops i64mem:$dst, i64i32imm:$src),
+ "xor{q} {$src, $dst|$dst, $src}",
+ [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>;
+def XOR64mi8 : RIi8<0x83, MRM6m, (ops i64mem:$dst, i64i8imm :$src),
+ "xor{q} {$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+// Integer comparison
+let isCommutable = 1 in
+def TEST64rr : RI<0x85, MRMDestReg, (ops GR64:$src1, GR64:$src2),
+ "test{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR64:$src1, GR64:$src2), 0)]>;
+def TEST64mr : RI<0x85, MRMDestMem, (ops i64mem:$src1, GR64:$src2),
+ "test{q} {$src2, $src1|$src1, $src2}",
+ [/*(X86cmp (and (loadi64 addr:$src1), GR64:$src2), 0)*/]>;
+def TEST64rm : RI<0x85, MRMSrcMem, (ops GR64:$src1, i64mem:$src2),
+ "test{q} {$src2, $src1|$src1, $src2}",
+ [/*(X86cmp (and GR64:$src1, (loadi64 addr:$src2)), 0)*/]>;
+def TEST64ri32 : RIi32<0xF7, MRM0r, (ops GR64:$src1, i64i32imm:$src2),
+ "test{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR64:$src1, i64immSExt32:$src2), 0)]>;
+def TEST64mi32 : RIi32<0xF7, MRM0m, (ops i64mem:$src1, i64i32imm:$src2),
+ "test{q} {$src2, $src1|$src1, $src2}",
+ [/*(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2),
+ 0)*/]>;
+
+def CMP64rr : RI<0x39, MRMDestReg, (ops GR64:$src1, GR64:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, GR64:$src2)]>;
+def CMP64mr : RI<0x39, MRMDestMem, (ops i64mem:$src1, GR64:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), GR64:$src2)]>;
+def CMP64rm : RI<0x3B, MRMSrcMem, (ops GR64:$src1, i64mem:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, (loadi64 addr:$src2))]>;
+def CMP64ri32 : RIi32<0x81, MRM7r, (ops GR64:$src1, i64i32imm:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, i64immSExt32:$src2)]>;
+def CMP64mi32 : RIi32<0x81, MRM7m, (ops i64mem:$src1, i64i32imm:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), i64immSExt32:$src2)]>;
+def CMP64mi8 : RIi8<0x83, MRM7m, (ops i64mem:$src1, i64i8imm:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), i64immSExt8:$src2)]>;
+def CMP64ri8 : RIi8<0x83, MRM7r, (ops GR64:$src1, i64i8imm:$src2),
+ "cmp{q} {$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, i64immSExt8:$src2)]>;
+
+// Conditional moves
+let isTwoAddress = 1 in {
+def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovb {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_B))]>, TB;
+def CMOVB64rm : RI<0x42, MRMSrcMem, // if <u, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovb {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_B))]>, TB;
+def CMOVAE64rr: RI<0x43, MRMSrcReg, // if >=u, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovae {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_AE))]>, TB;
+def CMOVAE64rm: RI<0x43, MRMSrcMem, // if >=u, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovae {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_AE))]>, TB;
+def CMOVE64rr : RI<0x44, MRMSrcReg, // if ==, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmove {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_E))]>, TB;
+def CMOVE64rm : RI<0x44, MRMSrcMem, // if ==, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmove {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_E))]>, TB;
+def CMOVNE64rr: RI<0x45, MRMSrcReg, // if !=, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovne {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NE))]>, TB;
+def CMOVNE64rm: RI<0x45, MRMSrcMem, // if !=, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovne {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NE))]>, TB;
+def CMOVBE64rr: RI<0x46, MRMSrcReg, // if <=u, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovbe {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_BE))]>, TB;
+def CMOVBE64rm: RI<0x46, MRMSrcMem, // if <=u, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovbe {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_BE))]>, TB;
+def CMOVA64rr : RI<0x47, MRMSrcReg, // if >u, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmova {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_A))]>, TB;
+def CMOVA64rm : RI<0x47, MRMSrcMem, // if >u, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmova {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_A))]>, TB;
+def CMOVL64rr : RI<0x4C, MRMSrcReg, // if <s, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovl {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_L))]>, TB;
+def CMOVL64rm : RI<0x4C, MRMSrcMem, // if <s, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovl {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_L))]>, TB;
+def CMOVGE64rr: RI<0x4D, MRMSrcReg, // if >=s, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovge {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_GE))]>, TB;
+def CMOVGE64rm: RI<0x4D, MRMSrcMem, // if >=s, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovge {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_GE))]>, TB;
+def CMOVLE64rr: RI<0x4E, MRMSrcReg, // if <=s, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovle {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_LE))]>, TB;
+def CMOVLE64rm: RI<0x4E, MRMSrcMem, // if <=s, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovle {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_LE))]>, TB;
+def CMOVG64rr : RI<0x4F, MRMSrcReg, // if >s, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovg {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_G))]>, TB;
+def CMOVG64rm : RI<0x4F, MRMSrcMem, // if >s, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovg {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_G))]>, TB;
+def CMOVS64rr : RI<0x48, MRMSrcReg, // if signed, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovs {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_S))]>, TB;
+def CMOVS64rm : RI<0x48, MRMSrcMem, // if signed, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovs {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_S))]>, TB;
+def CMOVNS64rr: RI<0x49, MRMSrcReg, // if !signed, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovns {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NS))]>, TB;
+def CMOVNS64rm: RI<0x49, MRMSrcMem, // if !signed, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovns {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NS))]>, TB;
+def CMOVP64rr : RI<0x4A, MRMSrcReg, // if parity, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovp {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_P))]>, TB;
+def CMOVP64rm : RI<0x4A, MRMSrcMem, // if parity, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovp {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_P))]>, TB;
+def CMOVNP64rr : RI<0x4B, MRMSrcReg, // if !parity, GR64 = GR64
+ (ops GR64:$dst, GR64:$src1, GR64:$src2),
+ "cmovnp {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NP))]>, TB;
+def CMOVNP64rm : RI<0x4B, MRMSrcMem, // if !parity, GR64 = [mem64]
+ (ops GR64:$dst, GR64:$src1, i64mem:$src2),
+ "cmovnp {$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NP))]>, TB;
+} // isTwoAddress
+
+//===----------------------------------------------------------------------===//
+// Conversion Instructions...
+//
+
+// f64 -> signed i64
+def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (ops GR64:$dst, VR128:$src),
+ "cvtsd2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (ops GR64:$dst, f128mem:$src),
+ "cvtsd2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (ops GR64:$dst, FR64:$src),
+ "cvttsd2si{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint FR64:$src))]>;
+def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (ops GR64:$dst, f64mem:$src),
+ "cvttsd2si{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (ops GR64:$dst, VR128:$src),
+ "cvttsd2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (ops GR64:$dst, f128mem:$src),
+ "cvttsd2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+
+// Signed i64 -> f64
+def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (ops FR64:$dst, GR64:$src),
+ "cvtsi2sd{q} {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp GR64:$src))]>;
+def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (ops FR64:$dst, i64mem:$src),
+ "cvtsi2sd{q} {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
+let isTwoAddress = 1 in {
+def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, GR64:$src2),
+ "cvtsi2sd{q} {$src2, $dst|$dst, $src2}",
+ []>; // TODO: add intrinsic
+def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i64mem:$src2),
+ "cvtsi2sd{q} {$src2, $dst|$dst, $src2}",
+ []>; // TODO: add intrinsic
+} // isTwoAddress
+
+// Signed i64 -> f32
+def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (ops FR32:$dst, GR64:$src),
+ "cvtsi2ss{q} {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp GR64:$src))]>;
+def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (ops FR32:$dst, i64mem:$src),
+ "cvtsi2ss{q} {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
+let isTwoAddress = 1 in {
+def Int_CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg,
+ (ops VR128:$dst, VR128:$src1, GR64:$src2),
+ "cvtsi2ss{q} {$src2, $dst|$dst, $src2}",
+ []>; // TODO: add intrinsic
+def Int_CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem,
+ (ops VR128:$dst, VR128:$src1, i64mem:$src2),
+ "cvtsi2ss{q} {$src2, $dst|$dst, $src2}",
+ []>; // TODO: add intrinsic
+} // isTwoAddress
+
+// f32 -> signed i64
+def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (ops GR64:$dst, VR128:$src),
+ "cvtss2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (ops GR64:$dst, f32mem:$src),
+ "cvtss2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (ops GR64:$dst, FR32:$src),
+ "cvttss2si{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint FR32:$src))]>;
+def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (ops GR64:$dst, f32mem:$src),
+ "cvttss2si{q} {$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
+def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (ops GR64:$dst, VR128:$src),
+ "cvttss2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (ops GR64:$dst, f32mem:$src),
+ "cvttss2si{q} {$src, $dst|$dst, $src}",
+ []>; // TODO: add intrinsic
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Truncate
+// In 64-bit mode, each 64-bit and 32-bit register has a low 8-bit sub-register.
+def TRUNC_64to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR64:$src),
+ "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}",
+ [(set GR8:$dst, (trunc GR64:$src))]>;
+def TRUNC_32to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32:$src),
+ "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}",
+ [(set GR8:$dst, (trunc GR32:$src))]>,
+ Requires<[In64BitMode]>;
+def TRUNC_16to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16:$src),
+ "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}",
+ [(set GR8:$dst, (trunc GR16:$src))]>,
+ Requires<[In64BitMode]>;
+
+def TRUNC_64to16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR64:$src),
+ "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
+ [(set GR16:$dst, (trunc GR64:$src))]>;
+
+def TRUNC_64to32 : I<0x89, MRMDestReg, (ops GR32:$dst, GR64:$src),
+ "mov{l} {${src:subreg32}, $dst|$dst, ${src:subreg32}}",
+ [(set GR32:$dst, (trunc GR64:$src))]>;
+
+// Zero-extension
+// TODO: Remove this after proper i32 -> i64 zext support.
+def PsMOVZX64rr32: I<0x89, MRMDestReg, (ops GR64:$dst, GR32:$src),
+ "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zext GR32:$src))]>;
+def PsMOVZX64rm32: I<0x8B, MRMSrcMem, (ops GR64:$dst, i32mem:$src),
+ "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let AddedComplexity = 1 in
+def MOV64r0 : RI<0x31, MRMInitReg, (ops GR64:$dst),
+ "xor{q} $dst, $dst",
+ [(set GR64:$dst, 0)]>;
+
+// Materialize i64 constant where top 32-bits are zero.
+let AddedComplexity = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (ops GR64:$dst, i64i32imm:$src),
+ "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, i64immZExt32:$src)]>;
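
MOV64ri64i32 and the PsMOVZX64* pseudos above both lean on the x86-64 rule that
writing a 32-bit register clears bits 63:32 of the containing 64-bit register, so
a plain mov{l} doubles as a zero-extension. A minimal C++ sketch of that semantic
(a model only, not taken from the patch):

#include <cassert>
#include <cstdint>

// Model of what a 32-bit register write does to the containing 64-bit
// register in 64-bit mode: low half written, high half cleared.
static uint64_t write32(uint64_t /*OldValue*/, uint32_t Src) {
  return (uint64_t)Src;              // the old high 32 bits never survive
}

int main() {
  uint64_t rax = 0xDEADBEEF00000000ull;
  rax = write32(rax, 0x12345678u);   // e.g. "movl $0x12345678, %eax"
  assert(rax == 0x12345678ull);      // zero-extended for free, no extra movzx
  return 0;
}
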
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Calls
+// Direct PC relative function call for small code model. 32-bit displacement
+// sign extended to 64-bit.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>;
+
+def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86tailcall (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>;
+
+def : Pat<(X86tailcall GR64:$dst),
+ (CALL64r GR64:$dst)>;
+
+// {s|z}extload bool -> {s|z}extload byte
+def : Pat<(sextloadi64i1 addr:$src), (MOVSX64rm8 addr:$src)>;
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload
+def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
+def : Pat<(extloadi64i32 addr:$src), (PsMOVZX64rm32 addr:$src)>;
+
+// anyext -> zext
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16:$src)>;
+def : Pat<(i64 (anyext GR32:$src)), (PsMOVZX64rr32 GR32:$src)>;
+def : Pat<(i64 (anyext (loadi8 addr:$src))), (MOVZX64rm8 addr:$src)>;
+def : Pat<(i64 (anyext (loadi16 addr:$src))), (MOVZX64rm16 addr:$src)>;
+def : Pat<(i64 (anyext (loadi32 addr:$src))), (PsMOVZX64rm32 addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// (shl x, 1) ==> (add x, x)
+def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+
+// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c)
+def : Pat<(or (srl GR64:$src1, CL:$amt),
+ (shl GR64:$src2, (sub 64, CL:$amt))),
+ (SHRD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt),
+ (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
+ (SHRD64mrCL addr:$dst, GR64:$src2)>;
+
+// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+def : Pat<(or (shl GR64:$src1, CL:$amt),
+ (srl GR64:$src2, (sub 64, CL:$amt))),
+ (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt),
+ (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
+ (SHLD64mrCL addr:$dst, GR64:$src2)>;
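
The final peepholes match the funnel-shift idiom onto SHRD/SHLD. As a sanity
check, (x >> c) | (y << (64 - c)) for 0 < c < 64 is exactly the low 64 bits of
the 128-bit value y:x shifted right by c; a short C++ sketch using the
GCC/Clang __int128 extension (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// Reference form of the expression matched by the SHRD64 peephole above.
static uint64_t shrd64(uint64_t x, uint64_t y, unsigned c) {
  return (x >> c) | (y << (64 - c));                  // valid for 0 < c < 64
}

int main() {
  uint64_t x = 0x0123456789ABCDEFull, y = 0xFEDCBA9876543210ull;
  for (unsigned c = 1; c < 64; ++c) {
    unsigned __int128 wide = ((unsigned __int128)y << 64) | x;   // y:x
    assert(shrd64(x, y, c) == (uint64_t)(wide >> c)); // low half of (y:x) >> c
  }
  return 0;
}
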
diff --git a/lib/Target/X86/X86IntelAsmPrinter.cpp b/lib/Target/X86/X86IntelAsmPrinter.cpp
index ddf807f768..24dbb15941 100755
--- a/lib/Target/X86/X86IntelAsmPrinter.cpp
+++ b/lib/Target/X86/X86IntelAsmPrinter.cpp
@@ -86,8 +86,9 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
if (MRegisterInfo::isPhysicalRegister(MO.getReg())) {
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
- MVT::ValueType VT = (strcmp(Modifier,"subreg16") == 0)
- ? MVT::i16 : MVT::i8;
+ MVT::ValueType VT = (strcmp(Modifier,"subreg64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier, "subreg32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier,"subreg16") == 0) ? MVT::i16 :MVT::i8));
Reg = getX86SubSuperRegister(Reg, VT);
}
O << RI.get(Reg).Name;
@@ -137,7 +138,8 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
}
}
-void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
+void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier) {
assert(isMem(MI, Op) && "Invalid memory reference!");
const MachineOperand &BaseReg = MI->getOperand(Op);
@@ -156,7 +158,7 @@ void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
O << "[";
bool NeedPlus = false;
if (BaseReg.getReg()) {
- printOp(BaseReg, "mem");
+ printOp(BaseReg, Modifier);
NeedPlus = true;
}
@@ -164,7 +166,7 @@ void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
if (NeedPlus) O << " + ";
if (ScaleVal != 1)
O << ScaleVal << "*";
- printOp(IndexReg);
+ printOp(IndexReg, Modifier);
NeedPlus = true;
}
@@ -259,14 +261,21 @@ void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
// See if a truncate instruction can be turned into a nop.
switch (MI->getOpcode()) {
default: break;
- case X86::TRUNC_GR32_GR16:
- case X86::TRUNC_GR32_GR8:
- case X86::TRUNC_GR16_GR8: {
+ case X86::TRUNC_64to32:
+ case X86::TRUNC_64to16:
+ case X86::TRUNC_32to16:
+ case X86::TRUNC_32to8:
+ case X86::TRUNC_16to8:
+ case X86::TRUNC_32_to8:
+ case X86::TRUNC_16_to8: {
const MachineOperand &MO0 = MI->getOperand(0);
const MachineOperand &MO1 = MI->getOperand(1);
unsigned Reg0 = MO0.getReg();
unsigned Reg1 = MO1.getReg();
- if (MI->getOpcode() == X86::TRUNC_GR32_GR16)
+ unsigned Opc = MI->getOpcode();
+ if (Opc == X86::TRUNC_64to32)
+ Reg1 = getX86SubSuperRegister(Reg1, MVT::i32);
+ else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16)
Reg1 = getX86SubSuperRegister(Reg1, MVT::i16);
else
Reg1 = getX86SubSuperRegister(Reg1, MVT::i8);
@@ -275,6 +284,9 @@ void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
O << "\n\t";
break;
}
+ case X86::PsMOVZX64rr32:
+ O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
+ break;
}
// Call the autogenerated instruction printer routines.
diff --git a/lib/Target/X86/X86IntelAsmPrinter.h b/lib/Target/X86/X86IntelAsmPrinter.h
index ef0af2a6b5..110420bb74 100755
--- a/lib/Target/X86/X86IntelAsmPrinter.h
+++ b/lib/Target/X86/X86IntelAsmPrinter.h
@@ -80,6 +80,10 @@ struct X86IntelAsmPrinter : public X86SharedAsmPrinter {
O << "XMMWORD PTR ";
printMemReference(MI, OpNo);
}
+ void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo, "subreg64");
+ }
bool printAsmMRegister(const MachineOperand &MO, const char Mode);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -89,7 +93,8 @@ struct X86IntelAsmPrinter : public X86SharedAsmPrinter {
void printMachineInstruction(const MachineInstr *MI);
void printOp(const MachineOperand &MO, const char *Modifier = 0);
void printSSECC(const MachineInstr *MI, unsigned Op);
- void printMemReference(const MachineInstr *MI, unsigned Op);
+ void printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
void printPICLabel(const MachineInstr *MI, unsigned Op);
bool runOnMachineFunction(MachineFunction &F);
bool doInitialization(Module &M);
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 9bb2a72523..7ea7e9ea6d 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -42,7 +42,65 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
// callee saved registers, for the fastcc calling convention.
extern "C" {
-#if defined(__i386__) || defined(i386) || defined(_M_IX86)
+#if defined(__x86_64__)
+ // No need to save EAX/EDX for X86-64.
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl _X86CompilationCallback\n"
+ "_X86CompilationCallback:\n"
+ // Save RBP
+ "pushq %rbp\n"
+ // Save RSP
+ "movq %rsp, %rbp\n"
+ // Save all int arg registers
+ "pushq %rdi\n"
+ "pushq %rsi\n"
+ "pushq %rdx\n"
+ "pushq %rcx\n"
+ "pushq %r8\n"
+ "pushq %r9\n"
+ // Align stack on 16-byte boundary. RSP might not be properly aligned
+ // (8 byte) if this is called from an indirect stub.
+ "andq $-16, %rsp\n"
+ // Save all XMM arg registers
+ "subq $128, %rsp\n"
+ "movaps %xmm0, (%rsp)\n"
+ "movaps %xmm1, 16(%rsp)\n"
+ "movaps %xmm2, 32(%rsp)\n"
+ "movaps %xmm3, 48(%rsp)\n"
+ "movaps %xmm4, 64(%rsp)\n"
+ "movaps %xmm5, 80(%rsp)\n"
+ "movaps %xmm6, 96(%rsp)\n"
+ "movaps %xmm7, 112(%rsp)\n"
+ // JIT callee
+ "movq %rbp, %rdi\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rsi\n"
+ "call _X86CompilationCallback2\n"
+ // Restore all XMM arg registers
+ "movaps 112(%rsp), %xmm7\n"
+ "movaps 96(%rsp), %xmm6\n"
+ "movaps 80(%rsp), %xmm5\n"
+ "movaps 64(%rsp), %xmm4\n"
+ "movaps 48(%rsp), %xmm3\n"
+ "movaps 32(%rsp), %xmm2\n"
+ "movaps 16(%rsp), %xmm1\n"
+ "movaps (%rsp), %xmm0\n"
+ // Restore RSP
+ "movq %rbp, %rsp\n"
+ // Restore all int arg registers
+ "subq $48, %rsp\n"
+ "popq %r9\n"
+ "popq %r8\n"
+ "popq %rcx\n"
+ "popq %rdx\n"
+ "popq %rsi\n"
+ "popq %rdi\n"
+ // Restore RBP
+ "popq %rbp\n"
+ "ret\n");
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
#ifndef _MSC_VER
void X86CompilationCallback(void);
asm(
@@ -122,7 +180,7 @@ extern "C" void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
"Could not find return address on the stack!");
// It's a stub if there is an interrupt marker after the call.
- bool isStub = ((unsigned char*)(intptr_t)RetAddr)[0] == 0xCD;
+ bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD;
// The call instruction should have pushed the return value onto the stack...
RetAddr -= 4; // Backtrack to the reference itself...
@@ -135,20 +193,20 @@ extern "C" void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
#endif
// Sanity check to make sure this really is a call instruction.
- assert(((unsigned char*)(intptr_t)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
+ assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
- unsigned NewVal = (intptr_t)JITCompilerFunction((void*)(intptr_t)RetAddr);
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
// Rewrite the call target... so that we don't end up here every time we
// execute the call.
- *(unsigned*)(intptr_t)RetAddr = NewVal-RetAddr-4;
+ *(unsigned *)RetAddr = (unsigned)(NewVal-RetAddr-4);
if (isStub) {
// If this is a stub, rewrite the call into an unconditional branch
// instruction so that two return addresses are not pushed onto the stack
// when the requested function finally gets called. This also makes the
    // 0xCD byte (interrupt) dead, so the marker doesn't affect anything.
- ((unsigned char*)(intptr_t)RetAddr)[-1] = 0xE9;
+ ((unsigned char*)RetAddr)[-1] = 0xE9;
}
// Change the return address to reexecute the call instruction...
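The net effect of the code above, once the lazily compiled function exists, can be summarized in a freestanding sketch. It assumes retAddr points at the byte after a 5-byte 0xE8 call; the function name is illustrative, and the rel32 range check that X86-64 would additionally need is omitted:

#include <cassert>
#include <cstdint>

void patchCallSite(unsigned char *retAddr, intptr_t newTarget, bool isStub) {
  assert(retAddr[-5] == 0xE8 && "expected a near call");
  intptr_t disp = newTarget - (intptr_t)retAddr;  // rel32 is end-relative
  *(int32_t *)(retAddr - 4) = (int32_t)disp;      // retarget the call
  if (isStub)
    retAddr[-5] = 0xE9;  // call -> jmp: avoid pushing a second return address
}
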
@@ -189,16 +247,17 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
switch ((X86::RelocationType)MR->getRelocationType()) {
- case X86::reloc_pcrel_word:
+ case X86::reloc_pcrel_word: {
// PC relative relocation, add the relocated value to the value already in
// memory, after we adjust it for where the PC is.
- ResultPtr = ResultPtr-(intptr_t)RelocPos-4;
- *((intptr_t*)RelocPos) += ResultPtr;
+ ResultPtr = ResultPtr-(intptr_t)RelocPos-4-MR->getConstantVal();
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
break;
+ }
case X86::reloc_absolute_word:
// Absolute relocation, just add the relocated value to the value already
// in memory.
- *((intptr_t*)RelocPos) += ResultPtr;
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
break;
}
}
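Both relocation kinds end up patching a 32-bit field, consistent with the small code model assumed elsewhere in this commit. A hedged restatement of the arithmetic (names invented; the adjustment corresponds to MR->getConstantVal() on the pc-relative path):

#include <cstdint>

void applyX86Reloc(void *relocPos, intptr_t target, bool pcRel,
                   intptr_t adjust) {
  intptr_t value = target;
  if (pcRel)                                 // rel32: relative to the next insn
    value -= (intptr_t)relocPos + 4 + adjust;
  *(unsigned *)relocPos += (unsigned)value;  // add to the value already there
}
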
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 3a8765330d..037220adbb 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -14,13 +14,13 @@
#include "X86.h"
#include "X86RegisterInfo.h"
-#include "X86Subtarget.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Constants.h"
-#include "llvm/Type.h"
#include "llvm/Function.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -46,15 +46,32 @@ namespace {
cl::Hidden);
}
-X86RegisterInfo::X86RegisterInfo(const TargetInstrInfo &tii)
- : X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP), TII(tii) {}
+X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP),
+ TM(tm), TII(tii) {
+ // Cache some information.
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ Is64Bit = Subtarget->is64Bit();
+ if (Is64Bit) {
+ SlotSize = 8;
+ StackPtr = X86::RSP;
+ FramePtr = X86::RBP;
+ } else {
+ SlotSize = 4;
+ StackPtr = X86::ESP;
+ FramePtr = X86::EBP;
+ }
+}
void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, int FrameIdx,
const TargetRegisterClass *RC) const {
unsigned Opc;
- if (RC == &X86::GR32RegClass) {
+ if (RC == &X86::GR64RegClass) {
+ Opc = X86::MOV64mr;
+ } else if (RC == &X86::GR32RegClass) {
Opc = X86::MOV32mr;
} else if (RC == &X86::GR16RegClass) {
Opc = X86::MOV16mr;
@@ -84,7 +101,9 @@ void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC) const{
unsigned Opc;
- if (RC == &X86::GR32RegClass) {
+ if (RC == &X86::GR64RegClass) {
+ Opc = X86::MOV64rm;
+ } else if (RC == &X86::GR32RegClass) {
Opc = X86::MOV32rm;
} else if (RC == &X86::GR16RegClass) {
Opc = X86::MOV16rm;
@@ -114,7 +133,9 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *RC) const {
unsigned Opc;
- if (RC == &X86::GR32RegClass) {
+ if (RC == &X86::GR64RegClass) {
+ Opc = X86::MOV64rr;
+ } else if (RC == &X86::GR32RegClass) {
Opc = X86::MOV32rr;
} else if (RC == &X86::GR16RegClass) {
Opc = X86::MOV16rr;
@@ -270,12 +291,18 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::ADC32ri, X86::ADC32mi },
{ X86::ADC32ri8, X86::ADC32mi8 },
{ X86::ADC32rr, X86::ADC32mr },
+ { X86::ADC64ri32, X86::ADC64mi32 },
+ { X86::ADC64ri8, X86::ADC64mi8 },
+ { X86::ADC64rr, X86::ADC64mr },
{ X86::ADD16ri, X86::ADD16mi },
{ X86::ADD16ri8, X86::ADD16mi8 },
{ X86::ADD16rr, X86::ADD16mr },
{ X86::ADD32ri, X86::ADD32mi },
{ X86::ADD32ri8, X86::ADD32mi8 },
{ X86::ADD32rr, X86::ADD32mr },
+ { X86::ADD64ri32, X86::ADD64mi32 },
+ { X86::ADD64ri8, X86::ADD64mi8 },
+ { X86::ADD64rr, X86::ADD64mr },
{ X86::ADD8ri, X86::ADD8mi },
{ X86::ADD8rr, X86::ADD8mr },
{ X86::AND16ri, X86::AND16mi },
@@ -284,19 +311,30 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::AND32ri, X86::AND32mi },
{ X86::AND32ri8, X86::AND32mi8 },
{ X86::AND32rr, X86::AND32mr },
+ { X86::AND64ri32, X86::AND64mi32 },
+ { X86::AND64ri8, X86::AND64mi8 },
+ { X86::AND64rr, X86::AND64mr },
{ X86::AND8ri, X86::AND8mi },
{ X86::AND8rr, X86::AND8mr },
{ X86::DEC16r, X86::DEC16m },
{ X86::DEC32r, X86::DEC32m },
+ { X86::DEC64_16r, X86::DEC16m },
+ { X86::DEC64_32r, X86::DEC32m },
+ { X86::DEC64r, X86::DEC64m },
{ X86::DEC8r, X86::DEC8m },
{ X86::INC16r, X86::INC16m },
{ X86::INC32r, X86::INC32m },
+ { X86::INC64_16r, X86::INC16m },
+ { X86::INC64_32r, X86::INC32m },
+ { X86::INC64r, X86::INC64m },
{ X86::INC8r, X86::INC8m },
{ X86::NEG16r, X86::NEG16m },
{ X86::NEG32r, X86::NEG32m },
+ { X86::NEG64r, X86::NEG64m },
{ X86::NEG8r, X86::NEG8m },
{ X86::NOT16r, X86::NOT16m },
{ X86::NOT32r, X86::NOT32m },
+ { X86::NOT64r, X86::NOT64m },
{ X86::NOT8r, X86::NOT8m },
{ X86::OR16ri, X86::OR16mi },
{ X86::OR16ri8, X86::OR16mi8 },
@@ -304,6 +342,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::OR32ri, X86::OR32mi },
{ X86::OR32ri8, X86::OR32mi8 },
{ X86::OR32rr, X86::OR32mr },
+ { X86::OR64ri32, X86::OR64mi32 },
+ { X86::OR64ri8, X86::OR64mi8 },
+ { X86::OR64rr, X86::OR64mr },
{ X86::OR8ri, X86::OR8mi },
{ X86::OR8rr, X86::OR8mr },
{ X86::ROL16r1, X86::ROL16m1 },
@@ -312,6 +353,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::ROL32r1, X86::ROL32m1 },
{ X86::ROL32rCL, X86::ROL32mCL },
{ X86::ROL32ri, X86::ROL32mi },
+ { X86::ROL64r1, X86::ROL64m1 },
+ { X86::ROL64rCL, X86::ROL64mCL },
+ { X86::ROL64ri, X86::ROL64mi },
{ X86::ROL8r1, X86::ROL8m1 },
{ X86::ROL8rCL, X86::ROL8mCL },
{ X86::ROL8ri, X86::ROL8mi },
@@ -321,6 +365,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::ROR32r1, X86::ROR32m1 },
{ X86::ROR32rCL, X86::ROR32mCL },
{ X86::ROR32ri, X86::ROR32mi },
+ { X86::ROR64r1, X86::ROR64m1 },
+ { X86::ROR64rCL, X86::ROR64mCL },
+ { X86::ROR64ri, X86::ROR64mi },
{ X86::ROR8r1, X86::ROR8m1 },
{ X86::ROR8rCL, X86::ROR8mCL },
{ X86::ROR8ri, X86::ROR8mi },
@@ -330,18 +377,27 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::SAR32r1, X86::SAR32m1 },
{ X86::SAR32rCL, X86::SAR32mCL },
{ X86::SAR32ri, X86::SAR32mi },
+ { X86::SAR64r1, X86::SAR64m1 },
+ { X86::SAR64rCL, X86::SAR64mCL },
+ { X86::SAR64ri, X86::SAR64mi },
{ X86::SAR8r1, X86::SAR8m1 },
{ X86::SAR8rCL, X86::SAR8mCL },
{ X86::SAR8ri, X86::SAR8mi },
{ X86::SBB32ri, X86::SBB32mi },
{ X86::SBB32ri8, X86::SBB32mi8 },
{ X86::SBB32rr, X86::SBB32mr },
+ { X86::SBB64ri32, X86::SBB64mi32 },
+ { X86::SBB64ri8, X86::SBB64mi8 },
+ { X86::SBB64rr, X86::SBB64mr },
{ X86::SHL16r1, X86::SHL16m1 },
{ X86::SHL16rCL, X86::SHL16mCL },
{ X86::SHL16ri, X86::SHL16mi },
{ X86::SHL32r1, X86::SHL32m1 },
{ X86::SHL32rCL, X86::SHL32mCL },
{ X86::SHL32ri, X86::SHL32mi },
+ { X86::SHL64r1, X86::SHL64m1 },
+ { X86::SHL64rCL, X86::SHL64mCL },
+ { X86::SHL64ri, X86::SHL64mi },
{ X86::SHL8r1, X86::SHL8m1 },
{ X86::SHL8rCL, X86::SHL8mCL },
{ X86::SHL8ri, X86::SHL8mi },
@@ -349,12 +405,17 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::SHLD16rri8, X86::SHLD16mri8 },
{ X86::SHLD32rrCL, X86::SHLD32mrCL },
{ X86::SHLD32rri8, X86::SHLD32mri8 },
+ { X86::SHLD64rrCL, X86::SHLD64mrCL },
+ { X86::SHLD64rri8, X86::SHLD64mri8 },
{ X86::SHR16r1, X86::SHR16m1 },
{ X86::SHR16rCL, X86::SHR16mCL },
{ X86::SHR16ri, X86::SHR16mi },
{ X86::SHR32r1, X86::SHR32m1 },
{ X86::SHR32rCL, X86::SHR32mCL },
{ X86::SHR32ri, X86::SHR32mi },
+ { X86::SHR64r1, X86::SHR64m1 },
+ { X86::SHR64rCL, X86::SHR64mCL },
+ { X86::SHR64ri, X86::SHR64mi },
{ X86::SHR8r1, X86::SHR8m1 },
{ X86::SHR8rCL, X86::SHR8mCL },
{ X86::SHR8ri, X86::SHR8mi },
@@ -362,12 +423,17 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::SHRD16rri8, X86::SHRD16mri8 },
{ X86::SHRD32rrCL, X86::SHRD32mrCL },
{ X86::SHRD32rri8, X86::SHRD32mri8 },
+ { X86::SHRD64rrCL, X86::SHRD64mrCL },
+ { X86::SHRD64rri8, X86::SHRD64mri8 },
{ X86::SUB16ri, X86::SUB16mi },
{ X86::SUB16ri8, X86::SUB16mi8 },
{ X86::SUB16rr, X86::SUB16mr },
{ X86::SUB32ri, X86::SUB32mi },
{ X86::SUB32ri8, X86::SUB32mi8 },
{ X86::SUB32rr, X86::SUB32mr },
+ { X86::SUB64ri32, X86::SUB64mi32 },
+ { X86::SUB64ri8, X86::SUB64mi8 },
+ { X86::SUB64rr, X86::SUB64mr },
{ X86::SUB8ri, X86::SUB8mi },
{ X86::SUB8rr, X86::SUB8mr },
{ X86::XOR16ri, X86::XOR16mi },
@@ -376,6 +442,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::XOR32ri, X86::XOR32mi },
{ X86::XOR32ri8, X86::XOR32mi8 },
{ X86::XOR32rr, X86::XOR32mr },
+ { X86::XOR64ri32, X86::XOR64mi32 },
+ { X86::XOR64ri8, X86::XOR64mi8 },
+ { X86::XOR64rr, X86::XOR64mr },
{ X86::XOR8ri, X86::XOR8mi },
{ X86::XOR8rr, X86::XOR8mr }
};
@@ -388,6 +457,8 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
return MakeM0Inst(X86::MOV16mi, FrameIndex, MI);
else if (MI->getOpcode() == X86::MOV32r0)
return MakeM0Inst(X86::MOV32mi, FrameIndex, MI);
+ else if (MI->getOpcode() == X86::MOV64r0)
+ return MakeM0Inst(X86::MOV64mi32, FrameIndex, MI);
else if (MI->getOpcode() == X86::MOV8r0)
return MakeM0Inst(X86::MOV8mi, FrameIndex, MI);
@@ -399,19 +470,24 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::CMP8ri, X86::CMP8mi },
{ X86::DIV16r, X86::DIV16m },
{ X86::DIV32r, X86::DIV32m },
+ { X86::DIV64r, X86::DIV64m },
{ X86::DIV8r, X86::DIV8m },
{ X86::FsMOVAPDrr, X86::MOVSDmr },
{ X86::FsMOVAPSrr, X86::MOVSSmr },
{ X86::IDIV16r, X86::IDIV16m },
{ X86::IDIV32r, X86::IDIV32m },
+ { X86::IDIV64r, X86::IDIV64m },
{ X86::IDIV8r, X86::IDIV8m },
{ X86::IMUL16r, X86::IMUL16m },
{ X86::IMUL32r, X86::IMUL32m },
+ { X86::IMUL64r, X86::IMUL64m },
{ X86::IMUL8r, X86::IMUL8m },
{ X86::MOV16ri, X86::MOV16mi },
{ X86::MOV16rr, X86::MOV16mr },
{ X86::MOV32ri, X86::MOV32mi },
{ X86::MOV32rr, X86::MOV32mr },
+ { X86::MOV64ri32, X86::MOV64mi32 },
+ { X86::MOV64rr, X86::MOV64mr },
{ X86::MOV8ri, X86::MOV8mi },
{ X86::MOV8rr, X86::MOV8mr },
{ X86::MOVAPDrr, X86::MOVAPDmr },
@@ -424,6 +500,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::MOVUPSrr, X86::MOVUPSmr },
{ X86::MUL16r, X86::MUL16m },
{ X86::MUL32r, X86::MUL32m },
+ { X86::MUL64r, X86::MUL64m },
{ X86::MUL8r, X86::MUL8m },
{ X86::SETAEr, X86::SETAEm },
{ X86::SETAr, X86::SETAm },
@@ -441,9 +518,11 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::SETSr, X86::SETSm },
{ X86::TEST16ri, X86::TEST16mi },
{ X86::TEST32ri, X86::TEST32mi },
+ { X86::TEST64ri32, X86::TEST64mi32 },
{ X86::TEST8ri, X86::TEST8mi },
{ X86::XCHG16rr, X86::XCHG16mr },
{ X86::XCHG32rr, X86::XCHG32mr },
+ { X86::XCHG64rr, X86::XCHG64mr },
{ X86::XCHG8rr, X86::XCHG8mr }
};
ASSERT_SORTED(OpcodeTable);
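The opcode tables are kept sorted on the register form precisely so the fold can be located with a binary search; a standalone sketch of such a lookup, with TableEntry simplified to a pair and the function name invented:

#include <algorithm>
#include <utility>

typedef std::pair<unsigned, unsigned> OpcPair;   // { RegOpcode, MemOpcode }

// Returns the memory-form opcode for RegOpc, or 0 if it cannot be folded.
unsigned lookupFoldedOpcode(const OpcPair *Table, unsigned Size,
                            unsigned RegOpc) {
  const OpcPair *End = Table + Size;
  const OpcPair *I = std::lower_bound(Table, End, OpcPair(RegOpc, 0u));
  return (I != End && I->first == RegOpc) ? I->second : 0;
}
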
@@ -453,16 +532,23 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
static const TableEntry OpcodeTable[] = {
{ X86::CMP16rr, X86::CMP16rm },
{ X86::CMP32rr, X86::CMP32rm },
+ { X86::CMP64ri32, X86::CMP64mi32 },
+ { X86::CMP64ri8, X86::CMP64mi8 },
+ { X86::CMP64rr, X86::CMP64rm },
{ X86::CMP8rr, X86::CMP8rm },
{ X86::CMPPDrri, X86::CMPPDrmi },
{ X86::CMPPSrri, X86::CMPPSrmi },
{ X86::CMPSDrr, X86::CMPSDrm },
{ X86::CMPSSrr, X86::CMPSSrm },
{ X86::CVTSD2SSrr, X86::CVTSD2SSrm },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm },
{ X86::CVTSI2SDrr, X86::CVTSI2SDrm },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm },
{ X86::CVTSI2SSrr, X86::CVTSI2SSrm },
{ X86::CVTSS2SDrr, X86::CVTSS2SDrm },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm },
{ X86::CVTTSD2SIrr, X86::CVTTSD2SIrm },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm },
{ X86::CVTTSS2SIrr, X86::CVTTSS2SIrm },
{ X86::FsMOVAPDrr, X86::MOVSDrm },
{ X86::FsMOVAPSrr, X86::MOVSSrm },
@@ -470,6 +556,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::IMUL16rri8, X86::IMUL16rmi8 },
{ X86::IMUL32rri, X86::IMUL32rmi },
{ X86::IMUL32rri8, X86::IMUL32rmi8 },
+ { X86::IMUL64rr, X86::IMUL64rm },
+ { X86::IMUL64rri32, X86::IMUL64rmi32 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8 },
{ X86::Int_CMPSDrr, X86::Int_CMPSDrm },
{ X86::Int_CMPSSrr, X86::Int_CMPSSrm },
{ X86::Int_COMISDrr, X86::Int_COMISDrm },
@@ -480,20 +569,27 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm },
{ X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm },
{ X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm },
+ { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
{ X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm },
{ X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
{ X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
{ X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm },
{ X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm },
+ { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
{ X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm },
{ X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
{ X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
{ X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
{ X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
{ X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
{ X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
{ X86::MOV16rr, X86::MOV16rm },
{ X86::MOV32rr, X86::MOV32rm },
+ { X86::MOV64rr, X86::MOV64rm },
{ X86::MOV8rr, X86::MOV8rm },
{ X86::MOVAPDrr, X86::MOVAPDrm },
{ X86::MOVAPSrr, X86::MOVAPSrm },
@@ -509,22 +605,30 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::MOVSX16rr8, X86::MOVSX16rm8 },
{ X86::MOVSX32rr16, X86::MOVSX32rm16 },
{ X86::MOVSX32rr8, X86::MOVSX32rm8 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8 },
{ X86::MOVUPDrr, X86::MOVUPDrm },
{ X86::MOVUPSrr, X86::MOVUPSrm },
{ X86::MOVZX16rr8, X86::MOVZX16rm8 },
{ X86::MOVZX32rr16, X86::MOVZX32rm16 },
{ X86::MOVZX32rr8, X86::MOVZX32rm8 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8 },
{ X86::PSHUFDri, X86::PSHUFDmi },
{ X86::PSHUFHWri, X86::PSHUFHWmi },
{ X86::PSHUFLWri, X86::PSHUFLWmi },
+ { X86::PsMOVZX64rr32, X86::PsMOVZX64rm32 },
{ X86::TEST16rr, X86::TEST16rm },
{ X86::TEST32rr, X86::TEST32rm },
+ { X86::TEST64rr, X86::TEST64rm },
{ X86::TEST8rr, X86::TEST8rm },
// FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
{ X86::UCOMISDrr, X86::UCOMISDrm },
{ X86::UCOMISSrr, X86::UCOMISSrm },
{ X86::XCHG16rr, X86::XCHG16rm },
{ X86::XCHG32rr, X86::XCHG32rm },
+ { X86::XCHG64rr, X86::XCHG64rm },
{ X86::XCHG8rr, X86::XCHG8rm }
};
ASSERT_SORTED(OpcodeTable);
@@ -533,8 +637,10 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
} else if (i == 2) {
static const TableEntry OpcodeTable[] = {
{ X86::ADC32rr, X86::ADC32rm },
+ { X86::ADC64rr, X86::ADC64rm },
{ X86::ADD16rr, X86::ADD16rm },
{ X86::ADD32rr, X86::ADD32rm },
+ { X86::ADD64rr, X86::ADD64rm },
{ X86::ADD8rr, X86::ADD8rm },
{ X86::ADDPDrr, X86::ADDPDrm },
{ X86::ADDPSrr, X86::ADDPSrm },
@@ -544,6 +650,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::ADDSUBPSrr, X86::ADDSUBPSrm },
{ X86::AND16rr, X86::AND16rm },
{ X86::AND32rr, X86::AND32rm },
+ { X86::AND64rr, X86::AND64rm },
{ X86::AND8rr, X86::AND8rm },
{ X86::ANDNPDrr, X86::ANDNPDrm },
{ X86::ANDNPSrr, X86::ANDNPSrm },
@@ -551,32 +658,46 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::ANDPSrr, X86::ANDPSrm },
{ X86::CMOVA16rr, X86::CMOVA16rm },
{ X86::CMOVA32rr, X86::CMOVA32rm },
+ { X86::CMOVA64rr, X86::CMOVA64rm },
{ X86::CMOVAE16rr, X86::CMOVAE16rm },
{ X86::CMOVAE32rr, X86::CMOVAE32rm },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm },
{ X86::CMOVB16rr, X86::CMOVB16rm },
{ X86::CMOVB32rr, X86::CMOVB32rm },
+ { X86::CMOVB64rr, X86::CMOVB64rm },
{ X86::CMOVBE16rr, X86::CMOVBE16rm },
{ X86::CMOVBE32rr, X86::CMOVBE32rm },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm },
{ X86::CMOVE16rr, X86::CMOVE16rm },
{ X86::CMOVE32rr, X86::CMOVE32rm },
+ { X86::CMOVE64rr, X86::CMOVE64rm },
{ X86::CMOVG16rr, X86::CMOVG16rm },
{ X86::CMOVG32rr, X86::CMOVG32rm },
+ { X86::CMOVG64rr, X86::CMOVG64rm },
{ X86::CMOVGE16rr, X86::CMOVGE16rm },
{ X86::CMOVGE32rr, X86::CMOVGE32rm },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm },
{ X86::CMOVL16rr, X86::CMOVL16rm },
{ X86::CMOVL32rr, X86::CMOVL32rm },
+ { X86::CMOVL64rr, X86::CMOVL64rm },
{ X86::CMOVLE16rr, X86::CMOVLE16rm },
{ X86::CMOVLE32rr, X86::CMOVLE32rm },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm },
{ X86::CMOVNE16rr, X86::CMOVNE16rm },
{ X86::CMOVNE32rr, X86::CMOVNE32rm },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm },
{ X86::CMOVNP16rr, X86::CMOVNP16rm },
{ X86::CMOVNP32rr, X86::CMOVNP32rm },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm },
{ X86::CMOVNS16rr, X86::CMOVNS16rm },
{ X86::CMOVNS32rr, X86::CMOVNS32rm },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm },
{ X86::CMOVP16rr, X86::CMOVP16rm },
{ X86::CMOVP32rr, X86::CMOVP32rm },
+ { X86::CMOVP64rr, X86::CMOVP64rm },
{ X86::CMOVS16rr, X86::CMOVS16rm },
{ X86::CMOVS32rr, X86::CMOVS32rm },
+ { X86::CMOVS64rr, X86::CMOVS64rm },
{ X86::DIVPDrr, X86::DIVPDrm },
{ X86::DIVPSrr, X86::DIVPSrm },
{ X86::DIVSDrr, X86::DIVSDrm },
@@ -597,6 +718,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::MULSSrr, X86::MULSSrm },
{ X86::OR16rr, X86::OR16rm },
{ X86::OR32rr, X86::OR32rm },
+ { X86::OR64rr, X86::OR64rm },
{ X86::OR8rr, X86::OR8rm },
{ X86::ORPDrr, X86::ORPDrm },
{ X86::ORPSrr, X86::ORPSrm },
@@ -655,6 +777,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::RCPPSr, X86::RCPPSm },
{ X86::RSQRTPSr, X86::RSQRTPSm },
{ X86::SBB32rr, X86::SBB32rm },
+ { X86::SBB64rr, X86::SBB64rm },
{ X86::SHUFPDrri, X86::SHUFPDrmi },
{ X86::SHUFPSrri, X86::SHUFPSrmi },
{ X86::SQRTPDr, X86::SQRTPDm },
@@ -663,6 +786,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::SQRTSSr, X86::SQRTSSm },
{ X86::SUB16rr, X86::SUB16rm },
{ X86::SUB32rr, X86::SUB32rm },
+ { X86::SUB64rr, X86::SUB64rm },
{ X86::SUB8rr, X86::SUB8rm },
{ X86::SUBPDrr, X86::SUBPDrm },
{ X86::SUBPSrr, X86::SUBPSrm },
@@ -675,6 +799,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::UNPCKLPSrr, X86::UNPCKLPSrm },
{ X86::XOR16rr, X86::XOR16rm },
{ X86::XOR32rr, X86::XOR32rm },
+ { X86::XOR64rr, X86::XOR64rm },
{ X86::XOR8rr, X86::XOR8rm },
{ X86::XORPDrr, X86::XORPDrm },
{ X86::XORPSrr, X86::XORPSrm }
@@ -707,19 +832,29 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
const unsigned *X86RegisterInfo::getCalleeSaveRegs() const {
- static const unsigned CalleeSaveRegs[] = {
+ static const unsigned CalleeSaveRegs32Bit[] = {
X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
};
- return CalleeSaveRegs;
+ static const unsigned CalleeSaveRegs64Bit[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ return Is64Bit ? CalleeSaveRegs64Bit : CalleeSaveRegs32Bit;
}
const TargetRegisterClass* const*
X86RegisterInfo::getCalleeSaveRegClasses() const {
- static const TargetRegisterClass * const CalleeSaveRegClasses[] = {
+ static const TargetRegisterClass * const CalleeSaveRegClasses32Bit[] = {
&X86::GR32RegClass, &X86::GR32RegClass,
&X86::GR32RegClass, &X86::GR32RegClass, 0
};
- return CalleeSaveRegClasses;
+ static const TargetRegisterClass * const CalleeSaveRegClasses64Bit[] = {
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass, 0
+ };
+
+ return Is64Bit ? CalleeSaveRegClasses64Bit : CalleeSaveRegClasses32Bit;
}
//===----------------------------------------------------------------------===//
@@ -754,15 +889,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *New = 0;
if (Old->getOpcode() == X86::ADJCALLSTACKDOWN) {
- New=BuildMI(X86::SUB32ri, 2, X86::ESP).addReg(X86::ESP).addImm(Amount);
+    New = BuildMI(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri, 1, StackPtr)
+ .addReg(StackPtr).addImm(Amount);
} else {
assert(Old->getOpcode() == X86::ADJCALLSTACKUP);
// factor out the amount the callee already popped.
unsigned CalleeAmt = Old->getOperand(1).getImmedValue();
Amount -= CalleeAmt;
if (Amount) {
- unsigned Opc = Amount < 128 ? X86::ADD32ri8 : X86::ADD32ri;
- New = BuildMI(Opc, 2, X86::ESP).addReg(X86::ESP).addImm(Amount);
+ unsigned Opc = (Amount < 128) ?
+ (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
+ (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
+ New = BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(Amount);
}
}
@@ -774,9 +912,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// something off the stack pointer, add it back. We do this until we have
// more advanced stack pointer tracking ability.
if (unsigned CalleeAmt = I->getOperand(1).getImmedValue()) {
- unsigned Opc = CalleeAmt < 128 ? X86::SUB32ri8 : X86::SUB32ri;
+ unsigned Opc = (CalleeAmt < 128) ?
+ (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+ (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
MachineInstr *New =
- BuildMI(Opc, 1, X86::ESP).addReg(X86::ESP).addImm(CalleeAmt);
+ BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(CalleeAmt);
MBB.insert(I, New);
}
}
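The 32/64-bit opcode selection above is a pattern this file now repeats in several places; a small sketch of the general choice, assuming the X86:: opcode names from the surrounding code (the helper itself is invented, and the ADJCALLSTACKDOWN path above always uses the 32-bit immediate form):

static unsigned stackAdjustOpc(bool Is64Bit, bool IsSub, unsigned Amount) {
  if (IsSub)
    return Amount < 128 ? (Is64Bit ? X86::SUB64ri8  : X86::SUB32ri8)
                        : (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
  return   Amount < 128 ? (Is64Bit ? X86::ADD64ri8  : X86::ADD32ri8)
                        : (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
}
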
@@ -794,19 +934,18 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const{
}
int FrameIndex = MI.getOperand(i).getFrameIndex();
-
// This must be part of a four operand memory reference. Replace the
- // FrameIndex with base register with EBP. Add add an offset to the offset.
- MI.getOperand(i).ChangeToRegister(hasFP(MF) ? X86::EBP : X86::ESP, false);
+  // FrameIndex with the frame or stack pointer register. Add an offset to the offset.
+ MI.getOperand(i).ChangeToRegister(hasFP(MF) ? FramePtr : StackPtr, false);
// Now add the frame object offset to the offset from EBP.
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(i+3).getImmedValue()+4;
+ MI.getOperand(i+3).getImmedValue()+SlotSize;
if (!hasFP(MF))
Offset += MF.getFrameInfo()->getStackSize();
else
- Offset += 4; // Skip the saved EBP
+ Offset += SlotSize; // Skip the saved EBP
MI.getOperand(i+3).ChangeToImmediate(Offset);
}
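The offset math above reduces to a pure function of the frame layout; a standalone restatement under the same assumptions (a SlotSize-byte return address, plus a saved frame pointer when one is kept):

int frameObjectOffset(int objectOffset, int existingDisp, unsigned slotSize,
                      bool hasFramePtr, unsigned stackSize) {
  int offset = objectOffset + existingDisp + (int)slotSize; // skip ret addr
  offset += hasFramePtr ? (int)slotSize    // skip the saved EBP/RBP
                        : (int)stackSize;  // or index from ESP/RSP
  return offset;
}
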
@@ -815,7 +954,7 @@ void
X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
if (hasFP(MF)) {
// Create a frame entry for the EBP register that must be saved.
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, -8);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,SlotSize * -2);
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
}
@@ -840,9 +979,9 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (!hasFP(MF))
NumBytes += MFI->getMaxCallFrameSize();
- // Round the size to a multiple of the alignment (don't forget the 4 byte
+ // Round the size to a multiple of the alignment (don't forget the 4/8 byte
// offset though).
- NumBytes = ((NumBytes+4)+Align-1)/Align*Align - 4;
+ NumBytes = ((NumBytes+SlotSize)+Align-1)/Align*Align - SlotSize;
}
// Update frame info to pretend that this is part of the stack...
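The rounding expression keeps the stack aligned once the SlotSize-byte return address is counted in; restated as a standalone helper with a worked value:

unsigned roundUpFrameSize(unsigned numBytes, unsigned slotSize,
                          unsigned align) {
  return ((numBytes + slotSize) + align - 1) / align * align - slotSize;
}
// e.g. roundUpFrameSize(20, 8, 16) == 24, and 24 + 8 == 32 is 16-byte aligned.
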
@@ -859,8 +998,10 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
MI = BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("_alloca");
MBB.insert(MBBI, MI);
} else {
- unsigned Opc = NumBytes < 128 ? X86::SUB32ri8 : X86::SUB32ri;
- MI = BuildMI(Opc, 2, X86::ESP).addReg(X86::ESP).addImm(NumBytes);
+ unsigned Opc = (NumBytes < 128) ?
+ (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+ (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
+      MI = BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(NumBytes);
MBB.insert(MBBI, MI);
}
}
@@ -868,18 +1009,21 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (hasFP(MF)) {
// Get the offset of the stack slot for the EBP register... which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
- int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexBegin())+4;
+ int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexBegin())+SlotSize;
// Save EBP into the appropriate stack slot...
- MI = addRegOffset(BuildMI(X86::MOV32mr, 5), // mov [ESP-<offset>], EBP
- X86::ESP, EBPOffset+NumBytes).addReg(X86::EBP);
+ // mov [ESP-<offset>], EBP
+ MI = addRegOffset(BuildMI(Is64Bit ? X86::MOV64mr : X86::MOV32mr, 5),
+ StackPtr, EBPOffset+NumBytes).addReg(FramePtr);
MBB.insert(MBBI, MI);
// Update EBP with the new base value...
- if (NumBytes == 4) // mov EBP, ESP
- MI = BuildMI(X86::MOV32rr, 2, X86::EBP).addReg(X86::ESP);
+ if (NumBytes == SlotSize) // mov EBP, ESP
+ MI = BuildMI(Is64Bit ? X86::MOV64rr : X86::MOV32rr, 2, FramePtr).
+ addReg(StackPtr);
else // lea EBP, [ESP+StackSize]
- MI = addRegOffset(BuildMI(X86::LEA32r, 5, X86::EBP), X86::ESP,NumBytes-4);
+ MI = addRegOffset(BuildMI(Is64Bit ? X86::LEA64r : X86::LEA32r,
+ 5, FramePtr), StackPtr, NumBytes-SlotSize);
MBB.insert(MBBI, MI);
}
@@ -916,13 +1060,14 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
if (hasFP(MF)) {
// Get the offset of the stack slot for the EBP register... which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
- int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexEnd()-1)+4;
+ int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexEnd()-1)+SlotSize;
// mov ESP, EBP
- BuildMI(MBB, MBBI, X86::MOV32rr, 1, X86::ESP).addReg(X86::EBP);
+ BuildMI(MBB, MBBI, Is64Bit ? X86::MOV64rr : X86::MOV32rr, 1, StackPtr).
+ addReg(FramePtr);
// pop EBP
- BuildMI(MBB, MBBI, X86::POP32r, 0, X86::EBP);
+ BuildMI(MBB, MBBI, Is64Bit ? X86::POP64r : X86::POP32r, 0, FramePtr);
} else {
// Get the number of bytes allocated from the FrameInfo...
unsigned NumBytes = MFI->getStackSize();
@@ -932,14 +1077,15 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
// instruction, merge the two instructions.
if (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
- if ((PI->getOpcode() == X86::ADD32ri ||
- PI->getOpcode() == X86::ADD32ri8) &&
- PI->getOperand(0).getReg() == X86::ESP) {
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
NumBytes += PI->getOperand(2).getImmedValue();
MBB.erase(PI);
- } else if ((PI->getOpcode() == X86::SUB32ri ||
- PI->getOpcode() == X86::SUB32ri8) &&
- PI->getOperand(0).getReg() == X86::ESP) {
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
NumBytes -= PI->getOperand(2).getImmedValue();
MBB.erase(PI);
} else if (PI->getOpcode() == X86::ADJSTACKPTRri) {
@@ -949,11 +1095,15 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
}
if (NumBytes > 0) {
- unsigned Opc = NumBytes < 128 ? X86::ADD32ri8 : X86::ADD32ri;
- BuildMI(MBB, MBBI, Opc, 2, X86::ESP).addReg(X86::ESP).addImm(NumBytes);
+ unsigned Opc = (NumBytes < 128) ?
+ (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
+ (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
+ BuildMI(MBB, MBBI, Opc, 2, StackPtr).addReg(StackPtr).addImm(NumBytes);
} else if ((int)NumBytes < 0) {
- unsigned Opc = -NumBytes < 128 ? X86::SUB32ri8 : X86::SUB32ri;
- BuildMI(MBB, MBBI, Opc, 2, X86::ESP).addReg(X86::ESP).addImm(-NumBytes);
+ unsigned Opc = (-NumBytes < 128) ?
+ (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+ (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
+ BuildMI(MBB, MBBI, Opc, 2, StackPtr).addReg(StackPtr).addImm(-NumBytes);
}
}
}
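The epilogue merge above folds a neighboring stack adjustment into the epilogue's own add/sub; the bookkeeping, sketched with a stand-in struct for the prior MachineInstr:

struct StackOp { bool isAdd; long imm; };   // prior add/sub to the stack ptr

long mergedEpilogueAdjust(long numBytes, const StackOp *prior) {
  if (prior)
    numBytes += prior->isAdd ? prior->imm : -prior->imm;
  return numBytes;   // > 0 emits an ADD, < 0 emits a SUB of -numBytes
}
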
@@ -964,7 +1114,7 @@ unsigned X86RegisterInfo::getRARegister() const {
}
unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
- return hasFP(MF) ? X86::EBP : X86::ESP;
+ return hasFP(MF) ? FramePtr : StackPtr;
}
namespace llvm {
@@ -974,68 +1124,160 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::ValueType VT, bool High) {
case MVT::i8:
if (High) {
switch (Reg) {
- default: return Reg;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::DH;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::CH;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::BH;
}
} else {
switch (Reg) {
- default: return Reg;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AL;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::DL;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::CL;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
}
}
case MVT::i16:
switch (Reg) {
default: return Reg;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AX;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::DX;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::CX;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::BX;
- case X86::ESI:
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
return X86::SI;
- case X86::EDI:
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
return X86::DI;
- case X86::EBP:
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
return X86::BP;
- case X86::ESP:
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
}
case MVT::i32:
switch (Reg) {
- default: return true;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::EAX;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::EDX;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::ECX;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::EBX;
- case X86::SI:
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
return X86::ESI;
- case X86::DI:
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
return X86::EDI;
- case X86::BP:
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
return X86::EBP;
- case X86::SP:
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case MVT::i64:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
}
}
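A short usage sketch of the extended mapping, assuming the declaration this file exports and placed inside any function in this file: any alias of a register can be converted to the requested width, and the High flag only matters for the i8 case.

unsigned R16 = getX86SubSuperRegister(X86::RAX,  MVT::i16, false);  // X86::AX
unsigned R8  = getX86SubSuperRegister(X86::R10D, MVT::i8,  false);  // X86::R10B
unsigned R64 = getX86SubSuperRegister(X86::BH,   MVT::i64, false);  // X86::RBX
unsigned RH  = getX86SubSuperRegister(X86::EAX,  MVT::i8,  true);   // X86::AH
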
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index e86cc28d52..fdab3ee7a0 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -20,10 +20,26 @@
namespace llvm {
class Type;
class TargetInstrInfo;
+ class X86TargetMachine;
struct X86RegisterInfo : public X86GenRegisterInfo {
+ X86TargetMachine &TM;
const TargetInstrInfo &TII;
- X86RegisterInfo(const TargetInstrInfo &tii);
+private:
+  /// Is64Bit - Is the target 64-bit?
+ bool Is64Bit;
+
+ /// SlotSize - Stack slot size in bytes.
+ unsigned SlotSize;
+
+ /// StackPtr - X86 physical register used as stack ptr.
+ unsigned StackPtr;
+
+ /// FramePtr - X86 physical register used as frame ptr.
+ unsigned FramePtr;
+
+public:
+ X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
/// Code Generation virtual methods...
void storeRegToStackSlot(MachineBasicBlock &MBB,
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 7a713c3110..4728c0c960 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -23,35 +23,92 @@ let Namespace = "X86" in {
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliased AL (for example).
+  // FIXME: X86-64 has different Dwarf numbers.
+ // 64-bit registers, X86-64 only
+ def RAX : Register<"RAX">, DwarfRegNum<0>;
+ def RDX : Register<"RDX">, DwarfRegNum<1>;
+ def RCX : Register<"RCX">, DwarfRegNum<2>;
+ def RBX : Register<"RBX">, DwarfRegNum<3>;
+ def RSI : Register<"RSI">, DwarfRegNum<4>;
+ def RDI : Register<"RDI">, DwarfRegNum<5>;
+ def RBP : Register<"RBP">, DwarfRegNum<6>;
+ def RSP : Register<"RSP">, DwarfRegNum<7>;
+
+ def R8 : Register<"R8">, DwarfRegNum<8>;
+ def R9 : Register<"R9">, DwarfRegNum<9>;
+ def R10 : Register<"R10">, DwarfRegNum<10>;
+ def R11 : Register<"R11">, DwarfRegNum<11>;
+ def R12 : Register<"R12">, DwarfRegNum<12>;
+ def R13 : Register<"R13">, DwarfRegNum<13>;
+ def R14 : Register<"R14">, DwarfRegNum<14>;
+ def R15 : Register<"R15">, DwarfRegNum<15>;
+
// 32-bit registers
- def EAX : Register<"EAX">, DwarfRegNum<0>;
- def ECX : Register<"ECX">, DwarfRegNum<1>;
- def EDX : Register<"EDX">, DwarfRegNum<2>;
- def EBX : Register<"EBX">, DwarfRegNum<3>;
- def ESP : Register<"ESP">, DwarfRegNum<4>;
- def EBP : Register<"EBP">, DwarfRegNum<5>;
- def ESI : Register<"ESI">, DwarfRegNum<6>;
- def EDI : Register<"EDI">, DwarfRegNum<7>;
+ def EAX : RegisterGroup<"EAX", [RAX]>, DwarfRegNum<0>;
+ def ECX : RegisterGroup<"ECX", [RCX]>, DwarfRegNum<1>;
+ def EDX : RegisterGroup<"EDX", [RDX]>, DwarfRegNum<2>;
+ def EBX : RegisterGroup<"EBX", [RBX]>, DwarfRegNum<3>;
+ def ESP : RegisterGroup<"ESP", [RSP]>, DwarfRegNum<4>;
+ def EBP : RegisterGroup<"EBP", [RBP]>, DwarfRegNum<5>;
+ def ESI : RegisterGroup<"ESI", [RSI]>, DwarfRegNum<6>;
+ def EDI : RegisterGroup<"EDI", [RDI]>, DwarfRegNum<7>;
+ // X86-64 only
+ def R8D : RegisterGroup<"R8D", [R8]>, DwarfRegNum<8>;
+ def R9D : RegisterGroup<"R9D", [R9]>, DwarfRegNum<9>;
+ def R10D : RegisterGroup<"R10D", [R10]>, DwarfRegNum<10>;
+ def R11D : RegisterGroup<"R11D", [R11]>, DwarfRegNum<11>;
+ def R12D : RegisterGroup<"R12D", [R12]>, DwarfRegNum<12>;
+ def R13D : RegisterGroup<"R13D", [R13]>, DwarfRegNum<13>;
+ def R14D : RegisterGroup<"R14D", [R14]>, DwarfRegNum<14>;
+ def R15D : RegisterGroup<"R15D", [R15]>, DwarfRegNum<15>;
+
// 16-bit registers
- def AX : RegisterGroup<"AX", [EAX]>, DwarfRegNum<0>;
- def CX : RegisterGroup<"CX", [ECX]>, DwarfRegNum<1>;
- def DX : RegisterGroup<"DX", [EDX]>, DwarfRegNum<2>;
- def BX : RegisterGroup<"BX", [EBX]>, DwarfRegNum<3>;
- def SP : RegisterGroup<"SP", [ESP]>, DwarfRegNum<4>;
- def BP : RegisterGroup<"BP", [EBP]>, DwarfRegNum<5>;
- def SI : RegisterGroup<"SI", [ESI]>, DwarfRegNum<6>;
- def DI : RegisterGroup<"DI", [EDI]>, DwarfRegNum<7>;
+ def AX : RegisterGroup<"AX", [EAX,RAX]>, DwarfRegNum<0>;
+ def CX : RegisterGroup<"CX", [ECX,RCX]>, DwarfRegNum<1>;
+ def DX : RegisterGroup<"DX", [EDX,RDX]>, DwarfRegNum<2>;
+ def BX : RegisterGroup<"BX", [EBX,RBX]>, DwarfRegNum<3>;
+ def SP : RegisterGroup<"SP", [ESP,RSP]>, DwarfRegNum<4>;
+ def BP : RegisterGroup<"BP", [EBP,RBP]>, DwarfRegNum<5>;
+ def SI : RegisterGroup<"SI", [ESI,RSI]>, DwarfRegNum<6>;
+ def DI : RegisterGroup<"DI", [EDI,RDI]>, DwarfRegNum<7>;
+ // X86-64 only
+ def R8W : RegisterGroup<"R8W", [R8D,R8]>, DwarfRegNum<8>;
+ def R9W : RegisterGroup<"R9W", [R9D,R9]>, DwarfRegNum<9>;
+ def R10W : RegisterGroup<"R10W", [R10D,R10]>, DwarfRegNum<10>;
+ def R11W : RegisterGroup<"R11W", [R11D,R11]>, DwarfRegNum<11>;
+ def R12W : RegisterGroup<"R12W", [R12D,R12]>, DwarfRegNum<12>;
+ def R13W : RegisterGroup<"R13W", [R13D,R13]>, DwarfRegNum<13>;
+ def R14W : RegisterGroup<"R14W", [R14D,R14]>, DwarfRegNum<14>;
+ def R15W : RegisterGroup<"R15W", [R15D,R15]>, DwarfRegNum<15>;
+
// 8-bit registers
- def AL : RegisterGroup<"AL", [AX,EAX]>, DwarfRegNum<0>;
- def CL : RegisterGroup<"CL", [CX,ECX]>, DwarfRegNum<1>;
- def DL : RegisterGroup<"DL", [DX,EDX]>, DwarfRegNum<2>;
- def BL : RegisterGroup<"BL", [BX,EBX]>, DwarfRegNum<3>;
- def AH : RegisterGroup<"AH", [AX,EAX]>, DwarfRegNum<0>;
- def CH : RegisterGroup<"CH", [CX,ECX]>, DwarfRegNum<1>;
- def DH : RegisterGroup<"DH", [DX,EDX]>, DwarfRegNum<2>;
- def BH : RegisterGroup<"BH", [BX,EBX]>, DwarfRegNum<3>;
+ // Low registers
+ def AL : RegisterGroup<"AL", [AX,EAX,RAX]>, DwarfRegNum<0>;
+ def CL : RegisterGroup<"CL", [CX,ECX,RCX]>, DwarfRegNum<1>;
+ def DL : RegisterGroup<"DL", [DX,EDX,RDX]>, DwarfRegNum<2>;
+ def BL : RegisterGroup<"BL", [BX,EBX,RBX]>, DwarfRegNum<3>;
+
+ // X86-64 only
+ def SIL : RegisterGroup<"SIL", [SI,ESI,RSI]>, DwarfRegNum<4>;
+ def DIL : RegisterGroup<"DIL", [DI,EDI,RDI]>, DwarfRegNum<5>;
+ def BPL : RegisterGroup<"BPL", [BP,EBP,RBP]>, DwarfRegNum<6>;
+ def SPL : RegisterGroup<"SPL", [SP,ESP,RSP]>, DwarfRegNum<7>;
+ def R8B : RegisterGroup<"R8B", [R8W,R8D,R8]>, DwarfRegNum<8>;
+ def R9B : RegisterGroup<"R9B", [R9W,R9D,R9]>, DwarfRegNum<9>;
+ def R10B : RegisterGroup<"R10B", [R10W,R10D,R10]>, DwarfRegNum<10>;
+ def R11B : RegisterGroup<"R11B", [R11W,R11D,R11]>, DwarfRegNum<11>;
+ def R12B : RegisterGroup<"R12B", [R12W,R12D,R12]>, DwarfRegNum<12>;
+ def R13B : RegisterGroup<"R13B", [R13W,R13D,R13]>, DwarfRegNum<13>;
+ def R14B : RegisterGroup<"R14B", [R14W,R14D,R14]>, DwarfRegNum<14>;
+ def R15B : RegisterGroup<"R15B", [R15W,R15D,R15]>, DwarfRegNum<15>;
+
+ // High registers X86-32 only
+ def AH : RegisterGroup<"AH", [AX,EAX,RAX]>, DwarfRegNum<0>;
+ def CH : RegisterGroup<"CH", [CX,ECX,RCX]>, DwarfRegNum<1>;
+ def DH : RegisterGroup<"DH", [DX,EDX,RDX]>, DwarfRegNum<2>;
+ def BH : RegisterGroup<"BH", [BX,EBX,RBX]>, DwarfRegNum<3>;
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : Register<"MM0">, DwarfRegNum<29>;
@@ -73,14 +130,24 @@ let Namespace = "X86" in {
def FP6 : Register<"FP6">, DwarfRegNum<-1>;
// XMM Registers, used by the various SSE instruction set extensions
- def XMM0: Register<"XMM0">, DwarfRegNum<21>;
- def XMM1: Register<"XMM1">, DwarfRegNum<22>;
- def XMM2: Register<"XMM2">, DwarfRegNum<23>;
- def XMM3: Register<"XMM3">, DwarfRegNum<24>;
- def XMM4: Register<"XMM4">, DwarfRegNum<25>;
- def XMM5: Register<"XMM5">, DwarfRegNum<26>;
- def XMM6: Register<"XMM6">, DwarfRegNum<27>;
- def XMM7: Register<"XMM7">, DwarfRegNum<28>;
+ def XMM0: Register<"XMM0">, DwarfRegNum<17>;
+ def XMM1: Register<"XMM1">, DwarfRegNum<18>;
+ def XMM2: Register<"XMM2">, DwarfRegNum<19>;
+ def XMM3: Register<"XMM3">, DwarfRegNum<20>;
+ def XMM4: Register<"XMM4">, DwarfRegNum<21>;
+ def XMM5: Register<"XMM5">, DwarfRegNum<22>;
+ def XMM6: Register<"XMM6">, DwarfRegNum<23>;
+ def XMM7: Register<"XMM7">, DwarfRegNum<24>;
+
+ // X86-64 only
+ def XMM8: Register<"XMM8">, DwarfRegNum<25>;
+ def XMM9: Register<"XMM9">, DwarfRegNum<26>;
+ def XMM10: Register<"XMM10">, DwarfRegNum<27>;
+ def XMM11: Register<"XMM11">, DwarfRegNum<28>;
+ def XMM12: Register<"XMM12">, DwarfRegNum<29>;
+ def XMM13: Register<"XMM13">, DwarfRegNum<30>;
+ def XMM14: Register<"XMM14">, DwarfRegNum<31>;
+ def XMM15: Register<"XMM15">, DwarfRegNum<32>;
// Floating point stack registers
def ST0 : Register<"ST(0)">, DwarfRegNum<11>;
@@ -99,52 +166,247 @@ let Namespace = "X86" in {
// implicitly defined to be the register allocation order.
//
-// List AL,CL,DL before AH,CH,DH, as X86 processors often suffer from false
-// dependences between upper and lower parts of the register. BL and BH are
-// last because they are call clobbered. Both Athlon and P4 chips suffer this
-// issue.
-def GR8 : RegisterClass<"X86", [i8], 8, [AL, CL, DL, AH, CH, DH, BL, BH]>;
+// List call-clobbered registers before callee-save registers. RBX and RBP
+// (plus R12, R13, R14, and R15 for X86-64) are callee-save registers.
+// In 64-bit mode there are 12 additional i8 registers: SIL, DIL, BPL, SPL,
+// and R8B ... R15B.
+// FIXME: Allow AH, CH, DH, BH in 64-bit mode for non-REX instructions.
+def GR8 : RegisterClass<"X86", [i8], 8,
+ [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL,
+ R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SPL or BPL.
+ static const unsigned X86_GR8_AO_64_fp[] =
+ {X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
+ X86::R8B, X86::R9B, X86::R10B, X86::R11B,
+ X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B};
+ // If not, just don't allocate SPL.
+ static const unsigned X86_GR8_AO_64[] =
+ {X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
+ X86::R8B, X86::R9B, X86::R10B, X86::R11B,
+ X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL};
+    // In 32-bit mode, none of the 8-bit registers aliases EBP or ESP.
+ static const unsigned X86_GR8_AO_32[] =
+ {X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH};
+
+ GR8Class::iterator
+ GR8Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_AO_32;
+ else if (hasFP(MF))
+ return X86_GR8_AO_64_fp;
+ else
+ return X86_GR8_AO_64;
+ }
-def GR16 : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP]> {
+ GR8Class::iterator
+ GR8Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned));
+ else if (hasFP(MF))
+ return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned));
+ else
+ return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned));
+ }
+ }];
+}
+
+
+def GR16 : RegisterClass<"X86", [i16], 16,
+ [AX, CX, DX, SI, DI, BX, BP, SP,
+ R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]> {
let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SP or BP.
+ static const unsigned X86_GR16_AO_64_fp[] =
+ {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
+ X86::R8W, X86::R9W, X86::R10W, X86::R11W,
+ X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W};
+ static const unsigned X86_GR16_AO_32_fp[] =
+ {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX};
+    // If not, just don't allocate SP.
+ static const unsigned X86_GR16_AO_64[] =
+ {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
+ X86::R8W, X86::R9W, X86::R10W, X86::R11W,
+ X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP};
+ static const unsigned X86_GR16_AO_32[] =
+ {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP};
+
+ GR16Class::iterator
+ GR16Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (hasFP(MF))
+ return X86_GR16_AO_64_fp;
+ else
+ return X86_GR16_AO_64;
+ } else {
+ if (hasFP(MF))
+ return X86_GR16_AO_32_fp;
+ else
+ return X86_GR16_AO_32;
+ }
+ }
+
GR16Class::iterator
GR16Class::allocation_order_end(const MachineFunction &MF) const {
- if (hasFP(MF)) // Does the function dedicate EBP to being a frame ptr?
- return end()-2; // If so, don't allocate SP or BP
- else
- return end()-1; // If not, just don't allocate SP
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (hasFP(MF))
+ return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned));
+ else
+ return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned));
+ } else {
+ if (hasFP(MF))
+ return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned));
+ else
+ return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned));
+ }
}
}];
}
+
def GR32 : RegisterClass<"X86", [i32], 32,
- [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
+ [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+ R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]> {
let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate ESP or EBP.
+ static const unsigned X86_GR32_AO_64_fp[] =
+ {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
+ X86::R8D, X86::R9D, X86::R10D, X86::R11D,
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D};
+ static const unsigned X86_GR32_AO_32_fp[] =
+ {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX};
+    // If not, just don't allocate ESP.
+ static const unsigned X86_GR32_AO_64[] =
+ {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
+ X86::R8D, X86::R9D, X86::R10D, X86::R11D,
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP};
+ static const unsigned X86_GR32_AO_32[] =
+ {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP};
+
+ GR32Class::iterator
+ GR32Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (hasFP(MF))
+ return X86_GR32_AO_64_fp;
+ else
+ return X86_GR32_AO_64;
+ } else {
+ if (hasFP(MF))
+ return X86_GR32_AO_32_fp;
+ else
+ return X86_GR32_AO_32;
+ }
+ }
+
GR32Class::iterator
GR32Class::allocation_order_end(const MachineFunction &MF) const {
- if (hasFP(MF)) // Does the function dedicate EBP to being a frame ptr?
- return end()-2; // If so, don't allocate ESP or EBP
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (hasFP(MF))
+ return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned));
+ else
+ return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned));
+ } else {
+ if (hasFP(MF))
+ return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned));
+ else
+ return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned));
+ }
+ }
+ }];
+}
+
+
+def GR64 : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR64Class::iterator
+ GR64Class::allocation_order_end(const MachineFunction &MF) const {
+ if (hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
+ return end()-2; // If so, don't allocate RSP or RBP
else
- return end()-1; // If not, just don't allocate ESP
+ return end()-1; // If not, just don't allocate RSP
}
}];
}
+
// GR16, GR32 subclasses which contain registers that have R8 sub-registers.
+// These should only be used for 32-bit mode.
def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]>;
def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]>;
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32,
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FR32Class::iterator
+ FR32Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
+
def FR64 : RegisterClass<"X86", [f64], 64,
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FR64Class::iterator
+ FR64Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
+
// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
@@ -174,4 +436,21 @@ def RST : RegisterClass<"X86", [f64], 32,
def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32], 64,
[MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ VR128Class::iterator
+ VR128Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
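All of the FR32/FR64/VR128 method bodies above share one idea: the allocation order is a fixed array, and the end iterator handed back to the allocator simply excludes registers the current subtarget cannot use. A conceptual standalone sketch (not the generated RegisterClass API):

const unsigned *xmmAllocationOrderEnd(const unsigned *end, bool is64Bit) {
  return is64Bit ? end       // all sixteen XMM registers are usable
                 : end - 8;  // 32-bit mode stops before XMM8..XMM15
}
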
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index e3776d87a5..556f40bba2 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -12,9 +12,10 @@
//===----------------------------------------------------------------------===//
#include "X86Subtarget.h"
+#include "X86GenSubtarget.inc"
#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
-#include "X86GenSubtarget.inc"
+#include <iostream>
using namespace llvm;
cl::opt<X86Subtarget::AsmWriterFlavorTy>
@@ -29,7 +30,18 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::unset),
/// specified arguments. If we can't run cpuid on the host, return true.
static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
unsigned *rECX, unsigned *rEDX) {
-#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+#if defined(__x86_64__)
+ asm ("pushq\t%%rbx\n\t"
+ "cpuid\n\t"
+ "movl\t%%ebx, %%esi\n\t"
+ "popq\t%%rbx"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
#if defined(__GNUC__)
asm ("pushl\t%%ebx\n\t"
"cpuid\n\t"
@@ -99,8 +111,8 @@ static const char *GetCurrentX86CPU() {
case 9:
case 13: return "pentium-m";
case 14: return "yonah";
- default:
- return (Model > 14) ? "yonah" : "i686";
+ case 15: return "core2";
+ default: return "i686";
}
case 15: {
switch (Model) {
@@ -154,14 +166,16 @@ static const char *GetCurrentX86CPU() {
}
}
-X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) {
- stackAlignment = 8;
- // FIXME: this is a known good value for Yonah. Not sure about others.
- MinRepStrSizeThreshold = 128;
- X86SSELevel = NoMMXSSE;
- X863DNowLevel = NoThreeDNow;
- AsmFlavor = AsmWriterFlavor;
- Is64Bit = false;
+X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
+ : AsmFlavor(AsmWriterFlavor)
+ , X86SSELevel(NoMMXSSE)
+ , X863DNowLevel(NoThreeDNow)
+ , HasX86_64(false)
+ , stackAlignment(8)
+ // FIXME: this is a known good value for Yonah. How about others?
+ , MinRepStrSizeThreshold(128)
+ , Is64Bit(is64Bit)
+ , TargetType(isELF) { // Default to ELF unless otherwise specified.
// Determine default and user specified characteristics
std::string CPU = GetCurrentX86CPU();
@@ -169,9 +183,12 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) {
// Parse features string.
ParseSubtargetFeatures(FS, CPU);
- // Default to ELF unless otherwise specified.
- TargetType = isELF;
-
+ if (Is64Bit && !HasX86_64) {
+ std::cerr << "Warning: Generation of 64-bit code for a 32-bit processor "
+ "requested.\n";
+ HasX86_64 = true;
+ }
+
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
const std::string& TT = M.getTargetTriple();
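
The new __x86_64__ branch of GetCpuIDAndInfo saves and restores %rbx around
CPUID (the compiler may be using it, e.g. as a PIC base) and returns EBX
through ESI. A hedged usage sketch in the same style, querying the extended
leaf 0x80000001, whose EDX bit 29 reports long-mode (X86-64) support;
cpuidOn64BitHost is a hypothetical standalone helper, not the patch's function:

#include <cstdio>

// Returns false on success, mirroring the convention above. Only meaningful
// when compiled as 64-bit code with a GNU-compatible compiler.
static bool cpuidOn64BitHost(unsigned Value, unsigned *rEAX, unsigned *rEBX,
                             unsigned *rECX, unsigned *rEDX) {
#if defined(__x86_64__) && defined(__GNUC__)
  asm ("pushq\t%%rbx\n\t"
       "cpuid\n\t"
       "movl\t%%ebx, %%esi\n\t"
       "popq\t%%rbx"
       : "=a" (*rEAX), "=S" (*rEBX), "=c" (*rECX), "=d" (*rEDX)
       : "a" (Value));
  return false;
#else
  return true;  // Can't run cpuid on this host.
#endif
}

int main() {
  unsigned EAX, EBX, ECX, EDX;
  if (!cpuidOn64BitHost(0x80000001u, &EAX, &EBX, &ECX, &EDX))
    std::printf("long mode supported: %u\n", (EDX >> 29) & 1u);
  return 0;
}
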
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index b373be61af..224ebcd308 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -44,9 +44,9 @@ protected:
/// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
X863DNowEnum X863DNowLevel;
-
- /// Is64Bit - True if the processor supports Em64T.
- bool Is64Bit;
+
+ /// HasX86_64 - True if the processor supports X86-64 instructions.
+ bool HasX86_64;
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
@@ -55,6 +55,11 @@ protected:
/// Min. memset / memcpy size that is turned into rep/movs, rep/stos ops.
unsigned MinRepStrSizeThreshold;
+private:
+  /// Is64Bit - True if the processor supports 64-bit instructions and the
+  /// module's pointer size is 64 bits.
+ bool Is64Bit;
+
public:
enum {
isELF, isCygwin, isDarwin, isWindows
@@ -63,7 +68,7 @@ public:
/// This constructor initializes the data members to match that
/// of the specified module.
///
- X86Subtarget(const Module &M, const std::string &FS);
+ X86Subtarget(const Module &M, const std::string &FS, bool is64Bit);
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
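
X86Subtarget now distinguishes HasX86_64 (the processor is capable of X86-64
instructions) from Is64Bit (the module is actually being compiled with 64-bit
pointers); the constructor change in X86Subtarget.cpp above only warns, then
proceeds, when the two disagree. A minimal sketch of that reconciliation
(hypothetical standalone code, not the class itself):

#include <iostream>

struct SubtargetFlags {
  bool HasX86_64; // Processor supports X86-64 instructions.
  bool Is64Bit;   // We were asked to generate 64-bit code.
};

// Mirror the constructor logic above: 64-bit codegen on a CPU that does not
// report X86-64 support is allowed, but a warning is printed first.
void reconcile(SubtargetFlags &S) {
  if (S.Is64Bit && !S.HasX86_64) {
    std::cerr << "Warning: Generation of 64-bit code for a 32-bit processor "
                 "requested.\n";
    S.HasX86_64 = true;
  }
}
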
diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp
index b1da4ce9d5..9ca0e15bc9 100644
--- a/lib/Target/X86/X86TargetAsmInfo.cpp
+++ b/lib/Target/X86/X86TargetAsmInfo.cpp
@@ -26,13 +26,16 @@ X86TargetAsmInfo::X86TargetAsmInfo(const X86TargetMachine &TM) {
case X86Subtarget::isDarwin:
AlignmentIsInBytes = false;
GlobalPrefix = "_";
- Data64bitsDirective = 0; // we can't emit a 64-bit unit
+ if (!Subtarget->is64Bit())
+ Data64bitsDirective = 0; // we can't emit a 64-bit unit
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
PrivateGlobalPrefix = "L"; // Marker for constant pool idxs
ConstantPoolSection = "\t.const\n";
JumpTableDataSection = "\t.const\n"; // FIXME: depends on PIC mode
FourByteConstantSection = "\t.literal4\n";
EightByteConstantSection = "\t.literal8\n";
+ if (Subtarget->is64Bit())
+ SixteenByteConstantSection = "\t.literal16\n";
LCOMMDirective = "\t.lcomm\t";
COMMDirectiveTakesAlignment = false;
HasDotTypeDotSizeDirective = false;
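
On Darwin the asm-printer settings above are now conditional on the subtarget:
32-bit Darwin still cannot emit 64-bit data units, while 64-bit Darwin keeps
its 64-bit data directive and additionally gets a .literal16 constant section.
A condensed sketch of that selection (hypothetical struct; the ".quad" value is
an assumed target-independent default, not something this patch sets):

struct DarwinDataDirectives {
  const char *Data64bitsDirective;        // Null when 64-bit units can't be emitted.
  const char *SixteenByteConstantSection; // Null when there is no .literal16.
};

DarwinDataDirectives darwinDirectivesFor(bool is64Bit) {
  DarwinDataDirectives D;
  // Assumed default shown as ".quad"; the patch merely stops clearing it
  // for 64-bit Darwin.
  D.Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
  D.SixteenByteConstantSection = is64Bit ? "\t.literal16\n" : 0;
  return D;
}
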
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 807b462b90..b4fc79abca 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -33,22 +33,31 @@ int X86TargetMachineModule = 0;
namespace {
// Register the target.
- RegisterTarget<X86TargetMachine> X("x86", " IA-32 (Pentium and above)");
+ RegisterTarget<X86_32TargetMachine>
+ X("x86", " 32-bit X86: Pentium-Pro and above");
+ RegisterTarget<X86_64TargetMachine>
+ Y("x86-64", " 64-bit X86: EM64T and AMD64");
}
const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const {
return new X86TargetAsmInfo(*this);
}
-unsigned X86TargetMachine::getJITMatchQuality() {
+unsigned X86_32TargetMachine::getJITMatchQuality() {
#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
return 10;
-#else
+#endif
return 0;
+}
+
+unsigned X86_64TargetMachine::getJITMatchQuality() {
+#if defined(__x86_64__)
+ return 10;
#endif
+ return 0;
}
-unsigned X86TargetMachine::getModuleMatchQuality(const Module &M) {
+unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) {
// We strongly match "i[3-9]86-*".
std::string TT = M.getTargetTriple();
if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
@@ -65,18 +74,55 @@ unsigned X86TargetMachine::getModuleMatchQuality(const Module &M) {
return getJITMatchQuality()/2;
}
+unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "x86_64-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' &&
+ TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-')
+ return 20;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer64)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS)
+ : X86TargetMachine(M, FS, false) {
+}
+
+
+X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS)
+ : X86TargetMachine(M, FS, true) {
+}
+
/// X86TargetMachine ctor - Create an ILP32 architecture model
///
-X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS)
- : Subtarget(M, FS), DataLayout("e-p:32:32-d:32-l:32"),
+X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit)
+ : Subtarget(M, FS, is64Bit),
+ DataLayout(Subtarget.is64Bit() ?
+ std::string("e-p:64:64-d:32-l:32") :
+ std::string("e-p:32:32-d:32-l:32")),
FrameInfo(TargetFrameInfo::StackGrowsDown,
- Subtarget.getStackAlignment(), -4),
+ Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
InstrInfo(*this), JITInfo(*this), TLInfo(*this) {
if (getRelocationModel() == Reloc::Default)
if (Subtarget.isTargetDarwin())
setRelocationModel(Reloc::DynamicNoPIC);
else
setRelocationModel(Reloc::PIC_);
+ if (Subtarget.is64Bit()) {
+ // No DynamicNoPIC support under X86-64.
+ if (getRelocationModel() == Reloc::DynamicNoPIC)
+ setRelocationModel(Reloc::PIC_);
+ // Default X86-64 code model is small.
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+ }
}
//===----------------------------------------------------------------------===//
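
The X86TargetMachine constructor above now derives everything 64-bit-specific
from the subtarget: a pointer-64 data layout string, a -8 frame offset, no
DynamicNoPIC relocation model, and a Small default code model. A sketch of just
the defaulting rules (hypothetical helper with simplified enums, not LLVM's
API):

#include <string>

enum RelocModel { RelocDefault, Static, PIC_, DynamicNoPIC };
enum CodeModel  { CodeDefault, Small, Kernel, Medium, Large };

struct X86Defaults {
  std::string DataLayout;
  RelocModel RM;
  CodeModel CM;
};

X86Defaults computeX86Defaults(bool is64Bit, RelocModel RM, CodeModel CM) {
  X86Defaults D;
  D.DataLayout = is64Bit ? "e-p:64:64-d:32-l:32" : "e-p:32:32-d:32-l:32";
  if (is64Bit) {
    if (RM == DynamicNoPIC)
      RM = PIC_;          // No DynamicNoPIC support under X86-64.
    if (CM == CodeDefault)
      CM = Small;         // Default X86-64 code model is small.
  }
  D.RM = RM;
  D.CM = CM;
  return D;
}
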
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 8278cf1b22..05cb9484d6 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -37,7 +37,7 @@ protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
public:
- X86TargetMachine(const Module &M, const std::string &FS);
+ X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit);
virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
@@ -54,6 +54,7 @@ public:
static unsigned getModuleMatchQuality(const Module &M);
static unsigned getJITMatchQuality();
+
// Set up the pass pipeline.
virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
virtual bool addPostRegAlloc(FunctionPassManager &PM, bool Fast);
@@ -64,6 +65,27 @@ public:
virtual bool addCodeEmitter(FunctionPassManager &PM, bool Fast,
MachineCodeEmitter &MCE);
};
+
+/// X86_32TargetMachine - X86 32-bit target machine.
+///
+class X86_32TargetMachine : public X86TargetMachine {
+public:
+ X86_32TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+/// X86_64TargetMachine - X86 64-bit target machine.
+///
+class X86_64TargetMachine : public X86TargetMachine {
+public:
+ X86_64TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
} // End llvm namespace
#endif
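
X86_64TargetMachine::getModuleMatchQuality, declared here and defined in
X86TargetMachine.cpp above, strongly prefers an explicit "x86_64-" triple and
otherwise falls back to the module's endianness and pointer size. A simplified
standalone sketch of that heuristic (hypothetical function; it folds the JIT
fallback into a plain zero):

#include <cassert>
#include <string>

unsigned x86_64ModuleMatchQuality(const std::string &TT,
                                  bool isLittleEndian, bool has64BitPointers) {
  if (TT.size() >= 7 && TT.compare(0, 7, "x86_64-") == 0)
    return 20;            // Strong match: "x86_64-*" triple.
  if (isLittleEndian && has64BitPointers)
    return 10;            // Weak match on module characteristics.
  return 0;               // Some other target, or unknown.
}

int main() {
  assert(x86_64ModuleMatchQuality("x86_64-apple-darwin8", true, true) == 20);
  assert(x86_64ModuleMatchQuality("i686-pc-linux-gnu", true, false) == 0);
  return 0;
}
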