author | Evan Cheng <evan.cheng@apple.com> | 2006-09-08 06:48:29 +0000
committer | Evan Cheng <evan.cheng@apple.com> | 2006-09-08 06:48:29 +0000
commit | 25ab690a43cbbb591b76d49e3595b019c32f4b3f (patch)
tree | fe952a3e394b9f01b6ce8ed8691cee8c507ed094 /lib/Target
parent | 1e5fb6928c510bc945dbcd23d99022288ad7e863 (diff)
download | llvm-25ab690a43cbbb591b76d49e3595b019c32f4b3f.tar.gz, llvm-25ab690a43cbbb591b76d49e3595b019c32f4b3f.tar.bz2, llvm-25ab690a43cbbb591b76d49e3595b019c32f4b3f.tar.xz
Committing X86-64 support.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30177 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
25 files changed, 3603 insertions, 462 deletions
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt new file mode 100644 index 0000000000..af3e273237 --- /dev/null +++ b/lib/Target/X86/README-X86-64.txt @@ -0,0 +1,269 @@ +//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===// + +Implement different PIC models? Right now we only support Mac OS X with small +PIC code model. + +//===---------------------------------------------------------------------===// + +Make use of "Red Zone". + +//===---------------------------------------------------------------------===// + +Implement __int128 and long double support. + +//===---------------------------------------------------------------------===// + +For this: + +extern void xx(void); +void bar(void) { + xx(); +} + +gcc compiles to: + +.globl _bar +_bar: + jmp _xx + +We need to do the tailcall optimization as well. + +//===---------------------------------------------------------------------===// + +For this: + +int test(int a) +{ + return a * 3; +} + +We generate + leal (%edi,%edi,2), %eax + +We should be generating + leal (%rdi,%rdi,2), %eax + +instead. The latter form does not require an address-size prefix 67H. + +It's probably ok to simply emit the corresponding 64-bit super class registers +in this case? + + +//===---------------------------------------------------------------------===// + +AMD64 Optimization Manual 8.2 has some nice information about optimizing integer +multiplication by a constant. How much of it applies to Intel's X86-64 +implementation? There are definite trade-offs to consider: latency vs. register +pressure vs. code size. + +//===---------------------------------------------------------------------===// + +Are we better off using branches instead of cmov to implement FP to +unsigned i64? + +_conv: + ucomiss LC0(%rip), %xmm0 + cvttss2siq %xmm0, %rdx + jb L3 + subss LC0(%rip), %xmm0 + movabsq $-9223372036854775808, %rax + cvttss2siq %xmm0, %rdx + xorq %rax, %rdx +L3: + movq %rdx, %rax + ret + +instead of + +_conv: + movss LCPI1_0(%rip), %xmm1 + cvttss2siq %xmm0, %rcx + movaps %xmm0, %xmm2 + subss %xmm1, %xmm2 + cvttss2siq %xmm2, %rax + movabsq $-9223372036854775808, %rdx + xorq %rdx, %rax + ucomiss %xmm1, %xmm0 + cmovb %rcx, %rax + ret + +Seems like the jb branch has a high likelihood of being taken. It would have +saved a few instructions. + +//===---------------------------------------------------------------------===// + +Poor codegen: + +int X[2]; +int b; +void test(void) { + memset(X, b, 2*sizeof(X[0])); +} + +llc: + movq _b@GOTPCREL(%rip), %rax + movzbq (%rax), %rax + movq %rax, %rcx + shlq $8, %rcx + orq %rax, %rcx + movq %rcx, %rax + shlq $16, %rax + orq %rcx, %rax + movq %rax, %rcx + shlq $32, %rcx + movq _X@GOTPCREL(%rip), %rdx + orq %rax, %rcx + movq %rcx, (%rdx) + ret + +gcc: + movq _b@GOTPCREL(%rip), %rax + movabsq $72340172838076673, %rdx + movzbq (%rax), %rax + imulq %rdx, %rax + movq _X@GOTPCREL(%rip), %rdx + movq %rax, (%rdx) + ret + +//===---------------------------------------------------------------------===// + +Vararg function prologue can be further optimized. Currently all XMM registers +are stored into the register save area. Most of them can be eliminated since the +upper bound of the number of XMM registers used is passed in %al.
gcc produces +something like the following: + + movzbl %al, %edx + leaq 0(,%rdx,4), %rax + leaq 4+L2(%rip), %rdx + leaq 239(%rsp), %rax + jmp *%rdx + movaps %xmm7, -15(%rax) + movaps %xmm6, -31(%rax) + movaps %xmm5, -47(%rax) + movaps %xmm4, -63(%rax) + movaps %xmm3, -79(%rax) + movaps %xmm2, -95(%rax) + movaps %xmm1, -111(%rax) + movaps %xmm0, -127(%rax) +L2: + +It jumps over the movaps that do not need to be stored. Hard to see this being +significant as it added 5 instructions (including an indirect branch) to avoid +executing 0 to 8 stores in the function prologue. + +Perhaps we can optimize for the common case where no XMM registers are used for +parameter passing. i.e. if %al == 0, jump over all stores. Or in the case of a +leaf function where we can determine that no XMM input parameter is needed, avoid +emitting the stores at all. + +//===---------------------------------------------------------------------===// + +AMD64 has a complex calling convention for aggregate passing by value: + +1. If the size of an object is larger than two eightbytes, or in C++, is a non- + POD structure or union type, or contains unaligned fields, it has class + MEMORY. +2. Both eightbytes get initialized to class NO_CLASS. +3. Each field of an object is classified recursively so that always two fields + are considered. The resulting class is calculated according to the classes + of the fields in the eightbyte: + (a) If both classes are equal, this is the resulting class. + (b) If one of the classes is NO_CLASS, the resulting class is the other + class. + (c) If one of the classes is MEMORY, the result is the MEMORY class. + (d) If one of the classes is INTEGER, the result is INTEGER. + (e) If one of the classes is X87, X87UP, or COMPLEX_X87, MEMORY is used as + the class. + (f) Otherwise class SSE is used. +4. Then a post-merger cleanup is done: + (a) If one of the classes is MEMORY, the whole argument is passed in memory. + (b) If SSEUP is not preceded by SSE, it is converted to SSE. + +Currently the llvm frontend does not handle this correctly. + +Problem 1: + typedef struct { int i; double d; } QuadWordS; +It is currently passed in two i64 integer registers. However, a gcc-compiled +callee expects the second element 'd' to be passed in XMM0. + +Problem 2: + typedef struct { int32_t i; float j; double d; } QuadWordS; +The size of the first two fields == i64 so they will be combined and passed in +an integer register (RDI). The third field is still passed in XMM0. + +Problem 3: + typedef struct { int64_t i; int8_t j; int64_t d; } S; + void test(S s) +The size of this aggregate is greater than two i64 so it should be passed in +memory. Currently llvm breaks this down and passes it in three integer +registers. + +Problem 4: +Taking problem 3 one step further, where a function expects an aggregate value +in memory followed by more parameter(s) passed in register(s). + void test(S s, int b) + +LLVM IR does not allow parameter passing by aggregates, therefore it must break +the aggregate value (in problems 3 and 4) into a number of scalar values: + void %test(long %s.i, byte %s.j, long %s.d); + +However, if the backend were to lower this code literally it would pass the 3 +values in integer registers.
To force it to be passed in memory, the frontend +should change the function signature to: + void %test(long %undef1, long %undef2, long %undef3, long %undef4, + long %undef5, long %undef6, + long %s.i, byte %s.j, long %s.d); +And the call site would look something like this: + call void %test( undef, undef, undef, undef, undef, undef, + %tmp.s.i, %tmp.s.j, %tmp.s.d ); +The first 6 undef parameters would exhaust the 6 integer registers used for +parameter passing. The following three integer values would then be forced into +memory. + +For problem 4, the parameter 'd' would be moved to the front of the parameter +list so it will be passed in a register: + void %test(int %d, + long %undef1, long %undef2, long %undef3, long %undef4, + long %undef5, long %undef6, + long %s.i, byte %s.j, long %s.d); + +//===---------------------------------------------------------------------===// + +For this: + +extern int dst[]; +extern int* ptr; + +void test(void) { + ptr = dst; +} + +We generate this code for static relocation model: + +_test: + leaq _dst(%rip), %rax + movq %rax, _ptr(%rip) + ret + +If we are in the small code model, then we can treat _dst as a 32-bit constant. + movq $_dst, _ptr(%rip) + +Note, however, that we should continue to use RIP relative addressing mode as much as +possible. The above is actually one byte shorter than + movq $_dst, _ptr + +//===---------------------------------------------------------------------===// + +Right now the asm printer assumes GlobalAddresses are accessed via RIP relative +addressing. Therefore, it is not possible to generate this: + movabsq $__ZTV10polynomialIdE+16, %rax + +That is ok for now since we currently only support the small code model. So the above +is selected as + leaq __ZTV10polynomialIdE+16(%rip), %rax + +This is probably slightly slower but is much shorter than movabsq. However, if +we were to support medium or larger code models, we would need to use the movabs +instruction. We should probably introduce something like AbsoluteAddress to +distinguish it from GlobalAddress so the asm printer and JIT code emitter can +do the right thing. diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index e15512db23..c4b3d8635f 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -20,8 +20,8 @@ include "../Target.td" // X86 Subtarget features. // -def Feature64Bit : SubtargetFeature<"64bit", "Is64Bit", "true", - "Enable 64-bit instructions">; +def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", + "Support 64-bit instructions">; def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", @@ -61,6 +61,8 @@ def : Proc<"prescott", [FeatureMMX, FeatureSSE1, FeatureSSE2, FeatureSSE3]>; def : Proc<"nocona", [FeatureMMX, FeatureSSE1, FeatureSSE2, FeatureSSE3, Feature64Bit]>; +def : Proc<"core2", [FeatureMMX, FeatureSSE1, FeatureSSE2, + FeatureSSE3, Feature64Bit]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; @@ -105,16 +107,20 @@ def X86InstrInfo : InstrInfo { // should be kept up-to-date with the fields in the X86InstrInfo.h file. let TSFlagsFields = ["FormBits", "hasOpSizePrefix", + "hasAdSizePrefix", "Prefix", + "hasREX_WPrefix", "ImmTypeBits", "FPFormBits", "Opcode"]; let TSFlagsShifts = [0, 6, 7, - 11, + 8, + 12, 13, - 16]; + 16, + 24]; } // The X86 target supports two different syntaxes for emitting machine code.
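The widened TSFlagsShifts above imply a bit layout roughly like the following sketch. This is a hypothetical illustration only, not part of the commit: the field names and widths are inferred from the gaps between the shifts, and the authoritative enums live in X86InstrInfo.h and may use different identifiers.

// Hypothetical sketch of the TSFlags packing implied by the new shifts
// ([0, 6, 7, 8, 12, 13, 16, 24]); not the committed X86InstrInfo.h code.
#include <cstdint>

enum : uint32_t {
  FormShift   = 0,   // FormBits:        bits 0-5   (instruction format)
  OpSizeShift = 6,   // hasOpSizePrefix: bit 6      (0x66 operand-size prefix)
  AdSizeShift = 7,   // hasAdSizePrefix: bit 7      (0x67 address-size prefix, new)
  PrefixShift = 8,   // Prefix:          bits 8-11  (0F, F2/F3, REP, D8-DF, ...)
  REXWShift   = 12,  // hasREX_WPrefix:  bit 12     (REX.W, 64-bit operand size, new)
  ImmShift    = 13,  // ImmTypeBits:     bits 13-15 (Imm8/Imm16/Imm32/Imm64)
  FPFormShift = 16,  // FPFormBits:      bits 16-23
  OpcodeShift = 24   // Opcode:          bits 24-31 (raw opcode byte)
};

// Example queries against a packed TSFlags word.
inline uint8_t getBaseOpcode(uint32_t TSFlags) { return uint8_t(TSFlags >> OpcodeShift); }
inline bool    needsREX_W(uint32_t TSFlags)    { return (TSFlags >> REXWShift) & 1; }
inline bool    needsAdSize(uint32_t TSFlags)   { return (TSFlags >> AdSizeShift) & 1; }

This mirrors how the code emitter changes below test Desc.TSFlags against X86II::AdSize and X86II::REX_W before emitting the 0x67 and REX prefixes.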
diff --git a/lib/Target/X86/X86ATTAsmPrinter.cpp b/lib/Target/X86/X86ATTAsmPrinter.cpp index e3653e4920..b17cde18de 100755 --- a/lib/Target/X86/X86ATTAsmPrinter.cpp +++ b/lib/Target/X86/X86ATTAsmPrinter.cpp @@ -126,8 +126,9 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << '%'; unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { - MVT::ValueType VT = (strcmp(Modifier,"subreg16") == 0) - ? MVT::i16 : MVT::i8; + MVT::ValueType VT = (strcmp(Modifier+6,"64") == 0) ? + MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : + ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8)); Reg = getX86SubSuperRegister(Reg, VT); } for (const char *Name = RI.get(Reg).Name; *Name; ++Name) @@ -148,9 +149,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, if (!isMemOp) O << '$'; O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << "_" << MO.getJumpTableIndex(); - if (Subtarget->isTargetDarwin() && + if (X86PICStyle == PICStyle::Stub && TM.getRelocationModel() == Reloc::PIC_) O << "-\"L" << getFunctionNumber() << "$pb\""; + if (Subtarget->is64Bit()) + O << "(%rip)"; return; } case MachineOperand::MO_ConstantPoolIndex: { @@ -158,7 +161,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, if (!isMemOp) O << '$'; O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getConstantPoolIndex(); - if (Subtarget->isTargetDarwin() && + if (X86PICStyle == PICStyle::Stub && TM.getRelocationModel() == Reloc::PIC_) O << "-\"L" << getFunctionNumber() << "$pb\""; int Offset = MO.getOffset(); @@ -166,47 +169,59 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << "+" << Offset; else if (Offset < 0) O << Offset; + + if (Subtarget->is64Bit()) + O << "(%rip)"; return; } case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); bool isMemOp = Modifier && !strcmp(Modifier, "mem"); if (!isMemOp && !isCallOp) O << '$'; - // Darwin block shameless ripped from PPCAsmPrinter.cpp - if (Subtarget->isTargetDarwin() && + + GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + bool isExt = (GV->isExternal() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()); + if (X86PICStyle == PICStyle::Stub && TM.getRelocationModel() != Reloc::Static) { - GlobalValue *GV = MO.getGlobal(); - std::string Name = Mang->getValueName(GV); // Link-once, External, or Weakly-linked global variables need // non-lazily-resolved stubs - if (GV->isExternal() || GV->hasWeakLinkage() || - GV->hasLinkOnceLinkage()) { + if (isExt) { // Dynamically-resolved functions need a stub for the function. 
- if (isCallOp && isa<Function>(GV) && cast<Function>(GV)->isExternal()) { + if (isCallOp && isa<Function>(GV)) { FnStubs.insert(Name); O << "L" << Name << "$stub"; } else { GVStubs.insert(Name); O << "L" << Name << "$non_lazy_ptr"; } - } else { - O << Mang->getValueName(GV); - } + } else + O << Name; if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_) O << "-\"L" << getFunctionNumber() << "$pb\""; - } else - O << Mang->getValueName(MO.getGlobal()); + } else + O << Name; + int Offset = MO.getOffset(); if (Offset > 0) O << "+" << Offset; else if (Offset < 0) O << Offset; + + if (!isCallOp && + Subtarget->is64Bit()) { + if (isExt && TM.getRelocationModel() != Reloc::Static) + O << "@GOTPCREL"; + O << "(%rip)"; + } + return; } case MachineOperand::MO_ExternalSymbol: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); if (isCallOp && - Subtarget->isTargetDarwin() && + X86PICStyle == PICStyle::Stub && TM.getRelocationModel() != Reloc::Static) { std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); @@ -216,6 +231,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } if (!isCallOp) O << '$'; O << TAI->getGlobalPrefix() << MO.getSymbolName(); + + if (!isCallOp && + Subtarget->is64Bit()) + O << "(%rip)"; + return; } default: @@ -238,7 +258,8 @@ void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) { } } -void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){ +void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier){ assert(isMem(MI, Op) && "Invalid memory reference!"); const MachineOperand &BaseReg = MI->getOperand(Op); @@ -266,12 +287,13 @@ void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){ if (IndexReg.getReg() || BaseReg.getReg()) { O << "("; - if (BaseReg.getReg()) - printOperand(MI, Op); + if (BaseReg.getReg()) { + printOperand(MI, Op, Modifier); + } if (IndexReg.getReg()) { O << ","; - printOperand(MI, Op+2); + printOperand(MI, Op+2, Modifier); if (ScaleVal != 1) O << "," << ScaleVal; } @@ -350,43 +372,25 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, /// void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; - // This works around some Darwin assembler bugs. - if (Subtarget->isTargetDarwin()) { - switch (MI->getOpcode()) { - case X86::REP_MOVSB: - O << "rep/movsb (%esi),(%edi)\n"; - return; - case X86::REP_MOVSD: - O << "rep/movsl (%esi),(%edi)\n"; - return; - case X86::REP_MOVSW: - O << "rep/movsw (%esi),(%edi)\n"; - return; - case X86::REP_STOSB: - O << "rep/stosb\n"; - return; - case X86::REP_STOSD: - O << "rep/stosl\n"; - return; - case X86::REP_STOSW: - O << "rep/stosw\n"; - return; - default: - break; - } - } // See if a truncate instruction can be turned into a nop. 
switch (MI->getOpcode()) { default: break; - case X86::TRUNC_GR32_GR16: - case X86::TRUNC_GR32_GR8: - case X86::TRUNC_GR16_GR8: { + case X86::TRUNC_64to32: + case X86::TRUNC_64to16: + case X86::TRUNC_32to16: + case X86::TRUNC_32to8: + case X86::TRUNC_16to8: + case X86::TRUNC_32_to8: + case X86::TRUNC_16_to8: { const MachineOperand &MO0 = MI->getOperand(0); const MachineOperand &MO1 = MI->getOperand(1); unsigned Reg0 = MO0.getReg(); unsigned Reg1 = MO1.getReg(); - if (MI->getOpcode() == X86::TRUNC_GR32_GR16) + unsigned Opc = MI->getOpcode(); + if (Opc == X86::TRUNC_64to32) + Reg1 = getX86SubSuperRegister(Reg1, MVT::i32); + else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16) Reg1 = getX86SubSuperRegister(Reg1, MVT::i16); else Reg1 = getX86SubSuperRegister(Reg1, MVT::i8); @@ -395,6 +399,9 @@ void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) { O << "\n\t"; break; } + case X86::PsMOVZX64rr32: + O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t"; + break; } // Call the autogenerated instruction printer routines. diff --git a/lib/Target/X86/X86ATTAsmPrinter.h b/lib/Target/X86/X86ATTAsmPrinter.h index ff707caee6..167e812f4d 100755 --- a/lib/Target/X86/X86ATTAsmPrinter.h +++ b/lib/Target/X86/X86ATTAsmPrinter.h @@ -60,6 +60,9 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter { void printf128mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } + void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo, "subreg64"); + } bool printAsmMRegister(const MachineOperand &MO, const char Mode); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -69,7 +72,8 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter { void printMachineInstruction(const MachineInstr *MI); void printSSECC(const MachineInstr *MI, unsigned Op); - void printMemReference(const MachineInstr *MI, unsigned Op); + void printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier=NULL); void printPICLabel(const MachineInstr *MI, unsigned Op); bool runOnMachineFunction(MachineFunction &F); }; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 4a54e5914d..b634d13ea4 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -30,8 +30,12 @@ Statistic<> llvm::EmittedInsts("asm-printer", "Number of machine instrs printed"); /// doInitialization -bool X86SharedAsmPrinter::doInitialization(Module &M) { +bool X86SharedAsmPrinter::doInitialization(Module &M) { if (Subtarget->isTargetDarwin()) { + const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); + if (!Subtarget->is64Bit()) + X86PICStyle = PICStyle::Stub; + // Emit initial debug information. 
DW.BeginModule(&M); } diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 8d32f59d8b..6db9e45dc3 100755 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -29,12 +29,19 @@ namespace llvm { extern Statistic<> EmittedInsts; +// FIXME: Move this to CodeGen/AsmPrinter.h +namespace PICStyle { + enum X86AsmPICStyle { + Stub, GOT + }; +} + struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter { DwarfWriter DW; X86SharedAsmPrinter(std::ostream &O, X86TargetMachine &TM, const TargetAsmInfo *T) - : AsmPrinter(O, TM, T), DW(O, this, T) { + : AsmPrinter(O, TM, T), DW(O, this, T), X86PICStyle(PICStyle::GOT) { Subtarget = &TM.getSubtarget<X86Subtarget>(); } @@ -49,6 +56,8 @@ struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter { MachineFunctionPass::getAnalysisUsage(AU); } + PICStyle::X86AsmPICStyle X86PICStyle; + const X86Subtarget *Subtarget; // Necessary for Darwin to print out the apprioriate types of linker stubs diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index f7d53caed4..0ac8bc5f32 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -12,6 +12,8 @@ // //===----------------------------------------------------------------------===// +#include "X86InstrInfo.h" +#include "X86Subtarget.h" #include "X86TargetMachine.h" #include "X86Relocations.h" #include "X86.h" @@ -35,14 +37,16 @@ namespace { namespace { class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass { const X86InstrInfo *II; - TargetMachine &TM; + const TargetData *TD; + TargetMachine &TM; MachineCodeEmitter &MCE; + bool Is64BitMode; public: explicit Emitter(TargetMachine &tm, MachineCodeEmitter &mce) - : II(0), TM(tm), MCE(mce) {} + : II(0), TD(0), TM(tm), MCE(mce), Is64BitMode(false) {} Emitter(TargetMachine &tm, MachineCodeEmitter &mce, - const X86InstrInfo& ii) - : II(&ii), TM(tm), MCE(mce) {} + const X86InstrInfo &ii, const TargetData &td, bool is64) + : II(&ii), TD(&td), TM(tm), MCE(mce), Is64BitMode(is64) {} bool runOnMachineFunction(MachineFunction &MF); @@ -54,20 +58,29 @@ namespace { private: void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); - void emitPCRelativeValue(unsigned Address); - void emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall); - void emitGlobalAddressForPtr(GlobalValue *GV, int Disp = 0); + void emitPCRelativeValue(intptr_t Address); + void emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub); + void emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative, + int Disp = 0, unsigned PCAdj = 0); void emitExternalSymbolAddress(const char *ES, bool isPCRelative); + void emitPCRelativeConstPoolAddress(unsigned CPI, int Disp = 0, + unsigned PCAdj = 0); + void emitPCRelativeJumpTableAddress(unsigned JTI, unsigned PCAdj = 0); - void emitDisplacementField(const MachineOperand *RelocOp, int DispVal); + void emitDisplacementField(const MachineOperand *RelocOp, int DispVal, + unsigned PCAdj = 0); void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField); void emitSIBByte(unsigned SS, unsigned Index, unsigned Base); - void emitConstant(unsigned Val, unsigned Size); + void emitConstant(uint64_t Val, unsigned Size); void emitMemModRMByte(const MachineInstr &MI, - unsigned Op, unsigned RegOpcodeField); + unsigned Op, unsigned RegOpcodeField, + unsigned PCAdj = 0); + unsigned getX86RegNum(unsigned RegNo); + bool isX86_64ExtendedReg(const MachineOperand &MO); + unsigned determineREX(const MachineInstr &MI); }; } @@ -83,6 +96,9 @@ bool 
Emitter::runOnMachineFunction(MachineFunction &MF) { MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); II = ((X86TargetMachine&)MF.getTarget()).getInstrInfo(); + TD = ((X86TargetMachine&)MF.getTarget()).getTargetData(); + Is64BitMode = + ((X86TargetMachine&)MF.getTarget()).getSubtarget<X86Subtarget>().is64Bit(); do { MCE.startFunction(MF); @@ -98,9 +114,9 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) { return false; } -/// emitPCRelativeValue - Emit a 32-bit PC relative address. +/// emitPCRelativeValue - Emit a PC relative address. /// -void Emitter::emitPCRelativeValue(unsigned Address) { +void Emitter::emitPCRelativeValue(intptr_t Address) { MCE.emitWordLE(Address-MCE.getCurrentPCValue()-4); } @@ -119,20 +135,22 @@ void Emitter::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) { /// emitGlobalAddressForCall - Emit the specified address to the code stream /// assuming this is part of a function call, which is PC relative. /// -void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall) { +void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub) { MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), X86::reloc_pcrel_word, GV, 0, - !isTailCall /*Doesn'tNeedStub*/)); + DoesntNeedStub)); MCE.emitWordLE(0); } /// emitGlobalAddress - Emit the specified address to the code stream assuming -/// this is part of a "take the address of a global" instruction, which is not -/// PC relative. +/// this is part of a "take the address of a global" instruction. /// -void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, int Disp /* = 0 */) { - MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), - X86::reloc_absolute_word, GV)); +void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative, + int Disp /* = 0 */, + unsigned PCAdj /* = 0 */) { + unsigned rt = isPCRelative ? X86::reloc_pcrel_word : X86::reloc_absolute_word; + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), rt, + GV, PCAdj)); MCE.emitWordLE(Disp); // The relocated value will be added to the displacement } @@ -145,6 +163,26 @@ void Emitter::emitExternalSymbolAddress(const char *ES, bool isPCRelative) { MCE.emitWordLE(0); } +/// emitPCRelativeConstPoolAddress - Arrange for the address of an constant pool +/// to be emitted to the current location in the function, and allow it to be PC +/// relative. +void Emitter::emitPCRelativeConstPoolAddress(unsigned CPI, int Disp /* = 0 */, + unsigned PCAdj /* = 0 */) { + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + X86::reloc_pcrel_word, CPI, PCAdj)); + MCE.emitWordLE(Disp); // The relocated value will be added to the displacement +} + +/// emitPCRelativeJumpTableAddress - Arrange for the address of a jump table to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +void Emitter::emitPCRelativeJumpTableAddress(unsigned JTI, + unsigned PCAdj /* = 0 */) { + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + X86::reloc_pcrel_word, JTI, PCAdj)); + MCE.emitWordLE(0); // The relocated value will be added to the displacement +} + /// N86 namespace - Native X86 Register numbers... used by X86 backend. /// namespace N86 { @@ -153,28 +191,53 @@ namespace N86 { }; } - // getX86RegNum - This function maps LLVM register identifiers to their X86 // specific numbering, which is used in various places encoding instructions. 
// -static unsigned getX86RegNum(unsigned RegNo) { +unsigned Emitter::getX86RegNum(unsigned RegNo) { switch(RegNo) { - case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; - case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; - case X86::EDX: case X86::DX: case X86::DL: return N86::EDX; - case X86::EBX: case X86::BX: case X86::BL: return N86::EBX; - case X86::ESP: case X86::SP: case X86::AH: return N86::ESP; - case X86::EBP: case X86::BP: case X86::CH: return N86::EBP; - case X86::ESI: case X86::SI: case X86::DH: return N86::ESI; - case X86::EDI: case X86::DI: case X86::BH: return N86::EDI; + case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; + case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; + case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX; + case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX; + case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH: + return N86::ESP; + case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH: + return N86::EBP; + case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH: + return N86::ESI; + case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH: + return N86::EDI; + + case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: + return N86::EAX; + case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: + return N86::ECX; + case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: + return N86::EDX; + case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: + return N86::EBX; + case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: + return N86::ESP; + case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: + return N86::EBP; + case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: + return N86::ESI; + case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: + return N86::EDI; case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3: case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: return RegNo-X86::ST0; - case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3: - case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: - return RegNo-X86::XMM0; + case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3: + case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: + return II->getRegisterInfo().getDwarfRegNum(RegNo) - + II->getRegisterInfo().getDwarfRegNum(X86::XMM0); + case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: + case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: + return II->getRegisterInfo().getDwarfRegNum(RegNo) - + II->getRegisterInfo().getDwarfRegNum(X86::XMM8); default: assert(MRegisterInfo::isVirtualRegister(RegNo) && @@ -199,7 +262,7 @@ void Emitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base) { MCE.emitByte(ModRMByte(SS, Index, Base)); } -void Emitter::emitConstant(unsigned Val, unsigned Size) { +void Emitter::emitConstant(uint64_t Val, unsigned Size) { // Output the constant in little endian byte order... for (unsigned i = 0; i != Size; ++i) { MCE.emitByte(Val & 255); @@ -214,7 +277,7 @@ static bool isDisp8(int Value) { } void Emitter::emitDisplacementField(const MachineOperand *RelocOp, - int DispVal) { + int DispVal, unsigned PCAdj) { // If this is a simple integer displacement that doesn't require a relocation, // emit it now. 
if (!RelocOp) { @@ -225,14 +288,27 @@ void Emitter::emitDisplacementField(const MachineOperand *RelocOp, // Otherwise, this is something that requires a relocation. Emit it as such // now. if (RelocOp->isGlobalAddress()) { - emitGlobalAddressForPtr(RelocOp->getGlobal(), RelocOp->getOffset()); + // In 64-bit static small code model, we could potentially emit absolute. + // But it's probably not beneficial. + // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative + // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute + emitGlobalAddressForPtr(RelocOp->getGlobal(), Is64BitMode, + RelocOp->getOffset(), PCAdj); + } else if (RelocOp->isConstantPoolIndex()) { + // Must be in 64-bit mode. + emitPCRelativeConstPoolAddress(RelocOp->getConstantPoolIndex(), + RelocOp->getOffset(), PCAdj); + } else if (RelocOp->isJumpTableIndex()) { + // Must be in 64-bit mode. + emitPCRelativeJumpTableAddress(RelocOp->getJumpTableIndex(), PCAdj); } else { assert(0 && "Unknown value to relocate!"); } } void Emitter::emitMemModRMByte(const MachineInstr &MI, - unsigned Op, unsigned RegOpcodeField) { + unsigned Op, unsigned RegOpcodeField, + unsigned PCAdj) { const MachineOperand &Op3 = MI.getOperand(Op+3); int DispVal = 0; const MachineOperand *DispForReloc = 0; @@ -241,10 +317,18 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, if (Op3.isGlobalAddress()) { DispForReloc = &Op3; } else if (Op3.isConstantPoolIndex()) { - DispVal += MCE.getConstantPoolEntryAddress(Op3.getConstantPoolIndex()); - DispVal += Op3.getOffset(); + if (Is64BitMode) { + DispForReloc = &Op3; + } else { + DispVal += MCE.getConstantPoolEntryAddress(Op3.getConstantPoolIndex()); + DispVal += Op3.getOffset(); + } } else if (Op3.isJumpTableIndex()) { - DispVal += MCE.getJumpTableEntryAddress(Op3.getJumpTableIndex()); + if (Is64BitMode) { + DispForReloc = &Op3; + } else { + DispVal += MCE.getJumpTableEntryAddress(Op3.getJumpTableIndex()); + } } else { DispVal = Op3.getImm(); } @@ -256,12 +340,13 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, unsigned BaseReg = Base.getReg(); // Is a SIB byte needed? - if (IndexReg.getReg() == 0 && BaseReg != X86::ESP) { + if (IndexReg.getReg() == 0 && + (BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) { if (BaseReg == 0) { // Just a displacement? // Emit special case [disp32] encoding MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); - emitDisplacementField(DispForReloc, DispVal); + emitDisplacementField(DispForReloc, DispVal, PCAdj); } else { unsigned BaseRegNo = getX86RegNum(BaseReg); if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { @@ -274,12 +359,13 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, } else { // Emit the most general non-SIB encoding: [REG+disp32] MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); - emitDisplacementField(DispForReloc, DispVal); + emitDisplacementField(DispForReloc, DispVal, PCAdj); } } } else { // We need a SIB byte, so start by outputting the ModR/M byte first - assert(IndexReg.getReg() != X86::ESP && "Cannot use ESP as index reg!"); + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); bool ForceDisp32 = false; bool ForceDisp8 = false; @@ -292,7 +378,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, // Emit the normal disp32 encoding. 
MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); ForceDisp32 = true; - } else if (DispVal == 0 && BaseReg != X86::EBP) { + } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) { // Emit no displacement ModR/M byte MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); } else if (isDisp8(DispVal)) { @@ -327,7 +413,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, if (ForceDisp8) { emitConstant(DispVal, 1); } else if (DispVal != 0 || ForceDisp32) { - emitDisplacementField(DispForReloc, DispVal); + emitDisplacementField(DispForReloc, DispVal, PCAdj); } } } @@ -337,11 +423,131 @@ static unsigned sizeOfImm(const TargetInstrDescriptor &Desc) { case X86II::Imm8: return 1; case X86II::Imm16: return 2; case X86II::Imm32: return 4; + case X86II::Imm64: return 8; default: assert(0 && "Immediate size not set!"); return 0; } } +/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register? +/// e.g. r8, xmm8, etc. +bool Emitter::isX86_64ExtendedReg(const MachineOperand &MO) { + if (!MO.isRegister()) return false; + unsigned RegNo = MO.getReg(); + int DWNum = II->getRegisterInfo().getDwarfRegNum(RegNo); + if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::R8) && + DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::R15)) + return true; + if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::XMM8) && + DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::XMM15)) + return true; + return false; +} + +inline static bool isX86_64TruncToByte(unsigned oc) { + return (oc == X86::TRUNC_64to8 || oc == X86::TRUNC_32to8 || + oc == X86::TRUNC_16to8); +} + + +inline static bool isX86_64NonExtLowByteReg(unsigned reg) { + return (reg == X86::SPL || reg == X86::BPL || + reg == X86::SIL || reg == X86::DIL); +} + +/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 +/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand +/// size, and 3) use of X86-64 extended registers. +unsigned Emitter::determineREX(const MachineInstr &MI) { + unsigned REX = 0; + unsigned Opcode = MI.getOpcode(); + const TargetInstrDescriptor &Desc = II->get(Opcode); + + // Pseudo instructions do not need REX prefix byte. + if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) + return 0; + if (Desc.TSFlags & X86II::REX_W) + REX |= 1 << 3; + + if (MI.getNumOperands()) { + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + bool isTrunc8 = isX86_64TruncToByte(Opcode); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isRegister()) { + unsigned Reg = MO.getReg(); + // Trunc to byte are actually movb. The real source operand is the low + // byte of the register. 
+ if (isTrunc8 && i == 1) + Reg = getX86SubSuperRegister(Reg, MVT::i8); + if (isX86_64NonExtLowByteReg(Reg)) + REX |= 0x40; + } + } + + switch (Desc.TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= (1 << 0) | (1 << 2); + break; + case X86II::MRMSrcReg: { + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (isX86_64ExtendedReg(MO)) + REX |= 1 << 0; + } + break; + } + case X86II::MRMSrcMem: { + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + unsigned Bit = 0; + for (unsigned i = 1; i != 5; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isRegister()) { + if (isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: { + if (MI.getNumOperands() >= 5 && + isX86_64ExtendedReg(MI.getOperand(4))) + REX |= 1 << 2; + unsigned Bit = 0; + for (unsigned i = 0; i != 4; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isRegister()) { + if (isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + default: { + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 0; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (isX86_64ExtendedReg(MO)) + REX |= 1 << 2; + } + break; + } + } + } + return REX; +} + void Emitter::emitInstruction(const MachineInstr &MI) { NumEmitted++; // Keep track of the # of mi's emitted @@ -354,18 +560,22 @@ void Emitter::emitInstruction(const MachineInstr &MI) { // Emit the operand size opcode prefix as needed. if (Desc.TSFlags & X86II::OpSize) MCE.emitByte(0x66); + // Emit the address size opcode prefix as needed. + if (Desc.TSFlags & X86II::AdSize) MCE.emitByte(0x67); + + bool Need0FPrefix = false; switch (Desc.TSFlags & X86II::Op0Mask) { case X86II::TB: - MCE.emitByte(0x0F); // Two-byte opcode prefix + Need0FPrefix = true; // Two-byte opcode prefix break; case X86II::REP: break; // already handled. case X86II::XS: // F3 0F MCE.emitByte(0xF3); - MCE.emitByte(0x0F); + Need0FPrefix = true; break; case X86II::XD: // F2 0F MCE.emitByte(0xF2); - MCE.emitByte(0x0F); + Need0FPrefix = true; break; case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: @@ -377,6 +587,17 @@ void Emitter::emitInstruction(const MachineInstr &MI) { case 0: break; // No prefix! } + if (Is64BitMode) { + // REX prefix + unsigned REX = determineREX(MI); + if (REX) + MCE.emitByte(0x40 | REX); + } + + // 0x0F escape code must be emitted just before the opcode. + if (Need0FPrefix) + MCE.emitByte(0x0F); + // If this is a two-address instruction, skip one of the register operands. 
unsigned CurOp = 0; CurOp += (Desc.Flags & M_2_ADDR_FLAG) != 0; @@ -397,6 +618,7 @@ void Emitter::emitInstruction(const MachineInstr &MI) { case X86::IMPLICIT_DEF_GR8: case X86::IMPLICIT_DEF_GR16: case X86::IMPLICIT_DEF_GR32: + case X86::IMPLICIT_DEF_GR64: case X86::IMPLICIT_DEF_FR32: case X86::IMPLICIT_DEF_FR64: case X86::IMPLICIT_DEF_VR64: @@ -417,7 +639,7 @@ void Emitter::emitInstruction(const MachineInstr &MI) { } else if (MO.isGlobalAddress()) { bool isTailCall = Opcode == X86::TAILJMPd || Opcode == X86::TAILJMPr || Opcode == X86::TAILJMPm; - emitGlobalAddressForCall(MO.getGlobal(), isTailCall); + emitGlobalAddressForCall(MO.getGlobal(), !isTailCall); } else if (MO.isExternalSymbol()) { emitExternalSymbolAddress(MO.getSymbolName(), true); } else if (MO.isImmediate()) { @@ -434,15 +656,15 @@ void Emitter::emitInstruction(const MachineInstr &MI) { if (CurOp != MI.getNumOperands()) { const MachineOperand &MO1 = MI.getOperand(CurOp++); if (MO1.isGlobalAddress()) { - assert(sizeOfImm(Desc) == 4 && + assert(sizeOfImm(Desc) == TD->getPointerSize() && "Don't know how to emit non-pointer values!"); - emitGlobalAddressForPtr(MO1.getGlobal(), MO1.getOffset()); + emitGlobalAddressForPtr(MO1.getGlobal(), Is64BitMode, MO1.getOffset()); } else if (MO1.isExternalSymbol()) { - assert(sizeOfImm(Desc) == 4 && + assert(sizeOfImm(Desc) == TD->getPointerSize() && "Don't know how to emit non-pointer values!"); emitExternalSymbolAddress(MO1.getSymbolName(), false); } else if (MO1.isJumpTableIndex()) { - assert(sizeOfImm(Desc) == 4 && + assert(sizeOfImm(Desc) == TD->getPointerSize() && "Don't know how to emit non-pointer values!"); emitConstant(MCE.getJumpTableEntryAddress(MO1.getJumpTableIndex()), 4); } else { @@ -460,13 +682,14 @@ void Emitter::emitInstruction(const MachineInstr &MI) { emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc)); break; } - case X86II::MRMDestMem: + case X86II::MRMDestMem: { MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(CurOp+4).getReg())); CurOp += 5; if (CurOp != MI.getNumOperands()) emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc)); break; + } case X86II::MRMSrcReg: MCE.emitByte(BaseOpcode); @@ -477,13 +700,17 @@ void Emitter::emitInstruction(const MachineInstr &MI) { emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc)); break; - case X86II::MRMSrcMem: + case X86II::MRMSrcMem: { + unsigned PCAdj = (CurOp+5 != MI.getNumOperands()) ? sizeOfImm(Desc) : 0; + MCE.emitByte(BaseOpcode); - emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg())); + emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()), + PCAdj); CurOp += 5; if (CurOp != MI.getNumOperands()) emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc)); break; + } case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: @@ -500,9 +727,13 @@ void Emitter::emitInstruction(const MachineInstr &MI) { case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRM6m: case X86II::MRM7m: { + unsigned PCAdj = (CurOp+4 != MI.getNumOperands()) ? + (MI.getOperand(CurOp+4).isImmediate() ? 
sizeOfImm(Desc) : 4) : 0; + MCE.emitByte(BaseOpcode); - emitMemModRMByte(MI, CurOp, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m); + emitMemModRMByte(MI, CurOp, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m, + PCAdj); CurOp += 4; if (CurOp != MI.getNumOperands()) { @@ -510,13 +741,14 @@ void Emitter::emitInstruction(const MachineInstr &MI) { if (MO.isImmediate()) emitConstant(MO.getImm(), sizeOfImm(Desc)); else if (MO.isGlobalAddress()) - emitGlobalAddressForPtr(MO.getGlobal(), MO.getOffset()); + emitGlobalAddressForPtr(MO.getGlobal(), Is64BitMode, MO.getOffset()); else if (MO.isJumpTableIndex()) emitConstant(MCE.getJumpTableEntryAddress(MO.getJumpTableIndex()), 4); else assert(0 && "Unknown operand!"); } break; + } case X86II::MRMInitReg: MCE.emitByte(BaseOpcode); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index c5ffb06fee..4287ab2791 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -30,8 +30,9 @@ #include "llvm/CodeGen/SSARegMap.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/ADT/Statistic.h" #include <deque> #include <iostream> @@ -58,16 +59,19 @@ namespace { int FrameIndex; } Base; + bool isRIPRel; // RIP relative? unsigned Scale; SDOperand IndexReg; unsigned Disp; GlobalValue *GV; Constant *CP; + const char *ES; + int JT; unsigned Align; // CP alignment. X86ISelAddressMode() - : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), GV(0), - CP(0), Align(0) { + : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0), + GV(0), CP(0), ES(0), JT(-1), Align(0) { } }; } @@ -92,6 +96,10 @@ namespace { /// bool FastISel; + /// TM - Keep a reference to X86TargetMachine. + /// + X86TargetMachine &TM; + /// X86Lowering - This object fully describes how to lower LLVM code to an /// X86-specific SelectionDAG. X86TargetLowering X86Lowering; @@ -100,12 +108,14 @@ namespace { /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; + /// GlobalBaseReg - keeps track of the virtual register mapped onto global + /// base register. unsigned GlobalBaseReg; public: - X86DAGToDAGISel(X86TargetMachine &TM, bool fast) + X86DAGToDAGISel(X86TargetMachine &tm, bool fast) : SelectionDAGISel(X86Lowering), - ContainsFPCode(false), FastISel(fast), + ContainsFPCode(false), FastISel(fast), TM(tm), X86Lowering(*TM.getTargetLowering()), Subtarget(&TM.getSubtarget<X86Subtarget>()) {} @@ -156,13 +166,22 @@ namespace { SDOperand &Scale, SDOperand &Index, SDOperand &Disp) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? - CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, MVT::i32) : AM.Base.Reg; + CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : + AM.Base.Reg; Scale = getI8Imm(AM.Scale); Index = AM.IndexReg; - Disp = AM.GV ? CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp) - : (AM.CP ? - CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp) - : getI32Imm(AM.Disp)); + // These are 32-bit even in 64-bit mode since RIP relative offset + // is 32-bit. 
+ if (AM.GV) + Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp); + else if (AM.CP) + Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp); + else if (AM.ES) + Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32); + else if (AM.JT != -1) + Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32); + else + Disp = getI32Imm(AM.Disp); } /// getI8Imm - Return a target constant with the specified value, of type @@ -476,26 +495,56 @@ void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { /// addressing mode bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM, bool isRoot) { + // RIP relative addressing: %rip + 32-bit displacement! + if (AM.isRIPRel) { + if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) { + uint64_t Val = cast<ConstantSDNode>(N)->getValue(); + if (isInt32(AM.Disp + Val)) { + AM.Disp += Val; + return false; + } + } + return true; + } + int id = N.Val->getNodeId(); bool Available = isSelected(id); switch (N.getOpcode()) { default: break; - case ISD::Constant: - AM.Disp += cast<ConstantSDNode>(N)->getValue(); - return false; + case ISD::Constant: { + uint64_t Val = cast<ConstantSDNode>(N)->getValue(); + if (isInt32(AM.Disp + Val)) { + AM.Disp += Val; + return false; + } + break; + } case X86ISD::Wrapper: - // If both base and index components have been picked, we can't fit - // the result available in the register in the addressing mode. Duplicate - // GlobalAddress or ConstantPool as displacement. - if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) { + // If value is available in a register both base and index components have + // been picked, we can't fit the result available in the register in the + // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement. + + // Can't fit GV or CP in addressing mode for X86-64 medium or large code + // model since the displacement field is 32-bit. Ok for small code model. + + // For X86-64 PIC code, only allow GV / CP + displacement so we can use RIP + // relative addressing mode. 
+ if ((!Subtarget->is64Bit() || TM.getCodeModel() == CodeModel::Small) && + (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val))) { + bool isRIP = Subtarget->is64Bit(); + if (isRIP && (AM.Base.Reg.Val || AM.Scale > 1 || AM.IndexReg.Val || + AM.BaseType == X86ISelAddressMode::FrameIndexBase)) + break; if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N.getOperand(0))) { if (AM.CP == 0) { AM.CP = CP->get(); AM.Align = CP->getAlignment(); AM.Disp += CP->getOffset(); + if (isRIP) + AM.isRIPRel = true; return false; } } else if (GlobalAddressSDNode *G = @@ -503,6 +552,20 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM, if (AM.GV == 0) { AM.GV = G->getGlobal(); AM.Disp += G->getOffset(); + if (isRIP) + AM.isRIPRel = true; + return false; + } + } else if (isRoot && isRIP) { + if (ExternalSymbolSDNode *S = + dyn_cast<ExternalSymbolSDNode>(N.getOperand(0))) { + AM.ES = S->getSymbol(); + AM.isRIPRel = true; + return false; + } else if (JumpTableSDNode *J = + dyn_cast<JumpTableSDNode>(N.getOperand(0))) { + AM.JT = J->getIndex(); + AM.isRIPRel = true; return false; } } @@ -533,7 +596,11 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM, AM.IndexReg = ShVal.Val->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.Val->getOperand(1)); - AM.Disp += AddVal->getValue() << Val; + uint64_t Disp = AM.Disp + AddVal->getValue() << Val; + if (isInt32(Disp)) + AM.Disp = Disp; + else + AM.IndexReg = ShVal; } else { AM.IndexReg = ShVal; } @@ -563,7 +630,11 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM, Reg = MulVal.Val->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(MulVal.Val->getOperand(1)); - AM.Disp += AddVal->getValue() * CN->getValue(); + uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue(); + if (isInt32(Disp)) + AM.Disp = Disp; + else + Reg = N.Val->getOperand(0); } else { Reg = N.Val->getOperand(0); } @@ -641,13 +712,14 @@ bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale, if (MatchAddress(N, AM)) return false; + MVT::ValueType VT = N.getValueType(); if (AM.BaseType == X86ISelAddressMode::RegBase) { if (!AM.Base.Reg.Val) - AM.Base.Reg = CurDAG->getRegister(0, MVT::i32); + AM.Base.Reg = CurDAG->getRegister(0, VT); } if (!AM.IndexReg.Val) - AM.IndexReg = CurDAG->getRegister(0, MVT::i32); + AM.IndexReg = CurDAG->getRegister(0, VT); getAddressOperands(AM, Base, Scale, Index, Disp); return true; @@ -662,19 +734,20 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base, if (MatchAddress(N, AM)) return false; + MVT::ValueType VT = N.getValueType(); unsigned Complexity = 0; if (AM.BaseType == X86ISelAddressMode::RegBase) if (AM.Base.Reg.Val) Complexity = 1; else - AM.Base.Reg = CurDAG->getRegister(0, MVT::i32); + AM.Base.Reg = CurDAG->getRegister(0, VT); else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) Complexity = 4; if (AM.IndexReg.Val) Complexity++; else - AM.IndexReg = CurDAG->getRegister(0, MVT::i32); + AM.IndexReg = CurDAG->getRegister(0, VT); if (AM.Scale > 2) Complexity += 2; @@ -687,8 +760,14 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base, // optimal (especially for code size consideration). LEA is nice because of // its three-address nature. Tweak the cost function again when we can run // convertToThreeAddress() at register allocation time. 
- if (AM.GV || AM.CP) - Complexity += 2; + if (AM.GV || AM.CP || AM.ES || AM.JT != -1) { + // For X86-64, we should always use lea to materialize RIP relative + // addresses. + if (Subtarget->is64Bit()) + Complexity = 4; + else + Complexity += 2; + } if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val)) Complexity++; @@ -721,6 +800,7 @@ static bool isRegister0(SDOperand Op) { /// base address to use for accessing globals into a register. /// SDNode *X86DAGToDAGISel::getGlobalBaseReg() { + assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing"); if (!GlobalBaseReg) { // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = BB->getParent()->front(); @@ -732,7 +812,7 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() { BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0); BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg); } - return CurDAG->getRegister(GlobalBaseReg, MVT::i32).Val; + return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val; } static SDNode *FindCallStartFromCall(SDNode *Node) { @@ -776,9 +856,11 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd // code and is matched first so to prevent it from being turned into // LEA32r X+c. + // In 64-bit mode, use LEA to take advantage of RIP-relative addressing. + MVT::ValueType PtrVT = TLI.getPointerTy(); SDOperand N0 = N.getOperand(0); SDOperand N1 = N.getOperand(1); - if (N.Val->getValueType(0) == MVT::i32 && + if (N.Val->getValueType(0) == PtrVT && N0.getOpcode() == X86ISD::Wrapper && N1.getOpcode() == ISD::Constant) { unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue(); @@ -786,17 +868,23 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { // TODO: handle ExternalSymbolSDNode. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) { - C = CurDAG->getTargetGlobalAddress(G->getGlobal(), MVT::i32, + C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT, G->getOffset() + Offset); } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) { - C = CurDAG->getTargetConstantPool(CP->get(), MVT::i32, + C = CurDAG->getTargetConstantPool(CP->get(), PtrVT, CP->getAlignment(), CP->getOffset()+Offset); } - if (C.Val) - return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, MVT::i32, C); + if (C.Val) { + if (Subtarget->is64Bit()) { + SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1), + CurDAG->getRegister(0, PtrVT), C }; + return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4); + } else + return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C); + } } // Other cases are handled by auto-generated code. 
@@ -811,6 +899,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; + case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; } else switch (NVT) { @@ -818,6 +907,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; + case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; } unsigned LoReg, HiReg; @@ -826,6 +916,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; + case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; } SDOperand N0 = Node->getOperand(0); @@ -899,6 +990,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; + case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; } else switch (NVT) { @@ -906,6 +998,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; + case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; } unsigned LoReg, HiReg; @@ -927,6 +1020,11 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { ClrOpcode = X86::MOV32r0; SExtOpcode = X86::CDQ; break; + case MVT::i64: + LoReg = X86::RAX; HiReg = X86::RDX; + ClrOpcode = X86::MOV64r0; + SExtOpcode = X86::CQO; + break; } SDOperand N0 = Node->getOperand(0); @@ -994,7 +1092,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { } case ISD::TRUNCATE: { - if (NVT == MVT::i8) { + if (!Subtarget->is64Bit() && NVT == MVT::i8) { unsigned Opc2; MVT::ValueType VT; switch (Node->getOperand(0).getValueType()) { @@ -1002,12 +1100,12 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { case MVT::i16: Opc = X86::MOV16to16_; VT = MVT::i16; - Opc2 = X86::TRUNC_GR16_GR8; + Opc2 = X86::TRUNC_16_to8; break; case MVT::i32: Opc = X86::MOV32to32_; VT = MVT::i32; - Opc2 = X86::TRUNC_GR32_GR8; + Opc2 = X86::TRUNC_32_to8; break; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 964da18035..63ac0e56d8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -42,6 +42,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) : TargetLowering(TM) { Subtarget = &TM.getSubtarget<X86Subtarget>(); X86ScalarSSE = Subtarget->hasSSE2(); + X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; // Set up the TargetLowering object. @@ -51,7 +52,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setSetCCResultContents(ZeroOrOneSetCCResult); setSchedulingPreference(SchedulingForRegPressure); setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0 - setStackPointerRegisterToSaveRestore(X86::ESP); + setStackPointerRegisterToSaveRestore(X86StackPtr); if (!Subtarget->isTargetDarwin()) // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. 
@@ -71,6 +72,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) addRegisterClass(MVT::i8, X86::GR8RegisterClass); addRegisterClass(MVT::i16, X86::GR16RegisterClass); addRegisterClass(MVT::i32, X86::GR32RegisterClass); + if (Subtarget->is64Bit()) + addRegisterClass(MVT::i64, X86::GR64RegisterClass); // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this // operation. @@ -78,11 +81,16 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); - if (X86ScalarSSE) - // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead. - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); - else + if (Subtarget->is64Bit()) { + setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); + } else { + if (X86ScalarSSE) + // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP. + setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); + else + setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); + } // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have // this operation. @@ -96,10 +104,11 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); } - // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64 - // isn't legal. - setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); - setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); + if (!Subtarget->is64Bit()) { + // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode. + setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); + setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); + } // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have // this operation. @@ -119,14 +128,19 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); - if (X86ScalarSSE && !Subtarget->hasSSE3()) - // Expand FP_TO_UINT into a select. - // FIXME: We would like to use a Custom expander here eventually to do - // the optimal thing for SSE vs. the default expansion in the legalizer. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); - else - // With SSE3 we can use fisttpll to convert to a signed i64. + if (Subtarget->is64Bit()) { + setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); + } else { + if (X86ScalarSSE && !Subtarget->hasSSE3()) + // Expand FP_TO_UINT into a select. + // FIXME: We would like to use a Custom expander here eventually to do + // the optimal thing for SSE vs. the default expansion in the legalizer. + setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); + else + // With SSE3 we can use fisttpll to convert to a signed i64. 
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); + } setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); @@ -135,12 +149,15 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::BR_CC , MVT::Other, Expand); setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); + if (Subtarget->is64Bit()) + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand); setOperationAction(ISD::FREM , MVT::f64 , Expand); + setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTTZ , MVT::i8 , Expand); setOperationAction(ISD::CTLZ , MVT::i8 , Expand); @@ -150,13 +167,18 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::CTPOP , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::CTLZ , MVT::i32 , Expand); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::CTPOP , MVT::i64 , Expand); + setOperationAction(ISD::CTTZ , MVT::i64 , Expand); + setOperationAction(ISD::CTLZ , MVT::i64 , Expand); + } + setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); setOperationAction(ISD::BSWAP , MVT::i16 , Expand); // These should be promoted to a larger select which is supported. setOperationAction(ISD::SELECT , MVT::i1 , Promote); setOperationAction(ISD::SELECT , MVT::i8 , Promote); - // X86 wants to expand cmov itself. setOperationAction(ISD::SELECT , MVT::i16 , Custom); setOperationAction(ISD::SELECT , MVT::i32 , Custom); @@ -167,6 +189,10 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::SETCC , MVT::i32 , Custom); setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::SELECT , MVT::i64 , Custom); + setOperationAction(ISD::SETCC , MVT::i64 , Custom); + } // X86 ret instruction may pop stack. setOperationAction(ISD::RET , MVT::Other, Custom); // Darwin ABI issue. 
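Among the 64-bit operation actions set above, CTPOP, CTTZ, and CTLZ on i64 are marked Expand, so the legalizer lowers them to plain shift-and-mask arithmetic rather than expecting a native instruction. As a rough illustration of what the 64-bit population-count expansion computes (not the literal DAG the legalizer builds), the conventional bit-twiddling form is:

#include <cstdint>
#include <cstdio>

// Fold adjacent bit pairs, then nibbles, then bytes; the final multiply
// sums the per-byte counts into the top byte.
static uint64_t popcount64(uint64_t x) {
  x = x - ((x >> 1) & 0x5555555555555555ULL);
  x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
  return (x * 0x0101010101010101ULL) >> 56;
}

int main() {
  std::printf("%llu\n", (unsigned long long)popcount64(0xFFFF0000FFFF0000ULL)); // prints 32
  return 0;
}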
@@ -174,6 +200,12 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::JumpTable , MVT::i32 , Custom); setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::ConstantPool , MVT::i64 , Custom); + setOperationAction(ISD::JumpTable , MVT::i64 , Custom); + setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); + setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom); + } // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); @@ -198,6 +230,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::VAEND , MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + if (Subtarget->is64Bit()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); @@ -441,7 +475,7 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) if (ObjXMMRegs) { // Passed in a XMM register. unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], - X86::VR128RegisterClass); + X86::VR128RegisterClass); ArgValue= DAG.getCopyFromReg(Root, Reg, ObjectVT); ArgValues.push_back(ArgValue); NumXMMRegs += ObjXMMRegs; @@ -466,8 +500,9 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; if (isVarArg) VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); - ReturnAddrIndex = 0; // No return address slot generated yet. - BytesToPopOnReturn = 0; // Callee pops nothing. + RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. + ReturnAddrIndex = 0; // No return address slot generated yet. + BytesToPopOnReturn = 0; // Callee pops nothing. BytesCallerReserves = ArgOffset; // If this is a struct return on Darwin/X86, the callee pops the hidden struct @@ -539,7 +574,7 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) { NumXMMRegs = 0; std::vector<std::pair<unsigned, SDOperand> > RegsToPass; std::vector<SDOperand> MemOpChains; - SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy()); + SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); for (unsigned i = 0; i != NumOps; ++i) { SDOperand Arg = Op.getOperand(5+2*i); @@ -751,6 +786,507 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) { return Res.getValue(Op.ResNo); } + +//===----------------------------------------------------------------------===// +// X86-64 C Calling Convention implementation +//===----------------------------------------------------------------------===// + +/// HowToPassX86_64CCCArgument - Returns how an formal argument of the specified +/// type should be passed. If it is through stack, returns the size of the stack +/// slot; if it is through integer or XMM register, returns the number of +/// integer or XMM registers are needed. 
+static void +HowToPassX86_64CCCArgument(MVT::ValueType ObjectVT, + unsigned NumIntRegs, unsigned NumXMMRegs, + unsigned &ObjSize, unsigned &ObjIntRegs, + unsigned &ObjXMMRegs) { + ObjSize = 0; + ObjIntRegs = 0; + ObjXMMRegs = 0; + + switch (ObjectVT) { + default: assert(0 && "Unhandled argument type!"); + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + if (NumIntRegs < 6) + ObjIntRegs = 1; + else { + switch (ObjectVT) { + default: break; + case MVT::i8: ObjSize = 1; break; + case MVT::i16: ObjSize = 2; break; + case MVT::i32: ObjSize = 4; break; + case MVT::i64: ObjSize = 8; break; + } + } + break; + case MVT::f32: + case MVT::f64: + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + if (NumXMMRegs < 8) + ObjXMMRegs = 1; + else { + switch (ObjectVT) { + default: break; + case MVT::f32: ObjSize = 4; break; + case MVT::f64: ObjSize = 8; break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: ObjSize = 16; break; + } + break; + } + } +} + +SDOperand +X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { + unsigned NumArgs = Op.Val->getNumValues() - 1; + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + SDOperand Root = Op.getOperand(0); + bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; + std::vector<SDOperand> ArgValues; + + // Add DAG nodes to load the arguments... On entry to a function on the X86, + // the stack frame looks like this: + // + // [RSP] -- return address + // [RSP + 8] -- first nonreg argument (leftmost lexically) + // [RSP +16] -- second nonreg argument, if 1st argument is <= 8 bytes in size + // ... + // + unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot + unsigned NumIntRegs = 0; // Int regs used for parameter passing. + unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. + + static const unsigned GPR8ArgRegs[] = { + X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B + }; + static const unsigned GPR16ArgRegs[] = { + X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W + }; + static const unsigned GPR32ArgRegs[] = { + X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D + }; + static const unsigned GPR64ArgRegs[] = { + X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 + }; + static const unsigned XMMArgRegs[] = { + X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, + X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 + }; + + for (unsigned i = 0; i < NumArgs; ++i) { + MVT::ValueType ObjectVT = Op.getValue(i).getValueType(); + unsigned ArgIncrement = 8; + unsigned ObjSize = 0; + unsigned ObjIntRegs = 0; + unsigned ObjXMMRegs = 0; + + // FIXME: __int128 and long double support? 
+ HowToPassX86_64CCCArgument(ObjectVT, NumIntRegs, NumXMMRegs, + ObjSize, ObjIntRegs, ObjXMMRegs); + if (ObjSize > 8) + ArgIncrement = ObjSize; + + unsigned Reg = 0; + SDOperand ArgValue; + if (ObjIntRegs || ObjXMMRegs) { + switch (ObjectVT) { + default: assert(0 && "Unhandled argument type!"); + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: { + TargetRegisterClass *RC = NULL; + switch (ObjectVT) { + default: break; + case MVT::i8: + RC = X86::GR8RegisterClass; + Reg = GPR8ArgRegs[NumIntRegs]; + break; + case MVT::i16: + RC = X86::GR16RegisterClass; + Reg = GPR16ArgRegs[NumIntRegs]; + break; + case MVT::i32: + RC = X86::GR32RegisterClass; + Reg = GPR32ArgRegs[NumIntRegs]; + break; + case MVT::i64: + RC = X86::GR64RegisterClass; + Reg = GPR64ArgRegs[NumIntRegs]; + break; + } + Reg = AddLiveIn(MF, Reg, RC); + ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); + break; + } + case MVT::f32: + case MVT::f64: + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: { + TargetRegisterClass *RC= (ObjectVT == MVT::f32) ? + X86::FR32RegisterClass : ((ObjectVT == MVT::f64) ? + X86::FR64RegisterClass : X86::VR128RegisterClass); + Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], RC); + ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); + break; + } + } + NumIntRegs += ObjIntRegs; + NumXMMRegs += ObjXMMRegs; + } else if (ObjSize) { + // XMM arguments have to be aligned on 16-byte boundary. + if (ObjSize == 16) + ArgOffset = ((ArgOffset + 15) / 16) * 16; + // Create the SelectionDAG nodes corresponding to a load from this + // parameter. + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); + SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, + DAG.getSrcValue(NULL)); + ArgOffset += ArgIncrement; // Move on to the next argument. + } + + ArgValues.push_back(ArgValue); + } + + // If the function takes variable number of arguments, make a frame index for + // the start of the first vararg value... for expansion of llvm.va_start. + if (isVarArg) { + // For X86-64, if there are vararg parameters that are passed via + // registers, then we must store them to their spots on the stack so they + // may be loaded by deferencing the result of va_next. + VarArgsGPOffset = NumIntRegs * 8; + VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16; + VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); + RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16); + + // Store the integer parameter registers. + std::vector<SDOperand> MemOps; + SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); + SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, + DAG.getConstant(VarArgsGPOffset, getPointerTy())); + for (; NumIntRegs != 6; ++NumIntRegs) { + unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs], + X86::GR64RegisterClass); + SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64); + SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1), + Val, FIN, DAG.getSrcValue(NULL)); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, + DAG.getConstant(8, getPointerTy())); + } + + // Now store the XMM (fp + vector) parameter registers. 
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, + DAG.getConstant(VarArgsFPOffset, getPointerTy())); + for (; NumXMMRegs != 8; ++NumXMMRegs) { + unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], + X86::VR128RegisterClass); + SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32); + SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1), + Val, FIN, DAG.getSrcValue(NULL)); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, + DAG.getConstant(16, getPointerTy())); + } + if (!MemOps.empty()) + Root = DAG.getNode(ISD::TokenFactor, MVT::Other, + &MemOps[0], MemOps.size()); + } + + ArgValues.push_back(Root); + + ReturnAddrIndex = 0; // No return address slot generated yet. + BytesToPopOnReturn = 0; // Callee pops nothing. + BytesCallerReserves = ArgOffset; + + // Return the new list of results. + std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(), + Op.Val->value_end()); + return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size()); +} + +SDOperand +X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG) { + SDOperand Chain = Op.getOperand(0); + unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); + bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; + bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; + SDOperand Callee = Op.getOperand(4); + MVT::ValueType RetVT= Op.Val->getValueType(0); + unsigned NumOps = (Op.getNumOperands() - 5) / 2; + + // Count how many bytes are to be pushed on the stack. + unsigned NumBytes = 0; + unsigned NumIntRegs = 0; // Int regs used for parameter passing. + unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. + + static const unsigned GPR8ArgRegs[] = { + X86::DIL, X86::SIL, X86::DL, X86::CL, X86::R8B, X86::R9B + }; + static const unsigned GPR16ArgRegs[] = { + X86::DI, X86::SI, X86::DX, X86::CX, X86::R8W, X86::R9W + }; + static const unsigned GPR32ArgRegs[] = { + X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D + }; + static const unsigned GPR64ArgRegs[] = { + X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 + }; + static const unsigned XMMArgRegs[] = { + X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, + X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 + }; + + for (unsigned i = 0; i != NumOps; ++i) { + SDOperand Arg = Op.getOperand(5+2*i); + MVT::ValueType ArgVT = Arg.getValueType(); + + switch (ArgVT) { + default: assert(0 && "Unknown value type!"); + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + if (NumIntRegs < 6) + ++NumIntRegs; + else + NumBytes += 8; + break; + case MVT::f32: + case MVT::f64: + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + if (NumXMMRegs < 8) + NumXMMRegs++; + else if (ArgVT == MVT::f32 || ArgVT == MVT::f64) + NumBytes += 8; + else { + // XMM arguments have to be aligned on 16-byte boundary. + NumBytes = ((NumBytes + 15) / 16) * 16; + NumBytes += 16; + } + break; + } + } + + Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); + + // Arguments go on the stack in reverse order, as specified by the ABI. 
+ unsigned ArgOffset = 0; + NumIntRegs = 0; + NumXMMRegs = 0; + std::vector<std::pair<unsigned, SDOperand> > RegsToPass; + std::vector<SDOperand> MemOpChains; + SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); + for (unsigned i = 0; i != NumOps; ++i) { + SDOperand Arg = Op.getOperand(5+2*i); + MVT::ValueType ArgVT = Arg.getValueType(); + + switch (ArgVT) { + default: assert(0 && "Unexpected ValueType for argument!"); + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + if (NumIntRegs < 6) { + unsigned Reg = 0; + switch (ArgVT) { + default: break; + case MVT::i8: Reg = GPR8ArgRegs[NumIntRegs]; break; + case MVT::i16: Reg = GPR16ArgRegs[NumIntRegs]; break; + case MVT::i32: Reg = GPR32ArgRegs[NumIntRegs]; break; + case MVT::i64: Reg = GPR64ArgRegs[NumIntRegs]; break; + } + RegsToPass.push_back(std::make_pair(Reg, Arg)); + ++NumIntRegs; + } else { + SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); + PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); + MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, + Arg, PtrOff, DAG.getSrcValue(NULL))); + ArgOffset += 8; + } + break; + case MVT::f32: + case MVT::f64: + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + if (NumXMMRegs < 8) { + RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); + NumXMMRegs++; + } else { + if (ArgVT != MVT::f32 && ArgVT != MVT::f64) { + // XMM arguments have to be aligned on 16-byte boundary. + ArgOffset = ((ArgOffset + 15) / 16) * 16; + } + SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); + PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); + MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, + Arg, PtrOff, DAG.getSrcValue(NULL))); + if (ArgVT == MVT::f32 || ArgVT == MVT::f64) + ArgOffset += 8; + else + ArgOffset += 16; + } + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into registers. + SDOperand InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, + InFlag); + InFlag = Chain.getValue(1); + } + + if (isVarArg) { + // From AMD64 ABI document: + // For calls that may call functions that use varargs or stdargs + // (prototype-less calls or calls to functions containing ellipsis (...) in + // the declaration) %al is used as hidden argument to specify the number + // of SSE registers used. The contents of %al do not need to match exactly + // the number of registers, but must be an ubound on the number of SSE + // registers used and is in the range 0 - 8 inclusive. + Chain = DAG.getCopyToReg(Chain, X86::AL, + DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); + else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + + std::vector<MVT::ValueType> NodeTys; + NodeTys.push_back(MVT::Other); // Returns a chain + NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. + std::vector<SDOperand> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + if (InFlag.Val) + Ops.push_back(InFlag); + + // FIXME: Do not generate X86ISD::TAILCALL for now. + Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, + NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + NodeTys.clear(); + NodeTys.push_back(MVT::Other); // Returns a chain + if (RetVT != MVT::Other) + NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. + Ops.clear(); + Ops.push_back(Chain); + Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); + Ops.push_back(DAG.getConstant(0, getPointerTy())); + Ops.push_back(InFlag); + Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); + if (RetVT != MVT::Other) + InFlag = Chain.getValue(1); + + std::vector<SDOperand> ResultVals; + NodeTys.clear(); + switch (RetVT) { + default: assert(0 && "Unknown value type to return!"); + case MVT::Other: break; + case MVT::i8: + Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1); + ResultVals.push_back(Chain.getValue(0)); + NodeTys.push_back(MVT::i8); + break; + case MVT::i16: + Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1); + ResultVals.push_back(Chain.getValue(0)); + NodeTys.push_back(MVT::i16); + break; + case MVT::i32: + Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); + ResultVals.push_back(Chain.getValue(0)); + NodeTys.push_back(MVT::i32); + break; + case MVT::i64: + if (Op.Val->getValueType(1) == MVT::i64) { + // FIXME: __int128 support? + Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1); + ResultVals.push_back(Chain.getValue(0)); + Chain = DAG.getCopyFromReg(Chain, X86::RDX, MVT::i64, + Chain.getValue(2)).getValue(1); + ResultVals.push_back(Chain.getValue(0)); + NodeTys.push_back(MVT::i64); + } else { + Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1); + ResultVals.push_back(Chain.getValue(0)); + } + NodeTys.push_back(MVT::i64); + break; + case MVT::f32: + case MVT::f64: + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + // FIXME: long double support? + Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1); + ResultVals.push_back(Chain.getValue(0)); + NodeTys.push_back(RetVT); + break; + } + + // If the function returns void, just return the chain. + if (ResultVals.empty()) + return Chain; + + // Otherwise, merge everything together with a MERGE_VALUES node. 
+ NodeTys.push_back(MVT::Other); + ResultVals.push_back(Chain); + SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, + &ResultVals[0], ResultVals.size()); + return Res.getValue(Op.ResNo); +} + //===----------------------------------------------------------------------===// // Fast Calling Convention implementation //===----------------------------------------------------------------------===// @@ -949,6 +1485,7 @@ X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { ArgOffset += 4; VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. + RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. ReturnAddrIndex = 0; // No return address slot generated yet. BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. BytesCallerReserves = 0; @@ -1063,7 +1600,7 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG){ NumIntRegs = 0; std::vector<std::pair<unsigned, SDOperand> > RegsToPass; std::vector<SDOperand> MemOpChains; - SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy()); + SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); for (unsigned i = 0; i != NumOps; ++i) { SDOperand Arg = Op.getOperand(5+2*i); @@ -1273,10 +1810,13 @@ SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. MachineFunction &MF = DAG.getMachineFunction(); - ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); + if (Subtarget->is64Bit()) + ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8); + else + ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); } - return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32); + return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); } @@ -1291,11 +1831,11 @@ LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); if (!isFrameAddress) // Just load the return address - Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI, + Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, DAG.getSrcValue(NULL)); else - Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI, - DAG.getConstant(4, MVT::i32)); + Result = DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, + DAG.getConstant(4, getPointerTy())); } return std::make_pair(Result, Chain); } @@ -2184,7 +2724,7 @@ static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, /// static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, - SelectionDAG &DAG) { + SelectionDAG &DAG, TargetLowering &TLI) { if (NumNonZero > 8) return SDOperand(); @@ -2217,7 +2757,7 @@ static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, if (ThisElt.Val) V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, - DAG.getConstant(i/2, MVT::i32)); + DAG.getConstant(i/2, TLI.getPointerTy())); } } @@ -2228,7 +2768,7 @@ static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, /// static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, - SelectionDAG &DAG) { + SelectionDAG &DAG, TargetLowering &TLI) { if (NumNonZero > 4) return SDOperand(); @@ -2245,7 +2785,7 @@ static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, First = false; } V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), - DAG.getConstant(i, MVT::i32)); + DAG.getConstant(i, TLI.getPointerTy())); } } 
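The X86-64 call lowering completed above reads call results out of fixed registers: i8/i16/i32 come back in AL/AX/EAX, i64 in RAX (with RDX supplying a second i64 result), and scalar FP or vector values in XMM0; for vararg callees it also loads AL with an upper bound (0-8) on the number of XMM registers holding arguments. The helper below is a hypothetical summary of that result mapping, using register names as strings rather than LLVM API calls:

#include <string>

// Hypothetical summary of where LowerX86_64CCCCallTo expects a call's
// return value to arrive; purely descriptive, not part of the patch.
static std::string x8664ReturnLocation(unsigned Bits, bool IsFPOrVector,
                                       bool HasSecondI64Result) {
  if (IsFPOrVector) return "XMM0";                    // f32/f64 and vectors
  switch (Bits) {
  case 8:  return "AL";
  case 16: return "AX";
  case 32: return "EAX";
  case 64: return HasSecondI64Result ? "RAX:RDX" : "RAX";
  default: return "RAX";
  }
}

The return-address bookkeeping in the following hunks becomes pointer-sized to match: getReturnAddressFrameIndex now creates an 8-byte slot at offset -8 in 64-bit mode instead of the 4-byte slot at -4 used on X86-32.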
@@ -2324,12 +2864,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { // If element VT is < 32 bits, convert it to inserts into a zero vector. if (EVTBits == 8) { - SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG); + SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, + *this); if (V.Val) return V; } if (EVTBits == 16) { - SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG); + SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, + *this); if (V.Val) return V; } @@ -2791,7 +3333,8 @@ X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { CP->getAlignment())); if (Subtarget->isTargetDarwin()) { // With PIC, the address is actually $g + Offset. - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) + if (!Subtarget->is64Bit() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) Result = DAG.getNode(ISD::ADD, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); } @@ -2807,7 +3350,8 @@ X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { getPointerTy())); if (Subtarget->isTargetDarwin()) { // With PIC, the address is actually $g + Offset. - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) + if (!Subtarget->is64Bit() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) Result = DAG.getNode(ISD::ADD, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); @@ -2818,7 +3362,7 @@ X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { // not the GV offset field. if (getTargetMachine().getRelocationModel() != Reloc::Static && DarwinGVRequiresExtraLoad(GV)) - Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), + Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, DAG.getSrcValue(NULL)); } @@ -2833,7 +3377,8 @@ X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { getPointerTy())); if (Subtarget->isTargetDarwin()) { // With PIC, the address is actually $g + Offset. - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) + if (!Subtarget->is64Bit() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) Result = DAG.getNode(ISD::ADD, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); @@ -3234,7 +3779,8 @@ SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { getPointerTy())); if (Subtarget->isTargetDarwin()) { // With PIC, the address is actually $g + Offset. 
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) + if (!Subtarget->is64Bit() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) Result = DAG.getNode(ISD::ADD, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); @@ -3245,7 +3791,9 @@ SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); - if (CallingConv == CallingConv::Fast && EnableFastCC) + if (Subtarget->is64Bit()) + return LowerX86_64CCCCallTo(Op, DAG); + else if (CallingConv == CallingConv::Fast && EnableFastCC) return LowerFastCCCallTo(Op, DAG); else return LowerCCCCallTo(Op, DAG); @@ -3264,18 +3812,25 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { case 3: { MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); - if (MVT::isVector(ArgVT)) { + if (MVT::isVector(ArgVT) || + (Subtarget->is64Bit() && MVT::isFloatingPoint(ArgVT))) { // Integer or FP vector result -> XMM0. if (DAG.getMachineFunction().liveout_empty()) DAG.getMachineFunction().addLiveOut(X86::XMM0); Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1), SDOperand()); } else if (MVT::isInteger(ArgVT)) { - // Integer result -> EAX + // Integer result -> EAX / RAX. + // The C calling convention guarantees the return value has been + // promoted to at least MVT::i32. The X86-64 ABI doesn't require the + // value to be promoted MVT::i64. So we don't have to extend it to + // 64-bit. Return the value in EAX, but mark RAX as liveout. + unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; if (DAG.getMachineFunction().liveout_empty()) - DAG.getMachineFunction().addLiveOut(X86::EAX); + DAG.getMachineFunction().addLiveOut(Reg); - Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), + Reg = (ArgVT == MVT::i64) ? X86::RAX : X86::EAX; + Copy = DAG.getCopyToReg(Op.getOperand(0), Reg, Op.getOperand(1), SDOperand()); } else if (!X86ScalarSSE) { // FP return with fp-stack value. @@ -3329,19 +3884,22 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { } break; } - case 5: + case 5: { + unsigned Reg1 = Subtarget->is64Bit() ? X86::RAX : X86::EAX; + unsigned Reg2 = Subtarget->is64Bit() ? 
X86::RDX : X86::EDX; if (DAG.getMachineFunction().liveout_empty()) { - DAG.getMachineFunction().addLiveOut(X86::EAX); - DAG.getMachineFunction().addLiveOut(X86::EDX); + DAG.getMachineFunction().addLiveOut(Reg1); + DAG.getMachineFunction().addLiveOut(Reg2); } - Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(3), + Copy = DAG.getCopyToReg(Op.getOperand(0), Reg2, Op.getOperand(3), SDOperand()); - Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); + Copy = DAG.getCopyToReg(Copy, Reg1, Op.getOperand(1), Copy.getValue(1)); break; + } } return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, - Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), + Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), Copy.getValue(1)); } @@ -3355,7 +3913,9 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true); unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); - if (CC == CallingConv::Fast && EnableFastCC) + if (Subtarget->is64Bit()) + return LowerX86_64CCCArguments(Op, DAG); + else if (CC == CallingConv::Fast && EnableFastCC) return LowerFastCCArguments(Op, DAG); else return LowerCCCArguments(Op, DAG); @@ -3394,38 +3954,47 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { bool TwoRepStos = false; if (ValC) { unsigned ValReg; - unsigned Val = ValC->getValue() & 255; + uint64_t Val = ValC->getValue() & 255; // If the value is a constant, then we can potentially use larger sets. switch (Align & 3) { case 2: // WORD aligned AVT = MVT::i16; - Count = DAG.getConstant(I->getValue() / 2, MVT::i32); - BytesLeft = I->getValue() % 2; - Val = (Val << 8) | Val; ValReg = X86::AX; + Val = (Val << 8) | Val; break; - case 0: // DWORD aligned + case 0: // DWORD aligned AVT = MVT::i32; - if (I) { - Count = DAG.getConstant(I->getValue() / 4, MVT::i32); - BytesLeft = I->getValue() % 4; - } else { - Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), - DAG.getConstant(2, MVT::i8)); - TwoRepStos = true; - } + ValReg = X86::EAX; Val = (Val << 8) | Val; Val = (Val << 16) | Val; - ValReg = X86::EAX; + if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned + AVT = MVT::i64; + ValReg = X86::RAX; + Val = (Val << 32) | Val; + } break; default: // Byte aligned AVT = MVT::i8; - Count = Op.getOperand(3); ValReg = X86::AL; + Count = Op.getOperand(3); break; } + if (AVT > MVT::i8) { + if (I) { + unsigned UBytes = MVT::getSizeInBits(AVT) / 8; + Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); + BytesLeft = I->getValue() % UBytes; + } else { + assert(AVT >= MVT::i32 && + "Do not use rep;stos if not at least DWORD aligned"); + Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), + Op.getOperand(3), DAG.getConstant(2, MVT::i8)); + TwoRepStos = true; + } + } + Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), InFlag); InFlag = Chain.getValue(1); @@ -3436,9 +4005,11 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { InFlag = Chain.getValue(1); } - Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); + Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, + Count, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); + Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RDI : X86::EDI, + Op.getOperand(1), InFlag); InFlag = Chain.getValue(1); std::vector<MVT::ValueType> Tys; @@ -3455,8 +4026,9 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { Count = Op.getOperand(3); MVT::ValueType CVT = Count.getValueType(); SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, - DAG.getConstant(3, CVT)); - Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); + DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); + Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, + Left, InFlag); InFlag = Chain.getValue(1); Tys.clear(); Tys.push_back(MVT::Other); @@ -3467,12 +4039,23 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { Ops.push_back(InFlag); Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); } else if (BytesLeft) { - // Issue stores for the last 1 - 3 bytes. + // Issue stores for the last 1 - 7 bytes. SDOperand Value; unsigned Val = ValC->getValue() & 255; unsigned Offset = I->getValue() - BytesLeft; SDOperand DstAddr = Op.getOperand(1); MVT::ValueType AddrVT = DstAddr.getValueType(); + if (BytesLeft >= 4) { + Val = (Val << 8) | Val; + Val = (Val << 16) | Val; + Value = DAG.getConstant(Val, MVT::i32); + Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, + DAG.getNode(ISD::ADD, AddrVT, DstAddr, + DAG.getConstant(Offset, AddrVT)), + DAG.getSrcValue(NULL)); + BytesLeft -= 4; + Offset += 4; + } if (BytesLeft >= 2) { Value = DAG.getConstant((Val << 8) | Val, MVT::i16); Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, @@ -3482,7 +4065,6 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { BytesLeft -= 2; Offset += 2; } - if (BytesLeft == 1) { Value = DAG.getConstant(Val, MVT::i8); Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, @@ -3525,19 +4107,11 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { switch (Align & 3) { case 2: // WORD aligned AVT = MVT::i16; - Count = DAG.getConstant(I->getValue() / 2, MVT::i32); - BytesLeft = I->getValue() % 2; break; - case 0: // DWORD aligned + case 0: // DWORD aligned AVT = MVT::i32; - if (I) { - Count = DAG.getConstant(I->getValue() / 4, MVT::i32); - BytesLeft = I->getValue() % 4; - } else { - Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), - DAG.getConstant(2, MVT::i8)); - TwoRepMovs = true; - } + if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned + AVT = MVT::i64; break; default: // Byte aligned AVT = MVT::i8; @@ -3545,12 +4119,29 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { break; } + if (AVT > MVT::i8) { + if (I) { + unsigned UBytes = MVT::getSizeInBits(AVT) / 8; + Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy()); + BytesLeft = I->getValue() % UBytes; + } else { + assert(AVT >= MVT::i32 && + "Do not use rep;movs if not at least DWORD aligned"); + Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), + Op.getOperand(3), DAG.getConstant(2, MVT::i8)); + TwoRepMovs = true; + } + } + SDOperand InFlag(0, 0); - Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); + Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, + Count, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); + Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RDI : X86::EDI, + Op.getOperand(1), InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); + Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, + Op.getOperand(2), InFlag); InFlag = Chain.getValue(1); std::vector<MVT::ValueType> Tys; @@ -3567,8 +4158,9 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { Count = Op.getOperand(3); MVT::ValueType CVT = Count.getValueType(); SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, - DAG.getConstant(3, CVT)); - Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); + DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); + Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX, + Left, InFlag); InFlag = Chain.getValue(1); Tys.clear(); Tys.push_back(MVT::Other); @@ -3579,13 +4171,26 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { Ops.push_back(InFlag); Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); } else if (BytesLeft) { - // Issue loads and stores for the last 1 - 3 bytes. + // Issue loads and stores for the last 1 - 7 bytes. unsigned Offset = I->getValue() - BytesLeft; SDOperand DstAddr = Op.getOperand(1); MVT::ValueType DstVT = DstAddr.getValueType(); SDOperand SrcAddr = Op.getOperand(2); MVT::ValueType SrcVT = SrcAddr.getValueType(); SDOperand Value; + if (BytesLeft >= 4) { + Value = DAG.getLoad(MVT::i32, Chain, + DAG.getNode(ISD::ADD, SrcVT, SrcAddr, + DAG.getConstant(Offset, SrcVT)), + DAG.getSrcValue(NULL)); + Chain = Value.getValue(1); + Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, + DAG.getNode(ISD::ADD, DstVT, DstAddr, + DAG.getConstant(Offset, DstVT)), + DAG.getSrcValue(NULL)); + BytesLeft -= 4; + Offset += 4; + } if (BytesLeft >= 2) { Value = DAG.getLoad(MVT::i16, Chain, DAG.getNode(ISD::ADD, SrcVT, SrcAddr, @@ -3635,12 +4240,51 @@ X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { } SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { - // vastart just stores the address of the VarArgsFrameIndex slot into the - // memory location argument. - // FIXME: Replace MVT::i32 with PointerTy - SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); - return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, - Op.getOperand(1), Op.getOperand(2)); + if (!Subtarget->is64Bit()) { + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, + Op.getOperand(1), Op.getOperand(2)); + } + + // __va_list_tag: + // gp_offset (0 - 6 * 8) + // fp_offset (48 - 48 + 8 * 16) + // overflow_arg_area (point to parameters coming in memory). 
+ // reg_save_area + std::vector<SDOperand> MemOps; + SDOperand FIN = Op.getOperand(1); + // Store gp_offset + SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), + DAG.getConstant(VarArgsGPOffset, MVT::i32), + FIN, Op.getOperand(2)); + MemOps.push_back(Store); + + // Store fp_offset + FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, + DAG.getConstant(4, getPointerTy())); + Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), + DAG.getConstant(VarArgsFPOffset, MVT::i32), + FIN, Op.getOperand(2)); + MemOps.push_back(Store); + + // Store ptr to overflow_arg_area + FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, + DAG.getConstant(4, getPointerTy())); + SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), + OVFIN, FIN, Op.getOperand(2)); + MemOps.push_back(Store); + + // Store ptr to reg_save_area. + FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, + DAG.getConstant(8, getPointerTy())); + SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); + Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), + RSFIN, FIN, Op.getOperand(2)); + MemOps.push_back(Store); + return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); } SDOperand @@ -4333,6 +4977,23 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, Res.first = DestReg; Res.second = Res.second = X86::GR32RegisterClass; } + } else if (VT == MVT::i64) { + unsigned DestReg = 0; + switch (Res.first) { + default: break; + case X86::AX: DestReg = X86::RAX; break; + case X86::DX: DestReg = X86::RDX; break; + case X86::CX: DestReg = X86::RCX; break; + case X86::BX: DestReg = X86::RBX; break; + case X86::SI: DestReg = X86::RSI; break; + case X86::DI: DestReg = X86::RDI; break; + case X86::BP: DestReg = X86::RBP; break; + case X86::SP: DestReg = X86::RSP; break; + } + if (DestReg) { + Res.first = DestReg; + Res.second = Res.second = X86::GR64RegisterClass; + } } return Res; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0901b2a770..72a282b2de 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -267,6 +267,9 @@ namespace llvm { // X86TargetLowering - X86 Implementation of the TargetLowering interface class X86TargetLowering : public TargetLowering { int VarArgsFrameIndex; // FrameIndex for start of varargs area. + int RegSaveFrameIndex; // X86-64 vararg func register save area. + unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset. + unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset. int ReturnAddrIndex; // FrameIndex for return slot. int BytesToPopOnReturn; // Number of arg bytes ret should pop. int BytesCallerReserves; // Number of arg bytes caller makes. @@ -347,6 +350,9 @@ namespace llvm { /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; + /// X86StackPtr - X86 physical register used as stack ptr. + unsigned X86StackPtr; + /// X86ScalarSSE - Select between SSE2 or x87 floating point ops. bool X86ScalarSSE; @@ -354,6 +360,10 @@ namespace llvm { SDOperand LowerCCCArguments(SDOperand Op, SelectionDAG &DAG); SDOperand LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG); + // X86-64 C Calling Convention implementation. + SDOperand LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG); + SDOperand LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG); + // Fast Calling Convention implementation. 
SDOperand LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 9d8eab8ca5..81cd9e0831 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -22,7 +22,7 @@ using namespace llvm; X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : TargetInstrInfo(X86Insts, sizeof(X86Insts)/sizeof(X86Insts[0])), - TM(tm), RI(*this) { + TM(tm), RI(tm, *this) { } @@ -30,7 +30,8 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, unsigned& sourceReg, unsigned& destReg) const { MachineOpCode oc = MI.getOpcode(); - if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr || + if (oc == X86::MOV8rr || oc == X86::MOV16rr || + oc == X86::MOV32rr || oc == X86::MOV64rr || oc == X86::MOV16to16_ || oc == X86::MOV32to32_ || oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr || oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr || @@ -59,6 +60,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(MachineInstr *MI, case X86::MOV16_rm: case X86::MOV32rm: case X86::MOV32_rm: + case X86::MOV64rm: case X86::FpLD64m: case X86::MOVSSrm: case X86::MOVSDrm: @@ -86,6 +88,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(MachineInstr *MI, case X86::MOV16_mr: case X86::MOV32mr: case X86::MOV32_mr: + case X86::MOV64mr: case X86::FpSTP64m: case X86::MOVSSmr: case X86::MOVSDmr: @@ -145,16 +148,20 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr *MI) const { switch (MI->getOpcode()) { case X86::INC32r: + case X86::INC64_32r: assert(MI->getNumOperands() == 2 && "Unknown inc instruction!"); return addRegOffset(BuildMI(X86::LEA32r, 5, Dest), Src, 1); case X86::INC16r: + case X86::INC64_16r: if (DisableLEA16) return 0; assert(MI->getNumOperands() == 2 && "Unknown inc instruction!"); return addRegOffset(BuildMI(X86::LEA16r, 5, Dest), Src, 1); case X86::DEC32r: + case X86::DEC64_32r: assert(MI->getNumOperands() == 2 && "Unknown dec instruction!"); return addRegOffset(BuildMI(X86::LEA32r, 5, Dest), Src, -1); case X86::DEC16r: + case X86::DEC64_16r: if (DisableLEA16) return 0; assert(MI->getNumOperands() == 2 && "Unknown dec instruction!"); return addRegOffset(BuildMI(X86::LEA16r, 5, Dest), Src, -1); @@ -264,3 +271,10 @@ X86InstrInfo::reverseBranchCondition(MachineBasicBlock::iterator MI) const { return BuildMI(*MBB, MBB->erase(MI), ROpcode, 1).addMBB(TMBB); } +const TargetRegisterClass *X86InstrInfo::getPointerRegClass() const { + const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); + if (Subtarget->is64Bit()) + return &X86::GR64RegClass; + else + return &X86::GR32RegClass; +} diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 7691798646..01b4cfffcc 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -18,6 +18,7 @@ #include "X86RegisterInfo.h" namespace llvm { + class X86RegisterInfo; class X86TargetMachine; /// X86II - This namespace holds all of the target specific flags that @@ -90,12 +91,18 @@ namespace X86II { // instead of 32 bit data. OpSize = 1 << 6, + // AsSize - Set if this instruction requires an operand size prefix (0x67), + // which most often indicates that the instruction address 16 bit address + // instead of 32 bit address (or 32 bit address in 64 bit mode). + AdSize = 1 << 7, + + //===------------------------------------------------------------------===// // Op0Mask - There are several prefix bytes that are used to form two byte // opcodes. 
These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is // used to obtain the setting of this field. If no bits in this field is // set, there is no prefix byte for obtaining a multibyte opcode. // - Op0Shift = 7, + Op0Shift = 8, Op0Mask = 0xF << Op0Shift, // TB - TwoByte - Set if this instruction has a two byte opcode, which @@ -118,19 +125,29 @@ namespace X86II { XD = 11 << Op0Shift, XS = 12 << Op0Shift, //===------------------------------------------------------------------===// - // This two-bit field describes the size of an immediate operand. Zero is + // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. + // They are used to specify GPRs and SSE registers, 64-bit operand size, + // etc. We only cares about REX.W and REX.R bits and only the former is + // statically determined. + // + REXShift = 12, + REX_W = 1 << REXShift, + + //===------------------------------------------------------------------===// + // This three-bit field describes the size of an immediate operand. Zero is // unused so that we can tell if we forgot to set a value. - ImmShift = 11, - ImmMask = 3 << ImmShift, + ImmShift = 13, + ImmMask = 7 << ImmShift, Imm8 = 1 << ImmShift, Imm16 = 2 << ImmShift, Imm32 = 3 << ImmShift, + Imm64 = 4 << ImmShift, //===------------------------------------------------------------------===// // FP Instruction Classification... Zero is non-fp instruction. // FPTypeMask - Mask for all of the FP types... - FPTypeShift = 13, + FPTypeShift = 16, FPTypeMask = 7 << FPTypeShift, // NotFP - The default, set for instructions that do not use FP registers. @@ -162,9 +179,9 @@ namespace X86II { // SpecialFP - Special instruction forms. Dispatch by opcode explicitly. SpecialFP = 7 << FPTypeShift, - OpcodeShift = 16, + // Bits 19 -> 23 are unused + OpcodeShift = 24, OpcodeMask = 0xFF << OpcodeShift - // Bits 25 -> 31 are unused }; } @@ -216,6 +233,8 @@ public: virtual MachineBasicBlock::iterator reverseBranchCondition(MachineBasicBlock::iterator MI) const; + const TargetRegisterClass *getPointerRegClass() const; + // getBaseOpcodeFor - This function returns the "base" X86 opcode for the // specified opcode number. // diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 206faa129c..0f38aea60a 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -39,7 +39,7 @@ def SDT_X86CallSeqStart : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>; def SDT_X86CallSeqEnd : SDTypeProfile<0, 2, [ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; -def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; @@ -95,7 +95,7 @@ def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; class X86MemOperand<string printMethod> : Operand<iPTR> { let PrintMethod = printMethod; let NumMIOperands = 4; - let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm); } def i8mem : X86MemOperand<"printi8mem">; @@ -107,6 +107,12 @@ def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f128mem : X86MemOperand<"printf128mem">; +def lea32mem : Operand<i32> { + let PrintMethod = "printi32mem"; + let NumMIOperands = 4; + let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); +} + def SSECC : Operand<i8> { let PrintMethod = "printSSECC"; } @@ -129,9 +135,9 @@ def brtarget : Operand<OtherVT>; // // Define X86 specific addressing mode. 
-def addr : ComplexPattern<iPTR, 4, "SelectAddr", []>; -def leaaddr : ComplexPattern<iPTR, 4, "SelectLEAAddr", - [add, mul, shl, or, frameindex]>; +def addr : ComplexPattern<iPTR, 4, "SelectAddr", []>; +def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr", + [add, mul, shl, or, frameindex]>; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. @@ -158,11 +164,13 @@ def MRMInitReg : Format<32>; //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. -def HasMMX : Predicate<"Subtarget->hasMMX()">; -def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; -def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; -def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; -def FPStack : Predicate<"!Subtarget->hasSSE2()">; +def HasMMX : Predicate<"Subtarget->hasMMX()">; +def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; +def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; +def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; +def FPStack : Predicate<"!Subtarget->hasSSE2()">; +def In32BitMode : Predicate<"!Subtarget->is64Bit()">; +def In64BitMode : Predicate<"Subtarget->is64Bit()">; //===----------------------------------------------------------------------===// // X86 specific pattern fragments. @@ -171,13 +179,14 @@ def FPStack : Predicate<"!Subtarget->hasSSE2()">; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our // machine code emitter. -class ImmType<bits<2> val> { - bits<2> Value = val; +class ImmType<bits<3> val> { + bits<3> Value = val; } def NoImm : ImmType<0>; def Imm8 : ImmType<1>; def Imm16 : ImmType<2>; def Imm32 : ImmType<3>; +def Imm64 : ImmType<4>; // FPFormat - This specifies what form this FP instruction has. This is used by // the Floating-Point stackifier pass. @@ -202,7 +211,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag ops, string AsmStr> Format Form = f; bits<6> FormBits = Form.Value; ImmType ImmT = i; - bits<2> ImmTypeBits = ImmT.Value; + bits<3> ImmTypeBits = ImmT.Value; dag OperandList = ops; string AsmString = AsmStr; @@ -210,9 +219,11 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag ops, string AsmStr> // // Attributes specific to X86 instructions... // - bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix? + bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix? + bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix? bits<4> Prefix = 0; // Which prefix byte does this inst have? + bit hasREX_WPrefix = 0; // Does this inst requires the REX.W prefix? FPFormat FPForm; // What flavor of FP instruction is this? bits<3> FPFormBits = 0; } @@ -226,6 +237,8 @@ class Imp<list<Register> uses, list<Register> defs> { // Prefix byte classes which are used to indicate to the ad-hoc machine code // emitter that various prefix bytes are required. class OpSize { bit hasOpSizePrefix = 1; } +class AdSize { bit hasAdSizePrefix = 1; } +class REX_W { bit hasREX_WPrefix = 1; } class TB { bits<4> Prefix = 1; } class REP { bits<4> Prefix = 2; } class D8 { bits<4> Prefix = 3; } @@ -276,8 +289,6 @@ def i32immSExt8 : PatLeaf<(i32 imm), [{ }]>; // Helper fragments for loads. 
-def loadiPTR : PatFrag<(ops node:$ptr), (iPTR (load node:$ptr))>; - def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>; def loadi16 : PatFrag<(ops node:$ptr), (i16 (load node:$ptr))>; def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>; @@ -308,6 +319,7 @@ def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extload node:$ptr, i16))>; //===----------------------------------------------------------------------===// // Instruction templates... +// class I<bits<8> o, Format f, dag ops, string asm, list<dag> pattern> : X86Inst<o, f, NoImm, ops, asm> { @@ -355,13 +367,13 @@ def IMPLICIT_DEF_GR32 : I<0, Pseudo, (ops GR32:$dst), def NOOP : I<0x90, RawFrm, (ops), "nop", []>; // Truncate -def TRUNC_GR32_GR8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32_:$src), - "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}", []>; -def TRUNC_GR16_GR8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16_:$src), - "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>; -def TRUNC_GR32_GR16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR32:$src), - "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}", - [(set GR16:$dst, (trunc GR32:$src))]>; +def TRUNC_32_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32_:$src), + "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}", []>; +def TRUNC_16_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16_:$src), + "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>; +def TRUNC_32to16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR32:$src), + "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}", + [(set GR16:$dst, (trunc GR32:$src))]>; //===----------------------------------------------------------------------===// // Control Flow Instructions... @@ -388,7 +400,7 @@ let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in { def JMP32r : I<0xFF, MRM4r, (ops GR32:$dst), "jmp{l} {*}$dst", [(brind GR32:$dst)]>; def JMP32m : I<0xFF, MRM4m, (ops i32mem:$dst), "jmp{l} {*}$dst", - [(brind (loadiPTR addr:$dst))]>; + [(brind (loadi32 addr:$dst))]>; } // Conditional branches @@ -510,9 +522,9 @@ def LEA16r : I<0x8D, MRMSrcMem, (ops GR16:$dst, i32mem:$src), "lea{w} {$src|$dst}, {$dst|$src}", []>, OpSize; def LEA32r : I<0x8D, MRMSrcMem, - (ops GR32:$dst, i32mem:$src), + (ops GR32:$dst, lea32mem:$src), "lea{l} {$src|$dst}, {$dst|$src}", - [(set GR32:$dst, leaaddr:$src)]>; + [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; def REP_MOVSB : I<0xA4, RawFrm, (ops), "{rep;movsb|rep movsb}", [(X86rep_movs i8)]>, @@ -1101,9 +1113,10 @@ def INC8r : I<0xFE, MRM0r, (ops GR8 :$dst, GR8 :$src), "inc{b} $dst", [(set GR8:$dst, (add GR8:$src, 1))]>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. def INC16r : I<0x40, AddRegFrm, (ops GR16:$dst, GR16:$src), "inc{w} $dst", - [(set GR16:$dst, (add GR16:$src, 1))]>, OpSize; + [(set GR16:$dst, (add GR16:$src, 1))]>, + OpSize, Requires<[In32BitMode]>; def INC32r : I<0x40, AddRegFrm, (ops GR32:$dst, GR32:$src), "inc{l} $dst", - [(set GR32:$dst, (add GR32:$src, 1))]>; + [(set GR32:$dst, (add GR32:$src, 1))]>, Requires<[In32BitMode]>; } let isTwoAddress = 0, CodeSize = 2 in { def INC8m : I<0xFE, MRM0m, (ops i8mem :$dst), "inc{b} $dst", @@ -1119,9 +1132,10 @@ def DEC8r : I<0xFE, MRM1r, (ops GR8 :$dst, GR8 :$src), "dec{b} $dst", [(set GR8:$dst, (add GR8:$src, -1))]>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
def DEC16r : I<0x48, AddRegFrm, (ops GR16:$dst, GR16:$src), "dec{w} $dst", - [(set GR16:$dst, (add GR16:$src, -1))]>, OpSize; + [(set GR16:$dst, (add GR16:$src, -1))]>, + OpSize, Requires<[In32BitMode]>; def DEC32r : I<0x48, AddRegFrm, (ops GR32:$dst, GR32:$src), "dec{l} $dst", - [(set GR32:$dst, (add GR32:$src, -1))]>; + [(set GR32:$dst, (add GR32:$src, -1))]>, Requires<[In32BitMode]>; } let isTwoAddress = 0, CodeSize = 2 in { @@ -2455,7 +2469,7 @@ def DWARF_LABEL : I<0, Pseudo, (ops i32imm:$id), // Non-Instruction Patterns //===----------------------------------------------------------------------===// -// ConstantPool GlobalAddress, ExternalSymbol +// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>; def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>; def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>; @@ -2477,18 +2491,16 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst), // Calls def : Pat<(X86tailcall GR32:$dst), - (CALL32r GR32:$dst)>; + (CALL32r GR32:$dst)>; -def : Pat<(X86tailcall tglobaladdr:$dst), +def : Pat<(X86tailcall (i32 tglobaladdr:$dst)), (CALLpcrel32 tglobaladdr:$dst)>; -def : Pat<(X86tailcall texternalsym:$dst), +def : Pat<(X86tailcall (i32 texternalsym:$dst)), (CALLpcrel32 texternalsym:$dst)>; - - -def : Pat<(X86call tglobaladdr:$dst), +def : Pat<(X86call (i32 tglobaladdr:$dst)), (CALLpcrel32 tglobaladdr:$dst)>; -def : Pat<(X86call texternalsym:$dst), +def : Pat<(X86call (i32 texternalsym:$dst)), (CALLpcrel32 texternalsym:$dst)>; // X86 specific add which produces a flag. @@ -2611,3 +2623,9 @@ include "X86InstrMMX.td" //===----------------------------------------------------------------------===// include "X86InstrSSE.td" + +//===----------------------------------------------------------------------===// +// X86-64 Support +//===----------------------------------------------------------------------===// + +include "X86InstrX86-64.td" diff --git a/lib/Target/X86/X86InstrX86-64.td b/lib/Target/X86/X86InstrX86-64.td new file mode 100644 index 0000000000..e6bfbc4967 --- /dev/null +++ b/lib/Target/X86/X86InstrX86-64.td @@ -0,0 +1,1084 @@ +//====- X86InstrX86-64.td - Describe the X86 Instruction Set ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the Evan Cheng and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86-64 instruction set, defining the instructions, +// and properties of the instructions which are needed for code generation, +// machine code emission, and analysis. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Operand Definitions... +// + +// 64-bits but only 32 bits are significant. +def i64i32imm : Operand<i64>; +// 64-bits but only 8 bits are significant. 
+def i64i8imm : Operand<i64>; + +def lea64mem : Operand<i64> { + let PrintMethod = "printi64mem"; + let NumMIOperands = 4; + let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm); +} + +def lea64_32mem : Operand<i32> { + let PrintMethod = "printlea64_32mem"; + let NumMIOperands = 4; + let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); +} + +//===----------------------------------------------------------------------===// +// Complex Pattern Definitions... +// +def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr", + [add, mul, shl, or, frameindex, X86Wrapper]>; + +//===----------------------------------------------------------------------===// +// Instruction templates... +// + +class RI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern> + : I<o, F, ops, asm, pattern>, REX_W; +class RIi8 <bits<8> o, Format F, dag ops, string asm, list<dag> pattern> + : Ii8<o, F, ops, asm, pattern>, REX_W; +class RIi32 <bits<8> o, Format F, dag ops, string asm, list<dag> pattern> + : Ii32<o, F, ops, asm, pattern>, REX_W; + +class RIi64<bits<8> o, Format f, dag ops, string asm, list<dag> pattern> + : X86Inst<o, f, Imm64, ops, asm>, REX_W { + let Pattern = pattern; + let CodeSize = 3; +} + +class RSSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern> + : SSI<o, F, ops, asm, pattern>, REX_W; +class RSDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern> + : SDI<o, F, ops, asm, pattern>, REX_W; + +//===----------------------------------------------------------------------===// +// Pattern fragments... +// + +def i64immSExt32 : PatLeaf<(i64 imm), [{ + // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit + // sign extended field. + return (int64_t)N->getValue() == (int32_t)N->getValue(); +}]>; + +def i64immZExt32 : PatLeaf<(i64 imm), [{ + // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit + // unsignedsign extended field. + return (uint64_t)N->getValue() == (uint32_t)N->getValue(); +}]>; + +def i64immSExt8 : PatLeaf<(i64 imm), [{ + // i64immSExt8 predicate - True if the 64-bit immediate fits in a 8-bit + // sign extended field. + return (int64_t)N->getValue() == (int8_t)N->getValue(); +}]>; + +def sextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i1))>; +def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i8))>; +def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i16))>; +def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextload node:$ptr, i32))>; + +def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i1))>; +def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i8))>; +def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i16))>; +def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextload node:$ptr, i32))>; + +def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i1))>; +def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i8))>; +def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i16))>; +def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extload node:$ptr, i32))>; + +//===----------------------------------------------------------------------===// +// Instruction list... +// + +def IMPLICIT_DEF_GR64 : I<0, Pseudo, (ops GR64:$dst), + "#IMPLICIT_DEF $dst", + [(set GR64:$dst, (undef))]>; + +//===----------------------------------------------------------------------===// +// Call Instructions... 
+// +let isCall = 1, noResults = 1 in + // All calls clobber the non-callee saved registers... + let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15] in { + def CALL64pcrel32 : I<0xE8, RawFrm, (ops i64imm:$dst, variable_ops), + "call ${dst:call}", []>; + def CALL64r : I<0xFF, MRM2r, (ops GR64:$dst, variable_ops), + "call {*}$dst", [(X86call GR64:$dst)]>; + def CALL64m : I<0xFF, MRM2m, (ops i64mem:$dst, variable_ops), + "call {*}$dst", []>; + } + +// Branches +let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in { + def JMP64r : I<0xFF, MRM4r, (ops GR64:$dst), "jmp{q} {*}$dst", + [(brind GR64:$dst)]>; + def JMP64m : I<0xFF, MRM4m, (ops i64mem:$dst), "jmp{q} {*}$dst", + [(brind (loadi64 addr:$dst))]>; +} + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions... +// +def LEAVE64 : I<0xC9, RawFrm, + (ops), "leave", []>, Imp<[RBP,RSP],[RBP,RSP]>; +def POP64r : I<0x58, AddRegFrm, + (ops GR64:$reg), "pop{q} $reg", []>, Imp<[RSP],[RSP]>; + +def LEA64_32r : I<0x8D, MRMSrcMem, + (ops GR32:$dst, lea64_32mem:$src), + "lea{l} {$src|$dst}, {$dst|$src}", + [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>; + +def LEA64r : RI<0x8D, MRMSrcMem, (ops GR64:$dst, lea64mem:$src), + "lea{q} {$src|$dst}, {$dst|$src}", + [(set GR64:$dst, lea64addr:$src)]>; + +let isTwoAddress = 1 in +def BSWAP64r : RI<0xC8, AddRegFrm, (ops GR64:$dst, GR64:$src), + "bswap{q} $dst", + [(set GR64:$dst, (bswap GR64:$src))]>, TB; +// Exchange +def XCHG64rr : RI<0x87, MRMDestReg, (ops GR64:$src1, GR64:$src2), + "xchg{q} {$src2|$src1}, {$src1|$src2}", []>; +def XCHG64mr : RI<0x87, MRMDestMem, (ops i64mem:$src1, GR64:$src2), + "xchg{q} {$src2|$src1}, {$src1|$src2}", []>; +def XCHG64rm : RI<0x87, MRMSrcMem, (ops GR64:$src1, i64mem:$src2), + "xchg{q} {$src2|$src1}, {$src1|$src2}", []>; + +// Repeat string ops +def REP_MOVSQ : RI<0xA5, RawFrm, (ops), "{rep;movsq|rep movsq}", + [(X86rep_movs i64)]>, + Imp<[RCX,RDI,RSI], [RCX,RDI,RSI]>, REP; +def REP_STOSQ : RI<0xAB, RawFrm, (ops), "{rep;stosq|rep stosq}", + [(X86rep_stos i64)]>, + Imp<[RAX,RCX,RDI], [RCX,RDI]>, REP; + +//===----------------------------------------------------------------------===// +// Move Instructions... 
+// + +def MOV64rr : RI<0x89, MRMDestReg, (ops GR64:$dst, GR64:$src), + "mov{q} {$src, $dst|$dst, $src}", []>; + +def MOV64ri : RIi64<0xB8, AddRegFrm, (ops GR64:$dst, i64imm:$src), + "movabs{q} {$src, $dst|$dst, $src}", + [(set GR64:$dst, imm:$src)]>; +def MOV64ri32 : RIi32<0xC7, MRM0r, (ops GR64:$dst, i64i32imm:$src), + "mov{q} {$src, $dst|$dst, $src}", + [(set GR64:$dst, i64immSExt32:$src)]>; + +def MOV64rm : RI<0x8B, MRMSrcMem, (ops GR64:$dst, i64mem:$src), + "mov{q} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (load addr:$src))]>; + +def MOV64mr : RI<0x89, MRMDestMem, (ops i64mem:$dst, GR64:$src), + "mov{q} {$src, $dst|$dst, $src}", + [(store GR64:$src, addr:$dst)]>; +def MOV64mi32 : RIi32<0xC7, MRM0m, (ops i64mem:$dst, i64i32imm:$src), + "mov{q} {$src, $dst|$dst, $src}", + [(store i64immSExt32:$src, addr:$dst)]>; + +// Sign/Zero extenders + +def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (ops GR64:$dst, GR8 :$src), + "movs{bq|x} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR8:$src))]>, TB; +def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (ops GR64:$dst, i8mem :$src), + "movs{bq|x} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB; +def MOVSX64rr16: RI<0xBF, MRMSrcReg, (ops GR64:$dst, GR16:$src), + "movs{wq|x} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR16:$src))]>, TB; +def MOVSX64rm16: RI<0xBF, MRMSrcMem, (ops GR64:$dst, i16mem:$src), + "movs{wq|x} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB; +def MOVSX64rr32: RI<0x63, MRMSrcReg, (ops GR64:$dst, GR32:$src), + "movs{lq|xd} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR32:$src))]>; +def MOVSX64rm32: RI<0x63, MRMSrcMem, (ops GR64:$dst, i32mem:$src), + "movs{lq|xd} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i32 addr:$src))]>; + +def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (ops GR64:$dst, GR8 :$src), + "movz{bq|x} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (zext GR8:$src))]>, TB; +def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (ops GR64:$dst, i8mem :$src), + "movz{bq|x} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB; +def MOVZX64rr16: RI<0xB7, MRMSrcReg, (ops GR64:$dst, GR16:$src), + "movz{wq|x} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (zext GR16:$src))]>, TB; +def MOVZX64rm16: RI<0xB7, MRMSrcMem, (ops GR64:$dst, i16mem:$src), + "movz{wq|x} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; + +def CDQE : RI<0x98, RawFrm, (ops), + "{cltq|cdqe}", []>, Imp<[EAX],[RAX]>; // RAX = signext(EAX) + +def CQO : RI<0x99, RawFrm, (ops), + "{cqto|cqo}", []>, Imp<[RAX],[RAX,RDX]>; // RDX:RAX = signext(RAX) + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions... 
+// + +let isTwoAddress = 1 in { +let isConvertibleToThreeAddress = 1 in { +let isCommutable = 1 in +def ADD64rr : RI<0x01, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2), + "add{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, GR64:$src2))]>; + +def ADD64ri32 : RIi32<0x81, MRM0r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2), + "add{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, i64immSExt32:$src2))]>; +def ADD64ri8 : RIi8<0x83, MRM0r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2), + "add{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, i64immSExt8:$src2))]>; +} // isConvertibleToThreeAddress + +def ADD64rm : RI<0x03, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "add{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, (load addr:$src2)))]>; +} // isTwoAddress + +def ADD64mr : RI<0x01, MRMDestMem, (ops i64mem:$dst, GR64:$src2), + "add{q} {$src2, $dst|$dst, $src2}", + [(store (add (load addr:$dst), GR64:$src2), addr:$dst)]>; +def ADD64mi32 : RIi32<0x81, MRM0m, (ops i64mem:$dst, i64i32imm :$src2), + "add{q} {$src2, $dst|$dst, $src2}", + [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; +def ADD64mi8 : RIi8<0x83, MRM0m, (ops i64mem:$dst, i64i8imm :$src2), + "add{q} {$src2, $dst|$dst, $src2}", + [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; + +let isTwoAddress = 1 in { +let isCommutable = 1 in +def ADC64rr : RI<0x11, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2), + "adc{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>; + +def ADC64rm : RI<0x13, MRMSrcMem , (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "adc{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>; + +def ADC64ri32 : RIi32<0x81, MRM2r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2), + "adc{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>; +def ADC64ri8 : RIi8<0x83, MRM2r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2), + "adc{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>; +} // isTwoAddress + +def ADC64mr : RI<0x11, MRMDestMem, (ops i64mem:$dst, GR64:$src2), + "adc{q} {$src2, $dst|$dst, $src2}", + [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>; +def ADC64mi32 : RIi32<0x81, MRM2m, (ops i64mem:$dst, i64i32imm:$src2), + "adc{q} {$src2, $dst|$dst, $src2}", + [(store (adde (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; +def ADC64mi8 : RIi8<0x83, MRM2m, (ops i64mem:$dst, i64i8imm :$src2), + "adc{q} {$src2, $dst|$dst, $src2}", + [(store (adde (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; + +let isTwoAddress = 1 in { +def SUB64rr : RI<0x29, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2), + "sub{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>; + +def SUB64rm : RI<0x2B, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "sub{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2)))]>; + +def SUB64ri32 : RIi32<0x81, MRM5r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2), + "sub{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2))]>; +def SUB64ri8 : RIi8<0x83, MRM5r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2), + "sub{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2))]>; +} // isTwoAddress + +def SUB64mr : RI<0x29, MRMDestMem, (ops i64mem:$dst, GR64:$src2), + "sub{q} {$src2, $dst|$dst, $src2}", + [(store (sub 
(load addr:$dst), GR64:$src2), addr:$dst)]>; +def SUB64mi32 : RIi32<0x81, MRM5m, (ops i64mem:$dst, i64i32imm:$src2), + "sub{q} {$src2, $dst|$dst, $src2}", + [(store (sub (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; +def SUB64mi8 : RIi8<0x83, MRM5m, (ops i64mem:$dst, i64i8imm :$src2), + "sub{q} {$src2, $dst|$dst, $src2}", + [(store (sub (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; + +let isTwoAddress = 1 in { +def SBB64rr : RI<0x19, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2), + "sbb{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>; + +def SBB64rm : RI<0x1B, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "sbb{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>; + +def SBB64ri32 : RIi32<0x81, MRM3r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2), + "sbb{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; +def SBB64ri8 : RIi8<0x83, MRM3r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2), + "sbb{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>; +} // isTwoAddress + +def SBB64mr : RI<0x19, MRMDestMem, (ops i64mem:$dst, GR64:$src2), + "sbb{q} {$src2, $dst|$dst, $src2}", + [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>; +def SBB64mi32 : RIi32<0x81, MRM3m, (ops i64mem:$dst, i64i32imm:$src2), + "sbb{q} {$src2, $dst|$dst, $src2}", + [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; +def SBB64mi8 : RIi8<0x83, MRM3m, (ops i64mem:$dst, i64i8imm :$src2), + "sbb{q} {$src2, $dst|$dst, $src2}", + [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; + +// Unsigned multiplication +def MUL64r : RI<0xF7, MRM4r, (ops GR64:$src), + "mul{q} $src", []>, + Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*GR64 +def MUL64m : RI<0xF7, MRM4m, (ops i64mem:$src), + "mul{q} $src", []>, + Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*[mem64] + +// Signed multiplication +def IMUL64r : RI<0xF7, MRM5r, (ops GR64:$src), + "imul{q} $src", []>, + Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*GR64 +def IMUL64m : RI<0xF7, MRM5m, (ops i64mem:$src), + "imul{q} $src", []>, + Imp<[RAX],[RAX,RDX]>; // RAX,RDX = RAX*[mem64] + +let isTwoAddress = 1 in { +let isCommutable = 1 in +def IMUL64rr : RI<0xAF, MRMSrcReg, (ops GR64:$dst, GR64:$src1, GR64:$src2), + "imul{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>, TB; + +def IMUL64rm : RI<0xAF, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "imul{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2)))]>, TB; +} // isTwoAddress + +// Suprisingly enough, these are not two address instructions! 
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32 + (ops GR64:$dst, GR64:$src1, i64i32imm:$src2), + "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>; +def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 + (ops GR64:$dst, GR64:$src1, i64i8imm:$src2), + "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2))]>; +def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32 + (ops GR64:$dst, i64mem:$src1, i64i32imm:$src2), + "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, (mul (load addr:$src1), i64immSExt32:$src2))]>; +def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 + (ops GR64:$dst, i64mem:$src1, i64i8imm: $src2), + "imul{q} {$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR64:$dst, (mul (load addr:$src1), i64immSExt8:$src2))]>; + +// Unsigned division / remainder +def DIV64r : RI<0xF7, MRM6r, (ops GR64:$src), // RDX:RAX/r64 = RAX,RDX + "div{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>; +def DIV64m : RI<0xF7, MRM6m, (ops i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX + "div{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>; + +// Signed division / remainder +def IDIV64r: RI<0xF7, MRM7r, (ops GR64:$src), // RDX:RAX/r64 = RAX,RDX + "idiv{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>; +def IDIV64m: RI<0xF7, MRM7m, (ops i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX + "idiv{q} $src", []>, Imp<[RAX,RDX],[RAX,RDX]>; + +// Unary instructions +let CodeSize = 2 in { +let isTwoAddress = 1 in +def NEG64r : RI<0xF7, MRM3r, (ops GR64:$dst, GR64:$src), "neg{q} $dst", + [(set GR64:$dst, (ineg GR64:$src))]>; +def NEG64m : RI<0xF7, MRM3m, (ops i64mem:$dst), "neg{q} $dst", + [(store (ineg (loadi64 addr:$dst)), addr:$dst)]>; + +let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in +def INC64r : RI<0xFF, MRM0r, (ops GR64:$dst, GR64:$src), "inc{q} $dst", + [(set GR64:$dst, (add GR64:$src, 1))]>; +def INC64m : RI<0xFF, MRM0m, (ops i64mem:$dst), "inc{q} $dst", + [(store (add (loadi64 addr:$dst), 1), addr:$dst)]>; + +let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in +def DEC64r : RI<0xFF, MRM1r, (ops GR64:$dst, GR64:$src), "dec{q} $dst", + [(set GR64:$dst, (add GR64:$src, -1))]>; +def DEC64m : RI<0xFF, MRM1m, (ops i64mem:$dst), "dec{q} $dst", + [(store (add (loadi64 addr:$dst), -1), addr:$dst)]>; + +// In 64-bit mode, single byte INC and DEC cannot be encoded. +let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in { +// Can transform into LEA. 
+def INC64_16r : I<0xFF, MRM0r, (ops GR16:$dst, GR16:$src), "inc{w} $dst", + [(set GR16:$dst, (add GR16:$src, 1))]>, + OpSize, Requires<[In64BitMode]>; +def INC64_32r : I<0xFF, MRM0r, (ops GR32:$dst, GR32:$src), "inc{l} $dst", + [(set GR32:$dst, (add GR32:$src, 1))]>, + Requires<[In64BitMode]>; +def DEC64_16r : I<0xFF, MRM1r, (ops GR16:$dst, GR16:$src), "dec{w} $dst", + [(set GR16:$dst, (add GR16:$src, -1))]>, + OpSize, Requires<[In64BitMode]>; +def DEC64_32r : I<0xFF, MRM1r, (ops GR32:$dst, GR32:$src), "dec{l} $dst", + [(set GR32:$dst, (add GR32:$src, -1))]>, + Requires<[In64BitMode]>; +} // isConvertibleToThreeAddress +} // CodeSize + + +// Shift instructions +let isTwoAddress = 1 in { +def SHL64rCL : RI<0xD3, MRM4r, (ops GR64:$dst, GR64:$src), + "shl{q} {%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (shl GR64:$src, CL))]>, + Imp<[CL],[]>; +def SHL64ri : RIi8<0xC1, MRM4r, (ops GR64:$dst, GR64:$src1, i8imm:$src2), + "shl{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; +def SHL64r1 : RI<0xD1, MRM4r, (ops GR64:$dst, GR64:$src1), + "shl{q} $dst", []>; +} // isTwoAddress + +def SHL64mCL : RI<0xD3, MRM4m, (ops i64mem:$dst), + "shl{q} {%cl, $dst|$dst, %CL}", + [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>, + Imp<[CL],[]>; +def SHL64mi : RIi8<0xC1, MRM4m, (ops i64mem:$dst, i8imm:$src), + "shl{q} {$src, $dst|$dst, $src}", + [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SHL64m1 : RI<0xC1, MRM4m, (ops i64mem:$dst), + "shl{q} $dst", + [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +let isTwoAddress = 1 in { +def SHR64rCL : RI<0xD3, MRM5r, (ops GR64:$dst, GR64:$src), + "shr{q} {%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (srl GR64:$src, CL))]>, + Imp<[CL],[]>; +def SHR64ri : RIi8<0xC1, MRM5r, (ops GR64:$dst, GR64:$src1, i8imm:$src2), + "shr{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>; +def SHR64r1 : RI<0xD1, MRM5r, (ops GR64:$dst, GR64:$src1), + "shr{q} $dst", + [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>; +} // isTwoAddress + +def SHR64mCL : RI<0xD3, MRM5m, (ops i64mem:$dst), + "shr{q} {%cl, $dst|$dst, %CL}", + [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>, + Imp<[CL],[]>; +def SHR64mi : RIi8<0xC1, MRM5m, (ops i64mem:$dst, i8imm:$src), + "shr{q} {$src, $dst|$dst, $src}", + [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SHR64m1 : RI<0xC1, MRM5m, (ops i64mem:$dst), + "shr{q} $dst", + [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +let isTwoAddress = 1 in { +def SAR64rCL : RI<0xD3, MRM7r, (ops GR64:$dst, GR64:$src), + "sar{q} {%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (sra GR64:$src, CL))]>, Imp<[CL],[]>; +def SAR64ri : RIi8<0xC1, MRM7r, (ops GR64:$dst, GR64:$src1, i8imm:$src2), + "sar{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>; +def SAR64r1 : RI<0xD1, MRM7r, (ops GR64:$dst, GR64:$src1), + "sar{q} $dst", + [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; +} // isTwoAddress + +def SAR64mCL : RI<0xD3, MRM7m, (ops i64mem:$dst), + "sar{q} {%cl, $dst|$dst, %CL}", + [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>, + Imp<[CL],[]>; +def SAR64mi : RIi8<0xC1, MRM7m, (ops i64mem:$dst, i8imm:$src), + "sar{q} {$src, $dst|$dst, $src}", + [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SAR64m1 : RI<0xC1, MRM7m, (ops i64mem:$dst), + "sar{q} $dst", + [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +// Rotate instructions +let isTwoAddress = 1 in { +def ROL64rCL : RI<0xD3, MRM0r, (ops 
GR64:$dst, GR64:$src), + "rol{q} {%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (rotl GR64:$src, CL))]>, Imp<[CL],[]>; +def ROL64ri : RIi8<0xC1, MRM0r, (ops GR64:$dst, GR64:$src1, i8imm:$src2), + "rol{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>; +def ROL64r1 : RI<0xC1, MRM0r, (ops GR64:$dst, GR64:$src1), + "rol{q} $dst", + [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; +} // isTwoAddress + +def ROL64mCL : I<0xD3, MRM0m, (ops i64mem:$dst), + "rol{q} {%cl, $dst|$dst, %CL}", + [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>, + Imp<[CL],[]>; +def ROL64mi : RIi8<0xC1, MRM0m, (ops i64mem:$dst, i8imm:$src), + "rol{q} {$src, $dst|$dst, $src}", + [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def ROL64m1 : RI<0xD1, MRM0m, (ops i64mem:$dst), + "rol{q} $dst", + [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +let isTwoAddress = 1 in { +def ROR64rCL : RI<0xD3, MRM1r, (ops GR64:$dst, GR64:$src), + "ror{q} {%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (rotr GR64:$src, CL))]>, Imp<[CL],[]>; +def ROR64ri : RIi8<0xC1, MRM1r, (ops GR64:$dst, GR64:$src1, i8imm:$src2), + "ror{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>; +def ROR64r1 : RI<0xC1, MRM1r, (ops GR64:$dst, GR64:$src1), + "ror{q} $dst", + [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; +} // isTwoAddress + +def ROR64mCL : RI<0xD3, MRM1m, (ops i64mem:$dst), + "ror{q} {%cl, $dst|$dst, %CL}", + [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>, + Imp<[CL],[]>; +def ROR64mi : RIi8<0xC1, MRM1m, (ops i64mem:$dst, i8imm:$src), + "ror{q} {$src, $dst|$dst, $src}", + [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def ROR64m1 : RI<0xD1, MRM1m, (ops i64mem:$dst), + "ror{q} $dst", + [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +// Double shift instructions (generalizations of rotate) +let isTwoAddress = 1 in { +def SHLD64rrCL : RI<0xA5, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2), + "shld{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>, + Imp<[CL],[]>, TB; +def SHRD64rrCL : RI<0xAD, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2), + "shrd{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>, + Imp<[CL],[]>, TB; + +let isCommutable = 1 in { // FIXME: Update X86InstrInfo::commuteInstruction +def SHLD64rri8 : RIi8<0xA4, MRMDestReg, + (ops GR64:$dst, GR64:$src1, GR64:$src2, i8imm:$src3), + "shld{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>, + TB; +def SHRD64rri8 : RIi8<0xAC, MRMDestReg, + (ops GR64:$dst, GR64:$src1, GR64:$src2, i8imm:$src3), + "shrd{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>, + TB; +} // isCommutable +} // isTwoAddress + +// Temporary hack: there is no patterns associated with these instructions +// so we have to tell tblgen that these do not produce results. +let noResults = 1 in { +def SHLD64mrCL : RI<0xA5, MRMDestMem, (ops i64mem:$dst, GR64:$src2), + "shld{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>, + Imp<[CL],[]>, TB; +def SHRD64mrCL : RI<0xAD, MRMDestMem, (ops i64mem:$dst, GR64:$src2), + "shrd{q} {%cl, $src2, $dst|$dst, $src2, %CL}", []>, + Imp<[CL],[]>, TB; +def SHLD64mri8 : RIi8<0xA4, MRMDestMem, + (ops i64mem:$dst, GR64:$src2, i8imm:$src3), + "shld{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>, + TB; +def SHRD64mri8 : RIi8<0xAC, MRMDestMem, + (ops i64mem:$dst, GR64:$src2, i8imm:$src3), + "shrd{q} {$src3, $src2, $dst|$dst, $src2, $src3}", []>, + TB; +} // noResults + +//===----------------------------------------------------------------------===// +// Logical Instructions... 
+// + +let isTwoAddress = 1 in +def NOT64r : RI<0xF7, MRM2r, (ops GR64:$dst, GR64:$src), "not{q} $dst", + [(set GR64:$dst, (not GR64:$src))]>; +def NOT64m : RI<0xF7, MRM2m, (ops i64mem:$dst), "not{q} $dst", + [(store (not (loadi64 addr:$dst)), addr:$dst)]>; + +let isTwoAddress = 1 in { +let isCommutable = 1 in +def AND64rr : RI<0x21, MRMDestReg, + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "and{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>; +def AND64rm : RI<0x23, MRMSrcMem, + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "and{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, (load addr:$src2)))]>; +def AND64ri32 : RIi32<0x81, MRM4r, + (ops GR64:$dst, GR64:$src1, i64i32imm:$src2), + "and{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, i64immSExt32:$src2))]>; +def AND64ri8 : RIi8<0x83, MRM4r, + (ops GR64:$dst, GR64:$src1, i64i8imm:$src2), + "and{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, i64immSExt8:$src2))]>; +} // isTwoAddress + +def AND64mr : RI<0x21, MRMDestMem, + (ops i64mem:$dst, GR64:$src), + "and{q} {$src, $dst|$dst, $src}", + [(store (and (load addr:$dst), GR64:$src), addr:$dst)]>; +def AND64mi32 : RIi32<0x81, MRM4m, + (ops i64mem:$dst, i64i32imm:$src), + "and{q} {$src, $dst|$dst, $src}", + [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>; +def AND64mi8 : RIi8<0x83, MRM4m, + (ops i64mem:$dst, i64i8imm :$src), + "and{q} {$src, $dst|$dst, $src}", + [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst)]>; + +let isTwoAddress = 1 in { +let isCommutable = 1 in +def OR64rr : RI<0x09, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2), + "or{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>; +def OR64rm : RI<0x0B, MRMSrcMem , (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "or{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, (load addr:$src2)))]>; +def OR64ri32 : RIi32<0x81, MRM1r, (ops GR64:$dst, GR64:$src1, i64i32imm:$src2), + "or{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2))]>; +def OR64ri8 : RIi8<0x83, MRM1r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2), + "or{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2))]>; +} // isTwoAddress + +def OR64mr : RI<0x09, MRMDestMem, (ops i64mem:$dst, GR64:$src), + "or{q} {$src, $dst|$dst, $src}", + [(store (or (load addr:$dst), GR64:$src), addr:$dst)]>; +def OR64mi32 : RIi32<0x81, MRM1m, (ops i64mem:$dst, i64i32imm:$src), + "or{q} {$src, $dst|$dst, $src}", + [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>; +def OR64mi8 : RIi8<0x83, MRM1m, (ops i64mem:$dst, i64i8imm:$src), + "or{q} {$src, $dst|$dst, $src}", + [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst)]>; + +let isTwoAddress = 1 in { +let isCommutable = 1 in +def XOR64rr : RI<0x31, MRMDestReg, (ops GR64:$dst, GR64:$src1, GR64:$src2), + "xor{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>; +def XOR64rm : RI<0x33, MRMSrcMem, (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "xor{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (xor GR64:$src1, (load addr:$src2)))]>; +def XOR64ri32 : RIi32<0x81, MRM6r, + (ops GR64:$dst, GR64:$src1, i64i32imm:$src2), + "xor{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (xor GR64:$src1, i64immSExt32:$src2))]>; +def XOR64ri8 : RIi8<0x83, MRM6r, (ops GR64:$dst, GR64:$src1, i64i8imm:$src2), + "xor{q} {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (xor GR64:$src1, 
i64immSExt8:$src2))]>; +} // isTwoAddress + +def XOR64mr : RI<0x31, MRMDestMem, (ops i64mem:$dst, GR64:$src), + "xor{q} {$src, $dst|$dst, $src}", + [(store (xor (load addr:$dst), GR64:$src), addr:$dst)]>; +def XOR64mi32 : RIi32<0x81, MRM6m, (ops i64mem:$dst, i64i32imm:$src), + "xor{q} {$src, $dst|$dst, $src}", + [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst)]>; +def XOR64mi8 : RIi8<0x83, MRM6m, (ops i64mem:$dst, i64i8imm :$src), + "xor{q} {$src, $dst|$dst, $src}", + [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst)]>; + +//===----------------------------------------------------------------------===// +// Comparison Instructions... +// + +// Integer comparison +let isCommutable = 1 in +def TEST64rr : RI<0x85, MRMDestReg, (ops GR64:$src1, GR64:$src2), + "test{q} {$src2, $src1|$src1, $src2}", + [(X86cmp (and GR64:$src1, GR64:$src2), 0)]>; +def TEST64mr : RI<0x85, MRMDestMem, (ops i64mem:$src1, GR64:$src2), + "test{q} {$src2, $src1|$src1, $src2}", + [/*(X86cmp (and (loadi64 addr:$src1), GR64:$src2), 0)*/]>; +def TEST64rm : RI<0x85, MRMSrcMem, (ops GR64:$src1, i64mem:$src2), + "test{q} {$src2, $src1|$src1, $src2}", + [/*(X86cmp (and GR64:$src1, (loadi64 addr:$src2)), 0)*/]>; +def TEST64ri32 : RIi32<0xF7, MRM0r, (ops GR64:$src1, i64i32imm:$src2), + "test{q} {$src2, $src1|$src1, $src2}", + [(X86cmp (and GR64:$src1, i64immSExt32:$src2), 0)]>; +def TEST64mi32 : RIi32<0xF7, MRM0m, (ops i64mem:$src1, i64i32imm:$src2), + "test{q} {$src2, $src1|$src1, $src2}", + [/*(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), + 0)*/]>; + +def CMP64rr : RI<0x39, MRMDestReg, (ops GR64:$src1, GR64:$src2), + "cmp{q} {$src2, $src1|$src1, $src2}", + [(X86cmp GR64:$src1, GR64:$src2)]>; +def CMP64mr : RI<0x39, MRMDestMem, (ops i64mem:$src1, GR64:$src2), + "cmp{q} {$src2, $src1|$src1, $src2}", + [(X86cmp (loadi64 addr:$src1), GR64:$src2)]>; +def CMP64rm : RI<0x3B, MRMSrcMem, (ops GR64:$src1, i64mem:$src2), + "cmp{q} {$src2, $src1|$src1, $src2}", + [(X86cmp GR64:$src1, (loadi64 addr:$src2))]>; +def CMP64ri32 : RIi32<0x81, MRM7r, (ops GR64:$src1, i64i32imm:$src2), + "cmp{q} {$src2, $src1|$src1, $src2}", + [(X86cmp GR64:$src1, i64immSExt32:$src2)]>; +def CMP64mi32 : RIi32<0x81, MRM7m, (ops i64mem:$src1, i64i32imm:$src2), + "cmp{q} {$src2, $src1|$src1, $src2}", + [(X86cmp (loadi64 addr:$src1), i64immSExt32:$src2)]>; +def CMP64mi8 : RIi8<0x83, MRM7m, (ops i64mem:$src1, i64i8imm:$src2), + "cmp{q} {$src2, $src1|$src1, $src2}", + [(X86cmp (loadi64 addr:$src1), i64immSExt8:$src2)]>; +def CMP64ri8 : RIi8<0x83, MRM7r, (ops GR64:$src1, i64i8imm:$src2), + "cmp{q} {$src2, $src1|$src1, $src2}", + [(X86cmp GR64:$src1, i64immSExt8:$src2)]>; + +// Conditional moves +let isTwoAddress = 1 in { +def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovb {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_B))]>, TB; +def CMOVB64rm : RI<0x42, MRMSrcMem, // if <u, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovb {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_B))]>, TB; +def CMOVAE64rr: RI<0x43, MRMSrcReg, // if >=u, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovae {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_AE))]>, TB; +def CMOVAE64rm: RI<0x43, MRMSrcMem, // if >=u, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovae {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov 
GR64:$src1, (loadi64 addr:$src2), + X86_COND_AE))]>, TB; +def CMOVE64rr : RI<0x44, MRMSrcReg, // if ==, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmove {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_E))]>, TB; +def CMOVE64rm : RI<0x44, MRMSrcMem, // if ==, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmove {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_E))]>, TB; +def CMOVNE64rr: RI<0x45, MRMSrcReg, // if !=, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovne {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_NE))]>, TB; +def CMOVNE64rm: RI<0x45, MRMSrcMem, // if !=, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovne {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_NE))]>, TB; +def CMOVBE64rr: RI<0x46, MRMSrcReg, // if <=u, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovbe {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_BE))]>, TB; +def CMOVBE64rm: RI<0x46, MRMSrcMem, // if <=u, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovbe {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_BE))]>, TB; +def CMOVA64rr : RI<0x47, MRMSrcReg, // if >u, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmova {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_A))]>, TB; +def CMOVA64rm : RI<0x47, MRMSrcMem, // if >u, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmova {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_A))]>, TB; +def CMOVL64rr : RI<0x4C, MRMSrcReg, // if <s, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovl {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_L))]>, TB; +def CMOVL64rm : RI<0x4C, MRMSrcMem, // if <s, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovl {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_L))]>, TB; +def CMOVGE64rr: RI<0x4D, MRMSrcReg, // if >=s, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovge {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_GE))]>, TB; +def CMOVGE64rm: RI<0x4D, MRMSrcMem, // if >=s, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovge {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_GE))]>, TB; +def CMOVLE64rr: RI<0x4E, MRMSrcReg, // if <=s, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovle {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_LE))]>, TB; +def CMOVLE64rm: RI<0x4E, MRMSrcMem, // if <=s, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovle {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_LE))]>, TB; +def CMOVG64rr : RI<0x4F, MRMSrcReg, // if >s, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovg {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_G))]>, TB; +def CMOVG64rm : RI<0x4F, MRMSrcMem, // if >s, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovg {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov 
GR64:$src1, (loadi64 addr:$src2), + X86_COND_G))]>, TB; +def CMOVS64rr : RI<0x48, MRMSrcReg, // if signed, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovs {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_S))]>, TB; +def CMOVS64rm : RI<0x48, MRMSrcMem, // if signed, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovs {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_S))]>, TB; +def CMOVNS64rr: RI<0x49, MRMSrcReg, // if !signed, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovns {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_NS))]>, TB; +def CMOVNS64rm: RI<0x49, MRMSrcMem, // if !signed, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovns {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_NS))]>, TB; +def CMOVP64rr : RI<0x4A, MRMSrcReg, // if parity, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovp {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_P))]>, TB; +def CMOVP64rm : RI<0x4A, MRMSrcMem, // if parity, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovp {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_P))]>, TB; +def CMOVNP64rr : RI<0x4B, MRMSrcReg, // if !parity, GR64 = GR64 + (ops GR64:$dst, GR64:$src1, GR64:$src2), + "cmovnp {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, + X86_COND_NP))]>, TB; +def CMOVNP64rm : RI<0x4B, MRMSrcMem, // if !parity, GR64 = [mem64] + (ops GR64:$dst, GR64:$src1, i64mem:$src2), + "cmovnp {$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), + X86_COND_NP))]>, TB; +} // isTwoAddress + +//===----------------------------------------------------------------------===// +// Conversion Instructions... 
+// + +// f64 -> signed i64 +def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (ops GR64:$dst, VR128:$src), + "cvtsd2si{q} {$src, $dst|$dst, $src}", + []>; // TODO: add intrinsic +def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (ops GR64:$dst, f128mem:$src), + "cvtsd2si{q} {$src, $dst|$dst, $src}", + []>; // TODO: add intrinsic +def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (ops GR64:$dst, FR64:$src), + "cvttsd2si{q} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (fp_to_sint FR64:$src))]>; +def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (ops GR64:$dst, f64mem:$src), + "cvttsd2si{q} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>; +def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (ops GR64:$dst, VR128:$src), + "cvttsd2si{q} {$src, $dst|$dst, $src}", + []>; // TODO: add intrinsic +def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (ops GR64:$dst, f128mem:$src), + "cvttsd2si{q} {$src, $dst|$dst, $src}", + []>; // TODO: add intrinsic + +// Signed i64 -> f64 +def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (ops FR64:$dst, GR64:$src), + "cvtsi2sd{q} {$src, $dst|$dst, $src}", + [(set FR64:$dst, (sint_to_fp GR64:$src))]>; +def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (ops FR64:$dst, i64mem:$src), + "cvtsi2sd{q} {$src, $dst|$dst, $src}", + [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>; +let isTwoAddress = 1 in { +def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, GR64:$src2), + "cvtsi2sd{q} {$src2, $dst|$dst, $src2}", + []>; // TODO: add intrinsic +def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i64mem:$src2), + "cvtsi2sd{q} {$src2, $dst|$dst, $src2}", + []>; // TODO: add intrinsic +} // isTwoAddress + +// Signed i64 -> f32 +def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (ops FR32:$dst, GR64:$src), + "cvtsi2ss{q} {$src, $dst|$dst, $src}", + [(set FR32:$dst, (sint_to_fp GR64:$src))]>; +def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (ops FR32:$dst, i64mem:$src), + "cvtsi2ss{q} {$src, $dst|$dst, $src}", + [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>; +let isTwoAddress = 1 in { +def Int_CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, GR64:$src2), + "cvtsi2ss{q} {$src2, $dst|$dst, $src2}", + []>; // TODO: add intrinsic +def Int_CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i64mem:$src2), + "cvtsi2ss{q} {$src2, $dst|$dst, $src2}", + []>; // TODO: add intrinsic +} // isTwoAddress + +// f32 -> signed i64 +def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (ops GR64:$dst, VR128:$src), + "cvtss2si{q} {$src, $dst|$dst, $src}", + []>; // TODO: add intrinsic +def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (ops GR64:$dst, f32mem:$src), + "cvtss2si{q} {$src, $dst|$dst, $src}", + []>; // TODO: add intrinsic +def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (ops GR64:$dst, FR32:$src), + "cvttss2si{q} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (fp_to_sint FR32:$src))]>; +def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (ops GR64:$dst, f32mem:$src), + "cvttss2si{q} {$src, $dst|$dst, $src}", + [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>; +def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (ops GR64:$dst, VR128:$src), + "cvttss2si{q} {$src, $dst|$dst, $src}", + []>; // TODO: add intrinsic +def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (ops GR64:$dst, f32mem:$src), + "cvttss2si{q} {$src, $dst|$dst, $src}", + []>; // TODO: add intrinsic + +//===----------------------------------------------------------------------===// +// Alias Instructions +//===----------------------------------------------------------------------===// + +// Truncate +// In 64-mode, each 
64-bit and 32-bit registers has a low 8-bit sub-register. +def TRUNC_64to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR64:$src), + "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}", + [(set GR8:$dst, (trunc GR64:$src))]>; +def TRUNC_32to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32:$src), + "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}", + [(set GR8:$dst, (trunc GR32:$src))]>, + Requires<[In64BitMode]>; +def TRUNC_16to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16:$src), + "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", + [(set GR8:$dst, (trunc GR16:$src))]>, + Requires<[In64BitMode]>; + +def TRUNC_64to16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR64:$src), + "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}", + [(set GR16:$dst, (trunc GR64:$src))]>; + +def TRUNC_64to32 : I<0x89, MRMDestReg, (ops GR32:$dst, GR64:$src), + "mov{l} {${src:subreg32}, $dst|$dst, ${src:subreg32}}", + [(set GR32:$dst, (trunc GR64:$src))]>; + +// Zero-extension +// TODO: Remove this after proper i32 -> i64 zext support. +def PsMOVZX64rr32: I<0x89, MRMDestReg, (ops GR64:$dst, GR32:$src), + "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, (zext GR32:$src))]>; +def PsMOVZX64rm32: I<0x8B, MRMSrcMem, (ops GR64:$dst, i32mem:$src), + "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; + + +// Alias instructions that map movr0 to xor. +// FIXME: remove when we can teach regalloc that xor reg, reg is ok. +// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove +// when we have a better way to specify isel priority. +let AddedComplexity = 1 in +def MOV64r0 : RI<0x31, MRMInitReg, (ops GR64:$dst), + "xor{q} $dst, $dst", + [(set GR64:$dst, 0)]>; + +// Materialize i64 constant where top 32-bits are zero. +let AddedComplexity = 1 in +def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (ops GR64:$dst, i64i32imm:$src), + "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}", + [(set GR64:$dst, i64immZExt32:$src)]>; + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +// Calls +// Direct PC relative function call for small code model. 32-bit displacement +// sign extended to 64-bit. 
+def : Pat<(X86call (i64 tglobaladdr:$dst)), + (CALL64pcrel32 tglobaladdr:$dst)>; +def : Pat<(X86call (i64 texternalsym:$dst)), + (CALL64pcrel32 texternalsym:$dst)>; + +def : Pat<(X86tailcall (i64 tglobaladdr:$dst)), + (CALL64pcrel32 tglobaladdr:$dst)>; +def : Pat<(X86tailcall (i64 texternalsym:$dst)), + (CALL64pcrel32 texternalsym:$dst)>; + +def : Pat<(X86tailcall GR64:$dst), + (CALL64r GR64:$dst)>; + +// {s|z}extload bool -> {s|z}extload byte +def : Pat<(sextloadi64i1 addr:$src), (MOVSX64rm8 addr:$src)>; +def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; + +// extload +def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; +def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>; +def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; +def : Pat<(extloadi64i32 addr:$src), (PsMOVZX64rm32 addr:$src)>; + +// anyext -> zext +def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>; +def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16:$src)>; +def : Pat<(i64 (anyext GR32:$src)), (PsMOVZX64rr32 GR32:$src)>; +def : Pat<(i64 (anyext (loadi8 addr:$src))), (MOVZX64rm8 addr:$src)>; +def : Pat<(i64 (anyext (loadi16 addr:$src))), (MOVZX64rm16 addr:$src)>; +def : Pat<(i64 (anyext (loadi32 addr:$src))), (PsMOVZX64rm32 addr:$src)>; + +//===----------------------------------------------------------------------===// +// Some peepholes +//===----------------------------------------------------------------------===// + +// (shl x, 1) ==> (add x, x) +def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; + +// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c) +def : Pat<(or (srl GR64:$src1, CL:$amt), + (shl GR64:$src2, (sub 64, CL:$amt))), + (SHRD64rrCL GR64:$src1, GR64:$src2)>; + +def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt), + (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), + (SHRD64mrCL addr:$dst, GR64:$src2)>; + +// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) +def : Pat<(or (shl GR64:$src1, CL:$amt), + (srl GR64:$src2, (sub 64, CL:$amt))), + (SHLD64rrCL GR64:$src1, GR64:$src2)>; + +def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt), + (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), + (SHLD64mrCL addr:$dst, GR64:$src2)>; diff --git a/lib/Target/X86/X86IntelAsmPrinter.cpp b/lib/Target/X86/X86IntelAsmPrinter.cpp index ddf807f768..24dbb15941 100755 --- a/lib/Target/X86/X86IntelAsmPrinter.cpp +++ b/lib/Target/X86/X86IntelAsmPrinter.cpp @@ -86,8 +86,9 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO, if (MRegisterInfo::isPhysicalRegister(MO.getReg())) { unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { - MVT::ValueType VT = (strcmp(Modifier,"subreg16") == 0) - ? MVT::i16 : MVT::i8; + MVT::ValueType VT = (strcmp(Modifier,"subreg64") == 0) ? + MVT::i64 : ((strcmp(Modifier, "subreg32") == 0) ? MVT::i32 : + ((strcmp(Modifier,"subreg16") == 0) ? 
MVT::i16 :MVT::i8)); Reg = getX86SubSuperRegister(Reg, VT); } O << RI.get(Reg).Name; @@ -137,7 +138,8 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO, } } -void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){ +void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier) { assert(isMem(MI, Op) && "Invalid memory reference!"); const MachineOperand &BaseReg = MI->getOperand(Op); @@ -156,7 +158,7 @@ void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){ O << "["; bool NeedPlus = false; if (BaseReg.getReg()) { - printOp(BaseReg, "mem"); + printOp(BaseReg, Modifier); NeedPlus = true; } @@ -164,7 +166,7 @@ void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){ if (NeedPlus) O << " + "; if (ScaleVal != 1) O << ScaleVal << "*"; - printOp(IndexReg); + printOp(IndexReg, Modifier); NeedPlus = true; } @@ -259,14 +261,21 @@ void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) { // See if a truncate instruction can be turned into a nop. switch (MI->getOpcode()) { default: break; - case X86::TRUNC_GR32_GR16: - case X86::TRUNC_GR32_GR8: - case X86::TRUNC_GR16_GR8: { + case X86::TRUNC_64to32: + case X86::TRUNC_64to16: + case X86::TRUNC_32to16: + case X86::TRUNC_32to8: + case X86::TRUNC_16to8: + case X86::TRUNC_32_to8: + case X86::TRUNC_16_to8: { const MachineOperand &MO0 = MI->getOperand(0); const MachineOperand &MO1 = MI->getOperand(1); unsigned Reg0 = MO0.getReg(); unsigned Reg1 = MO1.getReg(); - if (MI->getOpcode() == X86::TRUNC_GR32_GR16) + unsigned Opc = MI->getOpcode(); + if (Opc == X86::TRUNC_64to32) + Reg1 = getX86SubSuperRegister(Reg1, MVT::i32); + else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16) Reg1 = getX86SubSuperRegister(Reg1, MVT::i16); else Reg1 = getX86SubSuperRegister(Reg1, MVT::i8); @@ -275,6 +284,9 @@ void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) { O << "\n\t"; break; } + case X86::PsMOVZX64rr32: + O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t"; + break; } // Call the autogenerated instruction printer routines. 
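Note on the truncate handling in the X86IntelAsmPrinter.cpp hunk above: TRUNC_64to32 and the other truncate pseudos can be turned into nops whenever the destination register is already the matching sub-register of the source, because the 8/16/32-bit registers alias the low bits of the 64-bit ones (and any write to a 32-bit register implicitly clears the upper 32 bits, which is what PsMOVZX64rr32 leans on). The following is only a standalone sketch of that check; the register ids and the Sub32 table are illustrative stand-ins for getX86SubSuperRegister, not the real LLVM API.

#include <cassert>
#include <map>

// Illustrative register ids; in LLVM these come from the generated register info.
enum Reg { RAX, RCX, EAX, ECX };

// Illustrative 64-bit -> low 32-bit alias table; the backend gets this mapping
// from getX86SubSuperRegister instead.
static const std::map<Reg, Reg> Sub32 = {{RAX, EAX}, {RCX, ECX}};

// A 64->32 truncate needs no machine instruction when the destination is
// already the low 32-bit alias of the source register.
static bool truncIsNop(Reg Dst32, Reg Src64) {
  return Sub32.at(Src64) == Dst32;
}

int main() {
  assert(truncIsNop(EAX, RAX));   // "mov eax, eax": the printer emits only a comment
  assert(!truncIsNop(ECX, RAX));  // different registers: a real mov is still needed
  return 0;
}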
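Similarly, the i64immSExt32 / i64immZExt32 / i64immSExt8 predicates introduced in X86InstrX86-64.td earlier in this patch all reduce to one question: does extending the narrow field reproduce the 64-bit value? A minimal standalone C++ rendering of the same casts, shown here only to spell the idea out (plain functions, not the PatLeaf machinery):

#include <cassert>
#include <cstdint>

// Mirrors i64immSExt32: the value survives a round trip through a
// sign-extended 32-bit field.
static bool fitsSExt32(int64_t V) { return V == (int64_t)(int32_t)V; }

// Mirrors i64immZExt32: the value survives a round trip through a
// zero-extended 32-bit field.
static bool fitsZExt32(uint64_t V) { return V == (uint64_t)(uint32_t)V; }

// Mirrors i64immSExt8: same check with an 8-bit field.
static bool fitsSExt8(int64_t V) { return V == (int64_t)(int8_t)V; }

int main() {
  assert(fitsSExt32(-1) && !fitsZExt32(uint64_t(-1)));   // all-ones sign-extends, but does not zero-extend
  assert(fitsZExt32(0xFFFFFFFFull) && !fitsSExt32(0xFFFFFFFFll));
  assert(fitsSExt8(-128) && !fitsSExt8(128));
  return 0;
}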
diff --git a/lib/Target/X86/X86IntelAsmPrinter.h b/lib/Target/X86/X86IntelAsmPrinter.h index ef0af2a6b5..110420bb74 100755 --- a/lib/Target/X86/X86IntelAsmPrinter.h +++ b/lib/Target/X86/X86IntelAsmPrinter.h @@ -80,6 +80,10 @@ struct X86IntelAsmPrinter : public X86SharedAsmPrinter { O << "XMMWORD PTR "; printMemReference(MI, OpNo); } + void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) { + O << "QWORD PTR "; + printMemReference(MI, OpNo, "subreg64"); + } bool printAsmMRegister(const MachineOperand &MO, const char Mode); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -89,7 +93,8 @@ struct X86IntelAsmPrinter : public X86SharedAsmPrinter { void printMachineInstruction(const MachineInstr *MI); void printOp(const MachineOperand &MO, const char *Modifier = 0); void printSSECC(const MachineInstr *MI, unsigned Op); - void printMemReference(const MachineInstr *MI, unsigned Op); + void printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier=NULL); void printPICLabel(const MachineInstr *MI, unsigned Op); bool runOnMachineFunction(MachineFunction &F); bool doInitialization(Module &M); diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 9bb2a72523..7ea7e9ea6d 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -42,7 +42,65 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; // Provide a wrapper for X86CompilationCallback2 that saves non-traditional // callee saved registers, for the fastcc calling convention. extern "C" { -#if defined(__i386__) || defined(i386) || defined(_M_IX86) +#if defined(__x86_64__) + // No need to save EAX/EDX for X86-64. + void X86CompilationCallback(void); + asm( + ".text\n" + ".align 8\n" + ".globl _X86CompilationCallback\n" + "_X86CompilationCallback:\n" + // Save RBP + "pushq %rbp\n" + // Save RSP + "movq %rsp, %rbp\n" + // Save all int arg registers + "pushq %rdi\n" + "pushq %rsi\n" + "pushq %rdx\n" + "pushq %rcx\n" + "pushq %r8\n" + "pushq %r9\n" + // Align stack on 16-byte boundary. ESP might not be properly aligned + // (8 byte) if this is called from an indirect stub. + "andq $-16, %rsp\n" + // Save all XMM arg registers + "subq $128, %rsp\n" + "movaps %xmm0, (%rsp)\n" + "movaps %xmm1, 16(%rsp)\n" + "movaps %xmm2, 32(%rsp)\n" + "movaps %xmm3, 48(%rsp)\n" + "movaps %xmm4, 64(%rsp)\n" + "movaps %xmm5, 80(%rsp)\n" + "movaps %xmm6, 96(%rsp)\n" + "movaps %xmm7, 112(%rsp)\n" + // JIT callee + "movq %rbp, %rdi\n" // Pass prev frame and return address + "movq 8(%rbp), %rsi\n" + "call _X86CompilationCallback2\n" + // Restore all XMM arg registers + "movaps 112(%rsp), %xmm7\n" + "movaps 96(%rsp), %xmm6\n" + "movaps 80(%rsp), %xmm5\n" + "movaps 64(%rsp), %xmm4\n" + "movaps 48(%rsp), %xmm3\n" + "movaps 32(%rsp), %xmm2\n" + "movaps 16(%rsp), %xmm1\n" + "movaps (%rsp), %xmm0\n" + // Restore RSP + "movq %rbp, %rsp\n" + // Restore all int arg registers + "subq $48, %rsp\n" + "popq %r9\n" + "popq %r8\n" + "popq %rcx\n" + "popq %rdx\n" + "popq %rsi\n" + "popq %rdi\n" + // Restore RBP + "popq %rbp\n" + "ret\n"); +#elif defined(__i386__) || defined(i386) || defined(_M_IX86) #ifndef _MSC_VER void X86CompilationCallback(void); asm( @@ -122,7 +180,7 @@ extern "C" void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { "Could not find return address on the stack!"); // It's a stub if there is an interrupt marker after the call. 
- bool isStub = ((unsigned char*)(intptr_t)RetAddr)[0] == 0xCD; + bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD; // The call instruction should have pushed the return value onto the stack... RetAddr -= 4; // Backtrack to the reference itself... @@ -135,20 +193,20 @@ extern "C" void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { #endif // Sanity check to make sure this really is a call instruction. - assert(((unsigned char*)(intptr_t)RetAddr)[-1] == 0xE8 &&"Not a call instr!"); + assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!"); - unsigned NewVal = (intptr_t)JITCompilerFunction((void*)(intptr_t)RetAddr); + intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr); // Rewrite the call target... so that we don't end up here every time we // execute the call. - *(unsigned*)(intptr_t)RetAddr = NewVal-RetAddr-4; + *(unsigned *)RetAddr = (unsigned)(NewVal-RetAddr-4); if (isStub) { // If this is a stub, rewrite the call into an unconditional branch // instruction so that two return addresses are not pushed onto the stack // when the requested function finally gets called. This also makes the // 0xCD byte (interrupt) dead, so the marker doesn't effect anything. - ((unsigned char*)(intptr_t)RetAddr)[-1] = 0xE9; + ((unsigned char*)RetAddr)[-1] = 0xE9; } // Change the return address to reexecute the call instruction... @@ -189,16 +247,17 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR, void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); switch ((X86::RelocationType)MR->getRelocationType()) { - case X86::reloc_pcrel_word: + case X86::reloc_pcrel_word: { // PC relative relocation, add the relocated value to the value already in // memory, after we adjust it for where the PC is. - ResultPtr = ResultPtr-(intptr_t)RelocPos-4; - *((intptr_t*)RelocPos) += ResultPtr; + ResultPtr = ResultPtr-(intptr_t)RelocPos-4-MR->getConstantVal(); + *((unsigned*)RelocPos) += (unsigned)ResultPtr; break; + } case X86::reloc_absolute_word: // Absolute relocation, just add the relocated value to the value already // in memory. - *((intptr_t*)RelocPos) += ResultPtr; + *((unsigned*)RelocPos) += (unsigned)ResultPtr; break; } } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 3a8765330d..037220adbb 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -14,13 +14,13 @@ #include "X86.h" #include "X86RegisterInfo.h" -#include "X86Subtarget.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" +#include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/Constants.h" -#include "llvm/Type.h" #include "llvm/Function.h" +#include "llvm/Type.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" @@ -46,15 +46,32 @@ namespace { cl::Hidden); } -X86RegisterInfo::X86RegisterInfo(const TargetInstrInfo &tii) - : X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP), TII(tii) {} +X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, + const TargetInstrInfo &tii) + : X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP), + TM(tm), TII(tii) { + // Cache some information. 
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); + Is64Bit = Subtarget->is64Bit(); + if (Is64Bit) { + SlotSize = 8; + StackPtr = X86::RSP; + FramePtr = X86::RBP; + } else { + SlotSize = 4; + StackPtr = X86::ESP; + FramePtr = X86::EBP; + } +} void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, int FrameIdx, const TargetRegisterClass *RC) const { unsigned Opc; - if (RC == &X86::GR32RegClass) { + if (RC == &X86::GR64RegClass) { + Opc = X86::MOV64mr; + } else if (RC == &X86::GR32RegClass) { Opc = X86::MOV32mr; } else if (RC == &X86::GR16RegClass) { Opc = X86::MOV16mr; @@ -84,7 +101,9 @@ void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC) const{ unsigned Opc; - if (RC == &X86::GR32RegClass) { + if (RC == &X86::GR64RegClass) { + Opc = X86::MOV64rm; + } else if (RC == &X86::GR32RegClass) { Opc = X86::MOV32rm; } else if (RC == &X86::GR16RegClass) { Opc = X86::MOV16rm; @@ -114,7 +133,9 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *RC) const { unsigned Opc; - if (RC == &X86::GR32RegClass) { + if (RC == &X86::GR64RegClass) { + Opc = X86::MOV64rr; + } else if (RC == &X86::GR32RegClass) { Opc = X86::MOV32rr; } else if (RC == &X86::GR16RegClass) { Opc = X86::MOV16rr; @@ -270,12 +291,18 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::ADC32ri, X86::ADC32mi }, { X86::ADC32ri8, X86::ADC32mi8 }, { X86::ADC32rr, X86::ADC32mr }, + { X86::ADC64ri32, X86::ADC64mi32 }, + { X86::ADC64ri8, X86::ADC64mi8 }, + { X86::ADC64rr, X86::ADC64mr }, { X86::ADD16ri, X86::ADD16mi }, { X86::ADD16ri8, X86::ADD16mi8 }, { X86::ADD16rr, X86::ADD16mr }, { X86::ADD32ri, X86::ADD32mi }, { X86::ADD32ri8, X86::ADD32mi8 }, { X86::ADD32rr, X86::ADD32mr }, + { X86::ADD64ri32, X86::ADD64mi32 }, + { X86::ADD64ri8, X86::ADD64mi8 }, + { X86::ADD64rr, X86::ADD64mr }, { X86::ADD8ri, X86::ADD8mi }, { X86::ADD8rr, X86::ADD8mr }, { X86::AND16ri, X86::AND16mi }, @@ -284,19 +311,30 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::AND32ri, X86::AND32mi }, { X86::AND32ri8, X86::AND32mi8 }, { X86::AND32rr, X86::AND32mr }, + { X86::AND64ri32, X86::AND64mi32 }, + { X86::AND64ri8, X86::AND64mi8 }, + { X86::AND64rr, X86::AND64mr }, { X86::AND8ri, X86::AND8mi }, { X86::AND8rr, X86::AND8mr }, { X86::DEC16r, X86::DEC16m }, { X86::DEC32r, X86::DEC32m }, + { X86::DEC64_16r, X86::DEC16m }, + { X86::DEC64_32r, X86::DEC32m }, + { X86::DEC64r, X86::DEC64m }, { X86::DEC8r, X86::DEC8m }, { X86::INC16r, X86::INC16m }, { X86::INC32r, X86::INC32m }, + { X86::INC64_16r, X86::INC16m }, + { X86::INC64_32r, X86::INC32m }, + { X86::INC64r, X86::INC64m }, { X86::INC8r, X86::INC8m }, { X86::NEG16r, X86::NEG16m }, { X86::NEG32r, X86::NEG32m }, + { X86::NEG64r, X86::NEG64m }, { X86::NEG8r, X86::NEG8m }, { X86::NOT16r, X86::NOT16m }, { X86::NOT32r, X86::NOT32m }, + { X86::NOT64r, X86::NOT64m }, { X86::NOT8r, X86::NOT8m }, { X86::OR16ri, X86::OR16mi }, { X86::OR16ri8, X86::OR16mi8 }, @@ -304,6 +342,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::OR32ri, X86::OR32mi }, { X86::OR32ri8, X86::OR32mi8 }, { X86::OR32rr, X86::OR32mr }, + { X86::OR64ri32, X86::OR64mi32 }, + { X86::OR64ri8, X86::OR64mi8 }, + { X86::OR64rr, X86::OR64mr }, { X86::OR8ri, X86::OR8mi }, { X86::OR8rr, X86::OR8mr }, { X86::ROL16r1, X86::ROL16m1 }, @@ -312,6 +353,9 @@ MachineInstr* 
X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::ROL32r1, X86::ROL32m1 }, { X86::ROL32rCL, X86::ROL32mCL }, { X86::ROL32ri, X86::ROL32mi }, + { X86::ROL64r1, X86::ROL64m1 }, + { X86::ROL64rCL, X86::ROL64mCL }, + { X86::ROL64ri, X86::ROL64mi }, { X86::ROL8r1, X86::ROL8m1 }, { X86::ROL8rCL, X86::ROL8mCL }, { X86::ROL8ri, X86::ROL8mi }, @@ -321,6 +365,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::ROR32r1, X86::ROR32m1 }, { X86::ROR32rCL, X86::ROR32mCL }, { X86::ROR32ri, X86::ROR32mi }, + { X86::ROR64r1, X86::ROR64m1 }, + { X86::ROR64rCL, X86::ROR64mCL }, + { X86::ROR64ri, X86::ROR64mi }, { X86::ROR8r1, X86::ROR8m1 }, { X86::ROR8rCL, X86::ROR8mCL }, { X86::ROR8ri, X86::ROR8mi }, @@ -330,18 +377,27 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::SAR32r1, X86::SAR32m1 }, { X86::SAR32rCL, X86::SAR32mCL }, { X86::SAR32ri, X86::SAR32mi }, + { X86::SAR64r1, X86::SAR64m1 }, + { X86::SAR64rCL, X86::SAR64mCL }, + { X86::SAR64ri, X86::SAR64mi }, { X86::SAR8r1, X86::SAR8m1 }, { X86::SAR8rCL, X86::SAR8mCL }, { X86::SAR8ri, X86::SAR8mi }, { X86::SBB32ri, X86::SBB32mi }, { X86::SBB32ri8, X86::SBB32mi8 }, { X86::SBB32rr, X86::SBB32mr }, + { X86::SBB64ri32, X86::SBB64mi32 }, + { X86::SBB64ri8, X86::SBB64mi8 }, + { X86::SBB64rr, X86::SBB64mr }, { X86::SHL16r1, X86::SHL16m1 }, { X86::SHL16rCL, X86::SHL16mCL }, { X86::SHL16ri, X86::SHL16mi }, { X86::SHL32r1, X86::SHL32m1 }, { X86::SHL32rCL, X86::SHL32mCL }, { X86::SHL32ri, X86::SHL32mi }, + { X86::SHL64r1, X86::SHL64m1 }, + { X86::SHL64rCL, X86::SHL64mCL }, + { X86::SHL64ri, X86::SHL64mi }, { X86::SHL8r1, X86::SHL8m1 }, { X86::SHL8rCL, X86::SHL8mCL }, { X86::SHL8ri, X86::SHL8mi }, @@ -349,12 +405,17 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::SHLD16rri8, X86::SHLD16mri8 }, { X86::SHLD32rrCL, X86::SHLD32mrCL }, { X86::SHLD32rri8, X86::SHLD32mri8 }, + { X86::SHLD64rrCL, X86::SHLD64mrCL }, + { X86::SHLD64rri8, X86::SHLD64mri8 }, { X86::SHR16r1, X86::SHR16m1 }, { X86::SHR16rCL, X86::SHR16mCL }, { X86::SHR16ri, X86::SHR16mi }, { X86::SHR32r1, X86::SHR32m1 }, { X86::SHR32rCL, X86::SHR32mCL }, { X86::SHR32ri, X86::SHR32mi }, + { X86::SHR64r1, X86::SHR64m1 }, + { X86::SHR64rCL, X86::SHR64mCL }, + { X86::SHR64ri, X86::SHR64mi }, { X86::SHR8r1, X86::SHR8m1 }, { X86::SHR8rCL, X86::SHR8mCL }, { X86::SHR8ri, X86::SHR8mi }, @@ -362,12 +423,17 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::SHRD16rri8, X86::SHRD16mri8 }, { X86::SHRD32rrCL, X86::SHRD32mrCL }, { X86::SHRD32rri8, X86::SHRD32mri8 }, + { X86::SHRD64rrCL, X86::SHRD64mrCL }, + { X86::SHRD64rri8, X86::SHRD64mri8 }, { X86::SUB16ri, X86::SUB16mi }, { X86::SUB16ri8, X86::SUB16mi8 }, { X86::SUB16rr, X86::SUB16mr }, { X86::SUB32ri, X86::SUB32mi }, { X86::SUB32ri8, X86::SUB32mi8 }, { X86::SUB32rr, X86::SUB32mr }, + { X86::SUB64ri32, X86::SUB64mi32 }, + { X86::SUB64ri8, X86::SUB64mi8 }, + { X86::SUB64rr, X86::SUB64mr }, { X86::SUB8ri, X86::SUB8mi }, { X86::SUB8rr, X86::SUB8mr }, { X86::XOR16ri, X86::XOR16mi }, @@ -376,6 +442,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::XOR32ri, X86::XOR32mi }, { X86::XOR32ri8, X86::XOR32mi8 }, { X86::XOR32rr, X86::XOR32mr }, + { X86::XOR64ri32, X86::XOR64mi32 }, + { X86::XOR64ri8, X86::XOR64mi8 }, + { X86::XOR64rr, X86::XOR64mr }, { X86::XOR8ri, X86::XOR8mi }, { X86::XOR8rr, X86::XOR8mr } }; @@ -388,6 +457,8 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, return MakeM0Inst(X86::MOV16mi, FrameIndex, MI); 
else if (MI->getOpcode() == X86::MOV32r0) return MakeM0Inst(X86::MOV32mi, FrameIndex, MI); + else if (MI->getOpcode() == X86::MOV64r0) + return MakeM0Inst(X86::MOV64mi32, FrameIndex, MI); else if (MI->getOpcode() == X86::MOV8r0) return MakeM0Inst(X86::MOV8mi, FrameIndex, MI); @@ -399,19 +470,24 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::CMP8ri, X86::CMP8mi }, { X86::DIV16r, X86::DIV16m }, { X86::DIV32r, X86::DIV32m }, + { X86::DIV64r, X86::DIV64m }, { X86::DIV8r, X86::DIV8m }, { X86::FsMOVAPDrr, X86::MOVSDmr }, { X86::FsMOVAPSrr, X86::MOVSSmr }, { X86::IDIV16r, X86::IDIV16m }, { X86::IDIV32r, X86::IDIV32m }, + { X86::IDIV64r, X86::IDIV64m }, { X86::IDIV8r, X86::IDIV8m }, { X86::IMUL16r, X86::IMUL16m }, { X86::IMUL32r, X86::IMUL32m }, + { X86::IMUL64r, X86::IMUL64m }, { X86::IMUL8r, X86::IMUL8m }, { X86::MOV16ri, X86::MOV16mi }, { X86::MOV16rr, X86::MOV16mr }, { X86::MOV32ri, X86::MOV32mi }, { X86::MOV32rr, X86::MOV32mr }, + { X86::MOV64ri32, X86::MOV64mi32 }, + { X86::MOV64rr, X86::MOV64mr }, { X86::MOV8ri, X86::MOV8mi }, { X86::MOV8rr, X86::MOV8mr }, { X86::MOVAPDrr, X86::MOVAPDmr }, @@ -424,6 +500,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::MOVUPSrr, X86::MOVUPSmr }, { X86::MUL16r, X86::MUL16m }, { X86::MUL32r, X86::MUL32m }, + { X86::MUL64r, X86::MUL64m }, { X86::MUL8r, X86::MUL8m }, { X86::SETAEr, X86::SETAEm }, { X86::SETAr, X86::SETAm }, @@ -441,9 +518,11 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::SETSr, X86::SETSm }, { X86::TEST16ri, X86::TEST16mi }, { X86::TEST32ri, X86::TEST32mi }, + { X86::TEST64ri32, X86::TEST64mi32 }, { X86::TEST8ri, X86::TEST8mi }, { X86::XCHG16rr, X86::XCHG16mr }, { X86::XCHG32rr, X86::XCHG32mr }, + { X86::XCHG64rr, X86::XCHG64mr }, { X86::XCHG8rr, X86::XCHG8mr } }; ASSERT_SORTED(OpcodeTable); @@ -453,16 +532,23 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, static const TableEntry OpcodeTable[] = { { X86::CMP16rr, X86::CMP16rm }, { X86::CMP32rr, X86::CMP32rm }, + { X86::CMP64ri32, X86::CMP64mi32 }, + { X86::CMP64ri8, X86::CMP64mi8 }, + { X86::CMP64rr, X86::CMP64rm }, { X86::CMP8rr, X86::CMP8rm }, { X86::CMPPDrri, X86::CMPPDrmi }, { X86::CMPPSrri, X86::CMPPSrmi }, { X86::CMPSDrr, X86::CMPSDrm }, { X86::CMPSSrr, X86::CMPSSrm }, { X86::CVTSD2SSrr, X86::CVTSD2SSrm }, + { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm }, { X86::CVTSI2SDrr, X86::CVTSI2SDrm }, + { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm }, { X86::CVTSI2SSrr, X86::CVTSI2SSrm }, { X86::CVTSS2SDrr, X86::CVTSS2SDrm }, + { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm }, { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm }, + { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm }, { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm }, { X86::FsMOVAPDrr, X86::MOVSDrm }, { X86::FsMOVAPSrr, X86::MOVSSrm }, @@ -470,6 +556,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::IMUL16rri8, X86::IMUL16rmi8 }, { X86::IMUL32rri, X86::IMUL32rmi }, { X86::IMUL32rri8, X86::IMUL32rmi8 }, + { X86::IMUL64rr, X86::IMUL64rm }, + { X86::IMUL64rri32, X86::IMUL64rmi32 }, + { X86::IMUL64rri8, X86::IMUL64rmi8 }, { X86::Int_CMPSDrr, X86::Int_CMPSDrm }, { X86::Int_CMPSSrr, X86::Int_CMPSSrm }, { X86::Int_COMISDrr, X86::Int_COMISDrm }, @@ -480,20 +569,27 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm }, { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm }, { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm }, + { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm }, { X86::Int_CVTSD2SIrr, 
X86::Int_CVTSD2SIrm }, { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm }, { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm }, { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm }, { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm }, + { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm }, { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm }, { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm }, { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm }, + { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm }, { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm }, + { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm }, { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm }, { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm }, { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm }, { X86::MOV16rr, X86::MOV16rm }, { X86::MOV32rr, X86::MOV32rm }, + { X86::MOV64rr, X86::MOV64rm }, { X86::MOV8rr, X86::MOV8rm }, { X86::MOVAPDrr, X86::MOVAPDrm }, { X86::MOVAPSrr, X86::MOVAPSrm }, @@ -509,22 +605,30 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::MOVSX16rr8, X86::MOVSX16rm8 }, { X86::MOVSX32rr16, X86::MOVSX32rm16 }, { X86::MOVSX32rr8, X86::MOVSX32rm8 }, + { X86::MOVSX64rr16, X86::MOVSX64rm16 }, + { X86::MOVSX64rr32, X86::MOVSX64rm32 }, + { X86::MOVSX64rr8, X86::MOVSX64rm8 }, { X86::MOVUPDrr, X86::MOVUPDrm }, { X86::MOVUPSrr, X86::MOVUPSrm }, { X86::MOVZX16rr8, X86::MOVZX16rm8 }, { X86::MOVZX32rr16, X86::MOVZX32rm16 }, { X86::MOVZX32rr8, X86::MOVZX32rm8 }, + { X86::MOVZX64rr16, X86::MOVZX64rm16 }, + { X86::MOVZX64rr8, X86::MOVZX64rm8 }, { X86::PSHUFDri, X86::PSHUFDmi }, { X86::PSHUFHWri, X86::PSHUFHWmi }, { X86::PSHUFLWri, X86::PSHUFLWmi }, + { X86::PsMOVZX64rr32, X86::PsMOVZX64rm32 }, { X86::TEST16rr, X86::TEST16rm }, { X86::TEST32rr, X86::TEST32rm }, + { X86::TEST64rr, X86::TEST64rm }, { X86::TEST8rr, X86::TEST8rm }, // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 { X86::UCOMISDrr, X86::UCOMISDrm }, { X86::UCOMISSrr, X86::UCOMISSrm }, { X86::XCHG16rr, X86::XCHG16rm }, { X86::XCHG32rr, X86::XCHG32rm }, + { X86::XCHG64rr, X86::XCHG64rm }, { X86::XCHG8rr, X86::XCHG8rm } }; ASSERT_SORTED(OpcodeTable); @@ -533,8 +637,10 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, } else if (i == 2) { static const TableEntry OpcodeTable[] = { { X86::ADC32rr, X86::ADC32rm }, + { X86::ADC64rr, X86::ADC64rm }, { X86::ADD16rr, X86::ADD16rm }, { X86::ADD32rr, X86::ADD32rm }, + { X86::ADD64rr, X86::ADD64rm }, { X86::ADD8rr, X86::ADD8rm }, { X86::ADDPDrr, X86::ADDPDrm }, { X86::ADDPSrr, X86::ADDPSrm }, @@ -544,6 +650,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::ADDSUBPSrr, X86::ADDSUBPSrm }, { X86::AND16rr, X86::AND16rm }, { X86::AND32rr, X86::AND32rm }, + { X86::AND64rr, X86::AND64rm }, { X86::AND8rr, X86::AND8rm }, { X86::ANDNPDrr, X86::ANDNPDrm }, { X86::ANDNPSrr, X86::ANDNPSrm }, @@ -551,32 +658,46 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::ANDPSrr, X86::ANDPSrm }, { X86::CMOVA16rr, X86::CMOVA16rm }, { X86::CMOVA32rr, X86::CMOVA32rm }, + { X86::CMOVA64rr, X86::CMOVA64rm }, { X86::CMOVAE16rr, X86::CMOVAE16rm }, { X86::CMOVAE32rr, X86::CMOVAE32rm }, + { X86::CMOVAE64rr, X86::CMOVAE64rm }, { X86::CMOVB16rr, X86::CMOVB16rm }, { X86::CMOVB32rr, X86::CMOVB32rm }, + { X86::CMOVB64rr, X86::CMOVB64rm }, { X86::CMOVBE16rr, X86::CMOVBE16rm }, { X86::CMOVBE32rr, X86::CMOVBE32rm }, + { X86::CMOVBE64rr, X86::CMOVBE64rm }, { X86::CMOVE16rr, X86::CMOVE16rm }, { X86::CMOVE32rr, X86::CMOVE32rm }, + { X86::CMOVE64rr, 
X86::CMOVE64rm }, { X86::CMOVG16rr, X86::CMOVG16rm }, { X86::CMOVG32rr, X86::CMOVG32rm }, + { X86::CMOVG64rr, X86::CMOVG64rm }, { X86::CMOVGE16rr, X86::CMOVGE16rm }, { X86::CMOVGE32rr, X86::CMOVGE32rm }, + { X86::CMOVGE64rr, X86::CMOVGE64rm }, { X86::CMOVL16rr, X86::CMOVL16rm }, { X86::CMOVL32rr, X86::CMOVL32rm }, + { X86::CMOVL64rr, X86::CMOVL64rm }, { X86::CMOVLE16rr, X86::CMOVLE16rm }, { X86::CMOVLE32rr, X86::CMOVLE32rm }, + { X86::CMOVLE64rr, X86::CMOVLE64rm }, { X86::CMOVNE16rr, X86::CMOVNE16rm }, { X86::CMOVNE32rr, X86::CMOVNE32rm }, + { X86::CMOVNE64rr, X86::CMOVNE64rm }, { X86::CMOVNP16rr, X86::CMOVNP16rm }, { X86::CMOVNP32rr, X86::CMOVNP32rm }, + { X86::CMOVNP64rr, X86::CMOVNP64rm }, { X86::CMOVNS16rr, X86::CMOVNS16rm }, { X86::CMOVNS32rr, X86::CMOVNS32rm }, + { X86::CMOVNS64rr, X86::CMOVNS64rm }, { X86::CMOVP16rr, X86::CMOVP16rm }, { X86::CMOVP32rr, X86::CMOVP32rm }, + { X86::CMOVP64rr, X86::CMOVP64rm }, { X86::CMOVS16rr, X86::CMOVS16rm }, { X86::CMOVS32rr, X86::CMOVS32rm }, + { X86::CMOVS64rr, X86::CMOVS64rm }, { X86::DIVPDrr, X86::DIVPDrm }, { X86::DIVPSrr, X86::DIVPSrm }, { X86::DIVSDrr, X86::DIVSDrm }, @@ -597,6 +718,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::MULSSrr, X86::MULSSrm }, { X86::OR16rr, X86::OR16rm }, { X86::OR32rr, X86::OR32rm }, + { X86::OR64rr, X86::OR64rm }, { X86::OR8rr, X86::OR8rm }, { X86::ORPDrr, X86::ORPDrm }, { X86::ORPSrr, X86::ORPSrm }, @@ -655,6 +777,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::RCPPSr, X86::RCPPSm }, { X86::RSQRTPSr, X86::RSQRTPSm }, { X86::SBB32rr, X86::SBB32rm }, + { X86::SBB64rr, X86::SBB64rm }, { X86::SHUFPDrri, X86::SHUFPDrmi }, { X86::SHUFPSrri, X86::SHUFPSrmi }, { X86::SQRTPDr, X86::SQRTPDm }, @@ -663,6 +786,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::SQRTSSr, X86::SQRTSSm }, { X86::SUB16rr, X86::SUB16rm }, { X86::SUB32rr, X86::SUB32rm }, + { X86::SUB64rr, X86::SUB64rm }, { X86::SUB8rr, X86::SUB8rm }, { X86::SUBPDrr, X86::SUBPDrm }, { X86::SUBPSrr, X86::SUBPSrm }, @@ -675,6 +799,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, { X86::UNPCKLPSrr, X86::UNPCKLPSrm }, { X86::XOR16rr, X86::XOR16rm }, { X86::XOR32rr, X86::XOR32rm }, + { X86::XOR64rr, X86::XOR64rm }, { X86::XOR8rr, X86::XOR8rm }, { X86::XORPDrr, X86::XORPDrm }, { X86::XORPSrr, X86::XORPSrm } @@ -707,19 +832,29 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, const unsigned *X86RegisterInfo::getCalleeSaveRegs() const { - static const unsigned CalleeSaveRegs[] = { + static const unsigned CalleeSaveRegs32Bit[] = { X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0 }; - return CalleeSaveRegs; + static const unsigned CalleeSaveRegs64Bit[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + + return Is64Bit ? CalleeSaveRegs64Bit : CalleeSaveRegs32Bit; } const TargetRegisterClass* const* X86RegisterInfo::getCalleeSaveRegClasses() const { - static const TargetRegisterClass * const CalleeSaveRegClasses[] = { + static const TargetRegisterClass * const CalleeSaveRegClasses32Bit[] = { &X86::GR32RegClass, &X86::GR32RegClass, &X86::GR32RegClass, &X86::GR32RegClass, 0 }; - return CalleeSaveRegClasses; + static const TargetRegisterClass * const CalleeSaveRegClasses64Bit[] = { + &X86::GR64RegClass, &X86::GR64RegClass, + &X86::GR64RegClass, &X86::GR64RegClass, + &X86::GR64RegClass, &X86::GR64RegClass, 0 + }; + + return Is64Bit ? 
CalleeSaveRegClasses64Bit : CalleeSaveRegClasses32Bit; } //===----------------------------------------------------------------------===// @@ -754,15 +889,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineInstr *New = 0; if (Old->getOpcode() == X86::ADJCALLSTACKDOWN) { - New=BuildMI(X86::SUB32ri, 2, X86::ESP).addReg(X86::ESP).addImm(Amount); + New=BuildMI(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri, 1, StackPtr) + .addReg(StackPtr).addImm(Amount); } else { assert(Old->getOpcode() == X86::ADJCALLSTACKUP); // factor out the amount the callee already popped. unsigned CalleeAmt = Old->getOperand(1).getImmedValue(); Amount -= CalleeAmt; if (Amount) { - unsigned Opc = Amount < 128 ? X86::ADD32ri8 : X86::ADD32ri; - New = BuildMI(Opc, 2, X86::ESP).addReg(X86::ESP).addImm(Amount); + unsigned Opc = (Amount < 128) ? + (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : + (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri); + New = BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(Amount); } } @@ -774,9 +912,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. if (unsigned CalleeAmt = I->getOperand(1).getImmedValue()) { - unsigned Opc = CalleeAmt < 128 ? X86::SUB32ri8 : X86::SUB32ri; + unsigned Opc = (CalleeAmt < 128) ? + (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : + (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri); MachineInstr *New = - BuildMI(Opc, 1, X86::ESP).addReg(X86::ESP).addImm(CalleeAmt); + BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(CalleeAmt); MBB.insert(I, New); } } @@ -794,19 +934,18 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const{ } int FrameIndex = MI.getOperand(i).getFrameIndex(); - // This must be part of a four operand memory reference. Replace the - // FrameIndex with base register with EBP. Add add an offset to the offset. - MI.getOperand(i).ChangeToRegister(hasFP(MF) ? X86::EBP : X86::ESP, false); + // FrameIndex with base register with EBP. Add an offset to the offset. + MI.getOperand(i).ChangeToRegister(hasFP(MF) ? FramePtr : StackPtr, false); // Now add the frame object offset to the offset from EBP. int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(i+3).getImmedValue()+4; + MI.getOperand(i+3).getImmedValue()+SlotSize; if (!hasFP(MF)) Offset += MF.getFrameInfo()->getStackSize(); else - Offset += 4; // Skip the saved EBP + Offset += SlotSize; // Skip the saved EBP MI.getOperand(i+3).ChangeToImmediate(Offset); } @@ -815,7 +954,7 @@ void X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{ if (hasFP(MF)) { // Create a frame entry for the EBP register that must be saved. - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, -8); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,SlotSize * -2); assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); } @@ -840,9 +979,9 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (!hasFP(MF)) NumBytes += MFI->getMaxCallFrameSize(); - // Round the size to a multiple of the alignment (don't forget the 4 byte + // Round the size to a multiple of the alignment (don't forget the 4/8 byte // offset though). - NumBytes = ((NumBytes+4)+Align-1)/Align*Align - 4; + NumBytes = ((NumBytes+SlotSize)+Align-1)/Align*Align - SlotSize; } // Update frame info to pretend that this is part of the stack... 
@@ -859,8 +998,10 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { MI = BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("_alloca"); MBB.insert(MBBI, MI); } else { - unsigned Opc = NumBytes < 128 ? X86::SUB32ri8 : X86::SUB32ri; - MI = BuildMI(Opc, 2, X86::ESP).addReg(X86::ESP).addImm(NumBytes); + unsigned Opc = (NumBytes < 128) ? + (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : + (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri); + MI= BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(NumBytes); MBB.insert(MBBI, MI); } } @@ -868,18 +1009,21 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (hasFP(MF)) { // Get the offset of the stack slot for the EBP register... which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. - int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexBegin())+4; + int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexBegin())+SlotSize; // Save EBP into the appropriate stack slot... - MI = addRegOffset(BuildMI(X86::MOV32mr, 5), // mov [ESP-<offset>], EBP - X86::ESP, EBPOffset+NumBytes).addReg(X86::EBP); + // mov [ESP-<offset>], EBP + MI = addRegOffset(BuildMI(Is64Bit ? X86::MOV64mr : X86::MOV32mr, 5), + StackPtr, EBPOffset+NumBytes).addReg(FramePtr); MBB.insert(MBBI, MI); // Update EBP with the new base value... - if (NumBytes == 4) // mov EBP, ESP - MI = BuildMI(X86::MOV32rr, 2, X86::EBP).addReg(X86::ESP); + if (NumBytes == SlotSize) // mov EBP, ESP + MI = BuildMI(Is64Bit ? X86::MOV64rr : X86::MOV32rr, 2, FramePtr). + addReg(StackPtr); else // lea EBP, [ESP+StackSize] - MI = addRegOffset(BuildMI(X86::LEA32r, 5, X86::EBP), X86::ESP,NumBytes-4); + MI = addRegOffset(BuildMI(Is64Bit ? X86::LEA64r : X86::LEA32r, + 5, FramePtr), StackPtr, NumBytes-SlotSize); MBB.insert(MBBI, MI); } @@ -916,13 +1060,14 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, if (hasFP(MF)) { // Get the offset of the stack slot for the EBP register... which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. - int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexEnd()-1)+4; + int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexEnd()-1)+SlotSize; // mov ESP, EBP - BuildMI(MBB, MBBI, X86::MOV32rr, 1, X86::ESP).addReg(X86::EBP); + BuildMI(MBB, MBBI, Is64Bit ? X86::MOV64rr : X86::MOV32rr, 1, StackPtr). + addReg(FramePtr); // pop EBP - BuildMI(MBB, MBBI, X86::POP32r, 0, X86::EBP); + BuildMI(MBB, MBBI, Is64Bit ? X86::POP64r : X86::POP32r, 0, FramePtr); } else { // Get the number of bytes allocated from the FrameInfo... unsigned NumBytes = MFI->getStackSize(); @@ -932,14 +1077,15 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, // instruction, merge the two instructions. 
if (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); - if ((PI->getOpcode() == X86::ADD32ri || - PI->getOpcode() == X86::ADD32ri8) && - PI->getOperand(0).getReg() == X86::ESP) { + unsigned Opc = PI->getOpcode(); + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || + Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && + PI->getOperand(0).getReg() == StackPtr) { NumBytes += PI->getOperand(2).getImmedValue(); MBB.erase(PI); - } else if ((PI->getOpcode() == X86::SUB32ri || - PI->getOpcode() == X86::SUB32ri8) && - PI->getOperand(0).getReg() == X86::ESP) { + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && + PI->getOperand(0).getReg() == StackPtr) { NumBytes -= PI->getOperand(2).getImmedValue(); MBB.erase(PI); } else if (PI->getOpcode() == X86::ADJSTACKPTRri) { @@ -949,11 +1095,15 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, } if (NumBytes > 0) { - unsigned Opc = NumBytes < 128 ? X86::ADD32ri8 : X86::ADD32ri; - BuildMI(MBB, MBBI, Opc, 2, X86::ESP).addReg(X86::ESP).addImm(NumBytes); + unsigned Opc = (NumBytes < 128) ? + (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : + (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri); + BuildMI(MBB, MBBI, Opc, 2, StackPtr).addReg(StackPtr).addImm(NumBytes); } else if ((int)NumBytes < 0) { - unsigned Opc = -NumBytes < 128 ? X86::SUB32ri8 : X86::SUB32ri; - BuildMI(MBB, MBBI, Opc, 2, X86::ESP).addReg(X86::ESP).addImm(-NumBytes); + unsigned Opc = (-NumBytes < 128) ? + (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : + (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri); + BuildMI(MBB, MBBI, Opc, 2, StackPtr).addReg(StackPtr).addImm(-NumBytes); } } } @@ -964,7 +1114,7 @@ unsigned X86RegisterInfo::getRARegister() const { } unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const { - return hasFP(MF) ? X86::EBP : X86::ESP; + return hasFP(MF) ? 
FramePtr : StackPtr; } namespace llvm { @@ -974,68 +1124,160 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::ValueType VT, bool High) { case MVT::i8: if (High) { switch (Reg) { - default: return Reg; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AH; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: return X86::DH; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: return X86::CH; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: return X86::BH; } } else { switch (Reg) { - default: return Reg; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AL; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: return X86::DL; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: return X86::CL; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: return X86::BL; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SIL; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DIL; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BPL; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SPL; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8B; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9B; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10B; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11B; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12B; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13B; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14B; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15B; } } case MVT::i16: switch (Reg) { default: return Reg; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AX; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: return X86::DX; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: return X86::CX; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: return X86::BX; - case X86::ESI: + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: return X86::SI; - case X86::EDI: + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: return X86::DI; - case X86::EBP: + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: return X86::BP; - case X86::ESP: + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: return X86::SP; + case X86::R8B: case X86::R8W: case X86::R8D: 
case X86::R8: + return X86::R8W; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9W; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10W; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11W; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12W; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13W; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14W; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15W; } case MVT::i32: switch (Reg) { - default: return true; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: + default: return Reg; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::EAX; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: return X86::EDX; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: return X86::ECX; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: return X86::EBX; - case X86::SI: + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: return X86::ESI; - case X86::DI: + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: return X86::EDI; - case X86::BP: + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: return X86::EBP; - case X86::SP: + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: return X86::ESP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8D; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9D; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10D; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11D; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12D; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13D; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14D; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15D; + } + case MVT::i64: + switch (Reg) { + default: return Reg; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::RAX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::RDX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::RCX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::RBX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::RSI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::RDI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::RBP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::RSP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12; + case X86::R13B: case X86::R13W: case 
X86::R13D: case X86::R13: + return X86::R13; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15; } } diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index e86cc28d52..fdab3ee7a0 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -20,10 +20,26 @@ namespace llvm { class Type; class TargetInstrInfo; + class X86TargetMachine; struct X86RegisterInfo : public X86GenRegisterInfo { + X86TargetMachine &TM; const TargetInstrInfo &TII; - X86RegisterInfo(const TargetInstrInfo &tii); +private: + /// Is64Bit - Is the target 64-bits. + bool Is64Bit; + + /// SlotSize - Stack slot size in bytes. + unsigned SlotSize; + + /// StackPtr - X86 physical register used as stack ptr. + unsigned StackPtr; + + /// FramePtr - X86 physical register used as frame ptr. + unsigned FramePtr; + +public: + X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii); /// Code Generation virtual methods... void storeRegToStackSlot(MachineBasicBlock &MBB, diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 7a713c3110..4728c0c960 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -23,35 +23,92 @@ let Namespace = "X86" in { // because the register file generator is smart enough to figure out that // AL aliases AX if we tell it that AX aliased AL (for example). + // FIXME: X86-64 have different Dwarf numbers. + // 64-bit registers, X86-64 only + def RAX : Register<"RAX">, DwarfRegNum<0>; + def RDX : Register<"RDX">, DwarfRegNum<1>; + def RCX : Register<"RCX">, DwarfRegNum<2>; + def RBX : Register<"RBX">, DwarfRegNum<3>; + def RSI : Register<"RSI">, DwarfRegNum<4>; + def RDI : Register<"RDI">, DwarfRegNum<5>; + def RBP : Register<"RBP">, DwarfRegNum<6>; + def RSP : Register<"RSP">, DwarfRegNum<7>; + + def R8 : Register<"R8">, DwarfRegNum<8>; + def R9 : Register<"R9">, DwarfRegNum<9>; + def R10 : Register<"R10">, DwarfRegNum<10>; + def R11 : Register<"R11">, DwarfRegNum<11>; + def R12 : Register<"R12">, DwarfRegNum<12>; + def R13 : Register<"R13">, DwarfRegNum<13>; + def R14 : Register<"R14">, DwarfRegNum<14>; + def R15 : Register<"R15">, DwarfRegNum<15>; + // 32-bit registers - def EAX : Register<"EAX">, DwarfRegNum<0>; - def ECX : Register<"ECX">, DwarfRegNum<1>; - def EDX : Register<"EDX">, DwarfRegNum<2>; - def EBX : Register<"EBX">, DwarfRegNum<3>; - def ESP : Register<"ESP">, DwarfRegNum<4>; - def EBP : Register<"EBP">, DwarfRegNum<5>; - def ESI : Register<"ESI">, DwarfRegNum<6>; - def EDI : Register<"EDI">, DwarfRegNum<7>; + def EAX : RegisterGroup<"EAX", [RAX]>, DwarfRegNum<0>; + def ECX : RegisterGroup<"ECX", [RCX]>, DwarfRegNum<1>; + def EDX : RegisterGroup<"EDX", [RDX]>, DwarfRegNum<2>; + def EBX : RegisterGroup<"EBX", [RBX]>, DwarfRegNum<3>; + def ESP : RegisterGroup<"ESP", [RSP]>, DwarfRegNum<4>; + def EBP : RegisterGroup<"EBP", [RBP]>, DwarfRegNum<5>; + def ESI : RegisterGroup<"ESI", [RSI]>, DwarfRegNum<6>; + def EDI : RegisterGroup<"EDI", [RDI]>, DwarfRegNum<7>; + // X86-64 only + def R8D : RegisterGroup<"R8D", [R8]>, DwarfRegNum<8>; + def R9D : RegisterGroup<"R9D", [R9]>, DwarfRegNum<9>; + def R10D : RegisterGroup<"R10D", [R10]>, DwarfRegNum<10>; + def R11D : RegisterGroup<"R11D", [R11]>, DwarfRegNum<11>; + def R12D : RegisterGroup<"R12D", [R12]>, DwarfRegNum<12>; + def R13D : RegisterGroup<"R13D", [R13]>, DwarfRegNum<13>; + def R14D : 
RegisterGroup<"R14D", [R14]>, DwarfRegNum<14>; + def R15D : RegisterGroup<"R15D", [R15]>, DwarfRegNum<15>; + // 16-bit registers - def AX : RegisterGroup<"AX", [EAX]>, DwarfRegNum<0>; - def CX : RegisterGroup<"CX", [ECX]>, DwarfRegNum<1>; - def DX : RegisterGroup<"DX", [EDX]>, DwarfRegNum<2>; - def BX : RegisterGroup<"BX", [EBX]>, DwarfRegNum<3>; - def SP : RegisterGroup<"SP", [ESP]>, DwarfRegNum<4>; - def BP : RegisterGroup<"BP", [EBP]>, DwarfRegNum<5>; - def SI : RegisterGroup<"SI", [ESI]>, DwarfRegNum<6>; - def DI : RegisterGroup<"DI", [EDI]>, DwarfRegNum<7>; + def AX : RegisterGroup<"AX", [EAX,RAX]>, DwarfRegNum<0>; + def CX : RegisterGroup<"CX", [ECX,RCX]>, DwarfRegNum<1>; + def DX : RegisterGroup<"DX", [EDX,RDX]>, DwarfRegNum<2>; + def BX : RegisterGroup<"BX", [EBX,RBX]>, DwarfRegNum<3>; + def SP : RegisterGroup<"SP", [ESP,RSP]>, DwarfRegNum<4>; + def BP : RegisterGroup<"BP", [EBP,RBP]>, DwarfRegNum<5>; + def SI : RegisterGroup<"SI", [ESI,RSI]>, DwarfRegNum<6>; + def DI : RegisterGroup<"DI", [EDI,RDI]>, DwarfRegNum<7>; + // X86-64 only + def R8W : RegisterGroup<"R8W", [R8D,R8]>, DwarfRegNum<8>; + def R9W : RegisterGroup<"R9W", [R9D,R9]>, DwarfRegNum<9>; + def R10W : RegisterGroup<"R10W", [R10D,R10]>, DwarfRegNum<10>; + def R11W : RegisterGroup<"R11W", [R11D,R11]>, DwarfRegNum<11>; + def R12W : RegisterGroup<"R12W", [R12D,R12]>, DwarfRegNum<12>; + def R13W : RegisterGroup<"R13W", [R13D,R13]>, DwarfRegNum<13>; + def R14W : RegisterGroup<"R14W", [R14D,R14]>, DwarfRegNum<14>; + def R15W : RegisterGroup<"R15W", [R15D,R15]>, DwarfRegNum<15>; + // 8-bit registers - def AL : RegisterGroup<"AL", [AX,EAX]>, DwarfRegNum<0>; - def CL : RegisterGroup<"CL", [CX,ECX]>, DwarfRegNum<1>; - def DL : RegisterGroup<"DL", [DX,EDX]>, DwarfRegNum<2>; - def BL : RegisterGroup<"BL", [BX,EBX]>, DwarfRegNum<3>; - def AH : RegisterGroup<"AH", [AX,EAX]>, DwarfRegNum<0>; - def CH : RegisterGroup<"CH", [CX,ECX]>, DwarfRegNum<1>; - def DH : RegisterGroup<"DH", [DX,EDX]>, DwarfRegNum<2>; - def BH : RegisterGroup<"BH", [BX,EBX]>, DwarfRegNum<3>; + // Low registers + def AL : RegisterGroup<"AL", [AX,EAX,RAX]>, DwarfRegNum<0>; + def CL : RegisterGroup<"CL", [CX,ECX,RCX]>, DwarfRegNum<1>; + def DL : RegisterGroup<"DL", [DX,EDX,RDX]>, DwarfRegNum<2>; + def BL : RegisterGroup<"BL", [BX,EBX,RBX]>, DwarfRegNum<3>; + + // X86-64 only + def SIL : RegisterGroup<"SIL", [SI,ESI,RSI]>, DwarfRegNum<4>; + def DIL : RegisterGroup<"DIL", [DI,EDI,RDI]>, DwarfRegNum<5>; + def BPL : RegisterGroup<"BPL", [BP,EBP,RBP]>, DwarfRegNum<6>; + def SPL : RegisterGroup<"SPL", [SP,ESP,RSP]>, DwarfRegNum<7>; + def R8B : RegisterGroup<"R8B", [R8W,R8D,R8]>, DwarfRegNum<8>; + def R9B : RegisterGroup<"R9B", [R9W,R9D,R9]>, DwarfRegNum<9>; + def R10B : RegisterGroup<"R10B", [R10W,R10D,R10]>, DwarfRegNum<10>; + def R11B : RegisterGroup<"R11B", [R11W,R11D,R11]>, DwarfRegNum<11>; + def R12B : RegisterGroup<"R12B", [R12W,R12D,R12]>, DwarfRegNum<12>; + def R13B : RegisterGroup<"R13B", [R13W,R13D,R13]>, DwarfRegNum<13>; + def R14B : RegisterGroup<"R14B", [R14W,R14D,R14]>, DwarfRegNum<14>; + def R15B : RegisterGroup<"R15B", [R15W,R15D,R15]>, DwarfRegNum<15>; + + // High registers X86-32 only + def AH : RegisterGroup<"AH", [AX,EAX,RAX]>, DwarfRegNum<0>; + def CH : RegisterGroup<"CH", [CX,ECX,RCX]>, DwarfRegNum<1>; + def DH : RegisterGroup<"DH", [DX,EDX,RDX]>, DwarfRegNum<2>; + def BH : RegisterGroup<"BH", [BX,EBX,RBX]>, DwarfRegNum<3>; // MMX Registers. These are actually aliased to ST0 .. 
ST7 def MM0 : Register<"MM0">, DwarfRegNum<29>; @@ -73,14 +130,24 @@ let Namespace = "X86" in { def FP6 : Register<"FP6">, DwarfRegNum<-1>; // XMM Registers, used by the various SSE instruction set extensions - def XMM0: Register<"XMM0">, DwarfRegNum<21>; - def XMM1: Register<"XMM1">, DwarfRegNum<22>; - def XMM2: Register<"XMM2">, DwarfRegNum<23>; - def XMM3: Register<"XMM3">, DwarfRegNum<24>; - def XMM4: Register<"XMM4">, DwarfRegNum<25>; - def XMM5: Register<"XMM5">, DwarfRegNum<26>; - def XMM6: Register<"XMM6">, DwarfRegNum<27>; - def XMM7: Register<"XMM7">, DwarfRegNum<28>; + def XMM0: Register<"XMM0">, DwarfRegNum<17>; + def XMM1: Register<"XMM1">, DwarfRegNum<18>; + def XMM2: Register<"XMM2">, DwarfRegNum<19>; + def XMM3: Register<"XMM3">, DwarfRegNum<20>; + def XMM4: Register<"XMM4">, DwarfRegNum<21>; + def XMM5: Register<"XMM5">, DwarfRegNum<22>; + def XMM6: Register<"XMM6">, DwarfRegNum<23>; + def XMM7: Register<"XMM7">, DwarfRegNum<24>; + + // X86-64 only + def XMM8: Register<"XMM8">, DwarfRegNum<25>; + def XMM9: Register<"XMM9">, DwarfRegNum<26>; + def XMM10: Register<"XMM10">, DwarfRegNum<27>; + def XMM11: Register<"XMM11">, DwarfRegNum<28>; + def XMM12: Register<"XMM12">, DwarfRegNum<29>; + def XMM13: Register<"XMM13">, DwarfRegNum<30>; + def XMM14: Register<"XMM14">, DwarfRegNum<31>; + def XMM15: Register<"XMM15">, DwarfRegNum<32>; // Floating point stack registers def ST0 : Register<"ST(0)">, DwarfRegNum<11>; @@ -99,52 +166,247 @@ let Namespace = "X86" in { // implicitly defined to be the register allocation order. // -// List AL,CL,DL before AH,CH,DH, as X86 processors often suffer from false -// dependences between upper and lower parts of the register. BL and BH are -// last because they are call clobbered. Both Athlon and P4 chips suffer this -// issue. -def GR8 : RegisterClass<"X86", [i8], 8, [AL, CL, DL, AH, CH, DH, BL, BH]>; +// List call-clobbered registers before callee-save registers. RBX, RBP, (and +// R12, R13, R14, and R15 for X86-64) are callee-save registers. +// In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and +// R8B, ... R15B. +// FIXME: Allow AH, CH, DH, BH in 64-mode for non-REX instructions, +def GR8 : RegisterClass<"X86", [i8], 8, + [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL, + R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]> { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate SPL or BPL. + static const unsigned X86_GR8_AO_64_fp[] = + {X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, + X86::R8B, X86::R9B, X86::R10B, X86::R11B, + X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B}; + // If not, just don't allocate SPL. + static const unsigned X86_GR8_AO_64[] = + {X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, + X86::R8B, X86::R9B, X86::R10B, X86::R11B, + X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL}; + // In 32-mode, none of the 8-bit registers aliases EBP or ESP. 
+ static const unsigned X86_GR8_AO_32[] = + {X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH}; + + GR8Class::iterator + GR8Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (!Subtarget.is64Bit()) + return X86_GR8_AO_32; + else if (hasFP(MF)) + return X86_GR8_AO_64_fp; + else + return X86_GR8_AO_64; + } -def GR16 : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP]> { + GR8Class::iterator + GR8Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (!Subtarget.is64Bit()) + return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned)); + else if (hasFP(MF)) + return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned)); + else + return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned)); + } + }]; +} + + +def GR16 : RegisterClass<"X86", [i16], 16, + [AX, CX, DX, SI, DI, BX, BP, SP, + R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]> { let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate SP or BP. + static const unsigned X86_GR16_AO_64_fp[] = + {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, + X86::R8W, X86::R9W, X86::R10W, X86::R11W, + X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W}; + static const unsigned X86_GR16_AO_32_fp[] = + {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX}; + // If not, just don't allocate SPL. + static const unsigned X86_GR16_AO_64[] = + {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, + X86::R8W, X86::R9W, X86::R10W, X86::R11W, + X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP}; + static const unsigned X86_GR16_AO_32[] = + {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP}; + + GR16Class::iterator + GR16Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (Subtarget.is64Bit()) { + if (hasFP(MF)) + return X86_GR16_AO_64_fp; + else + return X86_GR16_AO_64; + } else { + if (hasFP(MF)) + return X86_GR16_AO_32_fp; + else + return X86_GR16_AO_32; + } + } + GR16Class::iterator GR16Class::allocation_order_end(const MachineFunction &MF) const { - if (hasFP(MF)) // Does the function dedicate EBP to being a frame ptr? 
- return end()-2; // If so, don't allocate SP or BP - else - return end()-1; // If not, just don't allocate SP + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (Subtarget.is64Bit()) { + if (hasFP(MF)) + return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned)); + else + return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned)); + } else { + if (hasFP(MF)) + return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned)); + else + return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned)); + } } }]; } + def GR32 : RegisterClass<"X86", [i32], 32, - [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> { + [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, + R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]> { let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ + // Does the function dedicate RBP / EBP to being a frame ptr? + // If so, don't allocate ESP or EBP. + static const unsigned X86_GR32_AO_64_fp[] = + {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, + X86::R8D, X86::R9D, X86::R10D, X86::R11D, + X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D}; + static const unsigned X86_GR32_AO_32_fp[] = + {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX}; + // If not, just don't allocate SPL. + static const unsigned X86_GR32_AO_64[] = + {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, + X86::R8D, X86::R9D, X86::R10D, X86::R11D, + X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP}; + static const unsigned X86_GR32_AO_32[] = + {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP}; + + GR32Class::iterator + GR32Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (Subtarget.is64Bit()) { + if (hasFP(MF)) + return X86_GR32_AO_64_fp; + else + return X86_GR32_AO_64; + } else { + if (hasFP(MF)) + return X86_GR32_AO_32_fp; + else + return X86_GR32_AO_32; + } + } + GR32Class::iterator GR32Class::allocation_order_end(const MachineFunction &MF) const { - if (hasFP(MF)) // Does the function dedicate EBP to being a frame ptr? - return end()-2; // If so, don't allocate ESP or EBP + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (Subtarget.is64Bit()) { + if (hasFP(MF)) + return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned)); + else + return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned)); + } else { + if (hasFP(MF)) + return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned)); + else + return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned)); + } + } + }]; +} + + +def GR64 : RegisterClass<"X86", [i64], 64, + [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + RBX, R14, R15, R12, R13, RBP, RSP]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GR64Class::iterator + GR64Class::allocation_order_end(const MachineFunction &MF) const { + if (hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? + return end()-2; // If so, don't allocate RSP or RBP else - return end()-1; // If not, just don't allocate ESP + return end()-1; // If not, just don't allocate RSP } }]; } + // GR16, GR32 subclasses which contain registers that have R8 sub-registers. 
+// These should only be used for 32-bit mode. def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]>; def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]>; // Scalar SSE2 floating point registers. def FR32 : RegisterClass<"X86", [f32], 32, - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>; + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, + XMM12, XMM13, XMM14, XMM15]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + FR32Class::iterator + FR32Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (!Subtarget.is64Bit()) + return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode. + else + return end(); + } + }]; +} + def FR64 : RegisterClass<"X86", [f64], 64, - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>; + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, + XMM12, XMM13, XMM14, XMM15]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + FR64Class::iterator + FR64Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (!Subtarget.is64Bit()) + return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode. + else + return end(); + } + }]; +} + // FIXME: This sets up the floating point register files as though they are f64 // values, though they really are f80 values. This will cause us to spill @@ -174,4 +436,21 @@ def RST : RegisterClass<"X86", [f64], 32, def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32], 64, [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>; def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>; + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, + XMM12, XMM13, XMM14, XMM15]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + VR128Class::iterator + VR128Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (!Subtarget.is64Bit()) + return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode. + else + return end(); + } + }]; +} diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index e3776d87a5..556f40bba2 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -12,9 +12,10 @@ //===----------------------------------------------------------------------===// #include "X86Subtarget.h" +#include "X86GenSubtarget.inc" #include "llvm/Module.h" #include "llvm/Support/CommandLine.h" -#include "X86GenSubtarget.inc" +#include <iostream> using namespace llvm; cl::opt<X86Subtarget::AsmWriterFlavorTy> @@ -29,7 +30,18 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::unset), /// specified arguments. If we can't run cpuid on the host, return true. 
static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) +#if defined(__x86_64__) + asm ("pushq\t%%rbx\n\t" + "cpuid\n\t" + "movl\t%%ebx, %%esi\n\t" + "popq\t%%rbx" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; +#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) #if defined(__GNUC__) asm ("pushl\t%%ebx\n\t" "cpuid\n\t" @@ -99,8 +111,8 @@ static const char *GetCurrentX86CPU() { case 9: case 13: return "pentium-m"; case 14: return "yonah"; - default: - return (Model > 14) ? "yonah" : "i686"; + case 15: return "core2"; + default: return "i686"; } case 15: { switch (Model) { @@ -154,14 +166,16 @@ static const char *GetCurrentX86CPU() { } } -X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) { - stackAlignment = 8; - // FIXME: this is a known good value for Yonah. Not sure about others. - MinRepStrSizeThreshold = 128; - X86SSELevel = NoMMXSSE; - X863DNowLevel = NoThreeDNow; - AsmFlavor = AsmWriterFlavor; - Is64Bit = false; +X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) + : AsmFlavor(AsmWriterFlavor) + , X86SSELevel(NoMMXSSE) + , X863DNowLevel(NoThreeDNow) + , HasX86_64(false) + , stackAlignment(8) + // FIXME: this is a known good value for Yonah. How about others? + , MinRepStrSizeThreshold(128) + , Is64Bit(is64Bit) + , TargetType(isELF) { // Default to ELF unless otherwise specified. // Determine default and user specified characteristics std::string CPU = GetCurrentX86CPU(); @@ -169,9 +183,12 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) { // Parse features string. ParseSubtargetFeatures(FS, CPU); - // Default to ELF unless otherwise specified. - TargetType = isELF; - + if (Is64Bit && !HasX86_64) { + std::cerr << "Warning: Generation of 64-bit code for a 32-bit processor " + "requested.\n"; + HasX86_64 = true; + } + // Set the boolean corresponding to the current target triple, or the default // if one cannot be determined, to true. const std::string& TT = M.getTargetTriple(); diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index b373be61af..224ebcd308 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -44,9 +44,9 @@ protected: /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported. X863DNowEnum X863DNowLevel; - - /// Is64Bit - True if the processor supports Em64T. - bool Is64Bit; + + /// HasX86_64 - True if the processor supports X86-64 instructions. + bool HasX86_64; /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. @@ -55,6 +55,11 @@ protected: /// Min. memset / memcpy size that is turned into rep/movs, rep/stos ops. unsigned MinRepStrSizeThreshold; +private: + /// Is64Bit - True if the processor supports 64-bit instructions and module + /// pointer size is 64 bit. + bool Is64Bit; + public: enum { isELF, isCygwin, isDarwin, isWindows @@ -63,7 +68,7 @@ public: /// This constructor initializes the data members to match that /// of the specified module. 
/// - X86Subtarget(const Module &M, const std::string &FS); + X86Subtarget(const Module &M, const std::string &FS, bool is64Bit); /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp index b1da4ce9d5..9ca0e15bc9 100644 --- a/lib/Target/X86/X86TargetAsmInfo.cpp +++ b/lib/Target/X86/X86TargetAsmInfo.cpp @@ -26,13 +26,16 @@ X86TargetAsmInfo::X86TargetAsmInfo(const X86TargetMachine &TM) { case X86Subtarget::isDarwin: AlignmentIsInBytes = false; GlobalPrefix = "_"; - Data64bitsDirective = 0; // we can't emit a 64-bit unit + if (!Subtarget->is64Bit()) + Data64bitsDirective = 0; // we can't emit a 64-bit unit ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. PrivateGlobalPrefix = "L"; // Marker for constant pool idxs ConstantPoolSection = "\t.const\n"; JumpTableDataSection = "\t.const\n"; // FIXME: depends on PIC mode FourByteConstantSection = "\t.literal4\n"; EightByteConstantSection = "\t.literal8\n"; + if (Subtarget->is64Bit()) + SixteenByteConstantSection = "\t.literal16\n"; LCOMMDirective = "\t.lcomm\t"; COMMDirectiveTakesAlignment = false; HasDotTypeDotSizeDirective = false; diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 807b462b90..b4fc79abca 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -33,22 +33,31 @@ int X86TargetMachineModule = 0; namespace { // Register the target. - RegisterTarget<X86TargetMachine> X("x86", " IA-32 (Pentium and above)"); + RegisterTarget<X86_32TargetMachine> + X("x86", " 32-bit X86: Pentium-Pro and above"); + RegisterTarget<X86_64TargetMachine> + Y("x86-64", " 64-bit X86: EM64T and AMD64"); } const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const { return new X86TargetAsmInfo(*this); } -unsigned X86TargetMachine::getJITMatchQuality() { +unsigned X86_32TargetMachine::getJITMatchQuality() { #if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) return 10; -#else +#endif return 0; +} + +unsigned X86_64TargetMachine::getJITMatchQuality() { +#if defined(__x86_64__) + return 10; #endif + return 0; } -unsigned X86TargetMachine::getModuleMatchQuality(const Module &M) { +unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) { // We strongly match "i[3-9]86-*". std::string TT = M.getTargetTriple(); if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' && @@ -65,18 +74,55 @@ unsigned X86TargetMachine::getModuleMatchQuality(const Module &M) { return getJITMatchQuality()/2; } +unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) { + // We strongly match "x86_64-*". 
+ std::string TT = M.getTargetTriple(); + if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' && + TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-') + return 20; + + if (M.getEndianness() == Module::LittleEndian && + M.getPointerSize() == Module::Pointer64) + return 10; // Weak match + else if (M.getEndianness() != Module::AnyEndianness || + M.getPointerSize() != Module::AnyPointerSize) + return 0; // Match for some other target + + return getJITMatchQuality()/2; +} + +X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS) + : X86TargetMachine(M, FS, false) { +} + + +X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS) + : X86TargetMachine(M, FS, true) { +} + /// X86TargetMachine ctor - Create an ILP32 architecture model /// -X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS) - : Subtarget(M, FS), DataLayout("e-p:32:32-d:32-l:32"), +X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit) + : Subtarget(M, FS, is64Bit), + DataLayout(Subtarget.is64Bit() ? + std::string("e-p:64:64-d:32-l:32") : + std::string("e-p:32:32-d:32-l:32")), FrameInfo(TargetFrameInfo::StackGrowsDown, - Subtarget.getStackAlignment(), -4), + Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4), InstrInfo(*this), JITInfo(*this), TLInfo(*this) { if (getRelocationModel() == Reloc::Default) if (Subtarget.isTargetDarwin()) setRelocationModel(Reloc::DynamicNoPIC); else setRelocationModel(Reloc::PIC_); + if (Subtarget.is64Bit()) { + // No DynamicNoPIC support under X86-64. + if (getRelocationModel() == Reloc::DynamicNoPIC) + setRelocationModel(Reloc::PIC_); + // Default X86-64 code model is small. + if (getCodeModel() == CodeModel::Default) + setCodeModel(CodeModel::Small); + } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 8278cf1b22..05cb9484d6 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -37,7 +37,7 @@ protected: virtual const TargetAsmInfo *createTargetAsmInfo() const; public: - X86TargetMachine(const Module &M, const std::string &FS); + X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit); virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } @@ -54,6 +54,7 @@ public: static unsigned getModuleMatchQuality(const Module &M); static unsigned getJITMatchQuality(); + // Set up the pass pipeline. virtual bool addInstSelector(FunctionPassManager &PM, bool Fast); virtual bool addPostRegAlloc(FunctionPassManager &PM, bool Fast); @@ -64,6 +65,27 @@ public: virtual bool addCodeEmitter(FunctionPassManager &PM, bool Fast, MachineCodeEmitter &MCE); }; + +/// X86_32TargetMachine - X86 32-bit target machine. +/// +class X86_32TargetMachine : public X86TargetMachine { +public: + X86_32TargetMachine(const Module &M, const std::string &FS); + + static unsigned getJITMatchQuality(); + static unsigned getModuleMatchQuality(const Module &M); +}; + +/// X86_64TargetMachine - X86 64-bit target machine. +/// +class X86_64TargetMachine : public X86TargetMachine { +public: + X86_64TargetMachine(const Module &M, const std::string &FS); + + static unsigned getJITMatchQuality(); + static unsigned getModuleMatchQuality(const Module &M); +}; + } // End llvm namespace #endif |
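The frame-related hunks above replace hard-coded 4-byte stack arithmetic with a SlotSize cached from the subtarget (4 bytes in 32-bit mode, 8 bytes in 64-bit mode). The following is a minimal, self-contained sketch of that arithmetic only, not LLVM code: the helper names roundStackSize and frameIndexOffset are hypothetical stand-ins for the inline computations in emitPrologue and eliminateFrameIndex shown in the patch.

    // Standalone illustration of the SlotSize-parameterized stack math.
    #include <cassert>
    #include <cstdio>

    // Round the raw frame size up to the stack alignment, accounting for the
    // return-address slot the CALL already pushed (4 bytes on x86-32, 8 on x86-64).
    static unsigned roundStackSize(unsigned NumBytes, unsigned Align,
                                   unsigned SlotSize) {
      return ((NumBytes + SlotSize) + Align - 1) / Align * Align - SlotSize;
    }

    // Offset of a frame object from the base register chosen by the
    // frame-index elimination: skip the saved frame pointer when one exists,
    // otherwise skip the whole allocated frame.
    static int frameIndexOffset(int ObjectOffset, int ImmedValue,
                                unsigned StackSize, bool HasFP,
                                unsigned SlotSize) {
      int Offset = ObjectOffset + ImmedValue + (int)SlotSize;
      return HasFP ? Offset + (int)SlotSize : Offset + (int)StackSize;
    }

    int main() {
      // 32-bit mode uses 4-byte slots, 64-bit mode 8-byte slots.
      assert(roundStackSize(20, 16, 4) == 28);
      assert(roundStackSize(20, 16, 8) == 24);
      std::printf("64-bit offset with frame pointer: %d\n",
                  frameIndexOffset(-16, 0, 64, true, 8));
      return 0;
    }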