summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNAKAMURA Takumi <geek4civic@gmail.com>2011-01-26 02:04:09 +0000
committerNAKAMURA Takumi <geek4civic@gmail.com>2011-01-26 02:04:09 +0000
commit7754f85885f8a961cb403ef13ab39583492d2b1e (patch)
tree37d47e7d7daa736acf8de3396db92f5919811557
parent36c3bc431b92f1573b3f3bd75b644774681998ee (diff)
downloadllvm-7754f85885f8a961cb403ef13ab39583492d2b1e.tar.gz
llvm-7754f85885f8a961cb403ef13ab39583492d2b1e.tar.bz2
llvm-7754f85885f8a961cb403ef13ab39583492d2b1e.tar.xz
Target/X86: Tweak win64's tailcall.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124272 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp27
-rw-r--r--lib/Target/X86/X86InstrCompiler.td4
-rw-r--r--lib/Target/X86/X86InstrControl.td13
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp1
-rw-r--r--lib/Target/X86/X86InstrInfo.td7
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp6
-rw-r--r--lib/Target/X86/X86RegisterInfo.td3
-rw-r--r--test/CodeGen/X86/tailcall-ri64.ll24
-rw-r--r--test/CodeGen/X86/tailcallstack64.ll16
9 files changed, 83 insertions, 18 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index edd535e43c..3fcb5e7a8d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2476,9 +2476,6 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
MachineFunction &MF = DAG.getMachineFunction();
if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
return false;
- if (Subtarget->isTargetWin64())
- // Win64 ABI has additional complications.
- return false;
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
@@ -10078,6 +10075,30 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
+ case X86::TAILJMPd64:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPm64:
+ assert(!"TAILJMP64 would not be touched here.");
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ // Defs of TCRETURNxx64 has Win64's callee-saved registers, as subset.
+ // On AMD64, additional defs should be added before register allocation.
+ if (!Subtarget->isTargetWin64()) {
+ MI->addRegisterDefined(X86::RSI);
+ MI->addRegisterDefined(X86::RDI);
+ MI->addRegisterDefined(X86::XMM6);
+ MI->addRegisterDefined(X86::XMM7);
+ MI->addRegisterDefined(X86::XMM8);
+ MI->addRegisterDefined(X86::XMM9);
+ MI->addRegisterDefined(X86::XMM10);
+ MI->addRegisterDefined(X86::XMM11);
+ MI->addRegisterDefined(X86::XMM12);
+ MI->addRegisterDefined(X86::XMM13);
+ MI->addRegisterDefined(X86::XMM14);
+ MI->addRegisterDefined(X86::XMM15);
+ }
+ return BB;
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
case X86::TLSCall_32:
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 70478b8e4a..f72eaa7552 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -866,8 +866,8 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>,
Requires<[In32BitMode]>;
-def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
- (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+ (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In64BitMode]>;
def : Pat<(X86tcret (load addr:$dst), imm:$off),
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 4d1c5f7409..77f47250e9 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -266,17 +266,18 @@ let isCall = 1, isCodeGenOnly = 1 in
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1 in
- let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in {
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP],
+ usesCustomInserter = 1 in {
def TCRETURNdi64 : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
[]>;
def TCRETURNri64 : PseudoI<(outs),
- (ins GR64_TC:$dst, i32imm:$offset, variable_ops), []>;
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
let mayLoad = 1 in
def TCRETURNmi64 : PseudoI<(outs),
(ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;
@@ -284,7 +285,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
(ins i64i32imm_pcrel:$dst, variable_ops),
"jmp\t$dst # TAILCALL", []>;
- def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
+ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL", []>;
let mayLoad = 1 in
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 63dcd143b5..ceb1b65398 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2025,6 +2025,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
case X86::GR64_NOREX_NOSPRegClassID:
case X86::GR64_NOSPRegClassID:
case X86::GR64_TCRegClassID:
+ case X86::GR64_TCW64RegClassID:
return load ? X86::MOV64rm : X86::MOV64mr;
case X86::GR32RegClassID:
case X86::GR32_ABCDRegClassID:
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 4748f130ed..14c90662d0 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -273,6 +273,10 @@ def i8mem_NOREX : Operand<i64> {
let ParserMatchClass = X86MemAsmOperand;
}
+// GPRs available for tailcall.
+// It represents GR64_TC or GR64_TCW64.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
// Special i32mem for addresses of load folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
// after callee-saved register are popped.
@@ -287,7 +291,8 @@ def i32mem_TC : Operand<i32> {
// after callee-saved register are popped.
def i64mem_TC : Operand<i64> {
let PrintMethod = "printi64mem";
- let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+ ptr_rc_tailcall, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index f41be0cfe5..2f6bd88c65 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -320,6 +320,12 @@ X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
+ case 2: // Available for tailcall (not callee-saved GPRs).
+ if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+ return &X86::GR64_TCW64RegClass;
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_TCRegClass;
+ return &X86::GR32_TCRegClass;
}
}
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 45bb9898b8..612fac2f3b 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -496,6 +496,9 @@ def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
(GR32_TC sub_32bit)];
}
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX,
+ R8, R9, R11]>;
+
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
[AL, CL, DL, AH, CH, DH, BL, BH]> {
diff --git a/test/CodeGen/X86/tailcall-ri64.ll b/test/CodeGen/X86/tailcall-ri64.ll
new file mode 100644
index 0000000000..914d8f7b8b
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-ri64.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=AMD64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; PR8743
+; TAILJMPri64 should not receive "callee-saved" registers beyond epilogue.
+
+; AMD64: jmpq
+; AMD64-NOT: %{{e[a-z]|rbx|rbp|r10|r12|r13|r14|r15}}
+
+; WIN64: jmpq
+; WIN64-NOT: %{{e[a-z]|rbx|rsi|rdi|rbp|r12|r13|r14|r15}}
+
+%class = type { [8 x i8] }
+%vt = type { i32 (...)** }
+
+define %vt* @_ZN4llvm9UnsetInit20convertInitializerToEPNS_5RecTyE(%class*
+%this, %vt* %Ty) align 2 {
+entry:
+ %0 = bitcast %vt* %Ty to %vt* (%vt*, %class*)***
+ %vtable = load %vt* (%vt*, %class*)*** %0, align 8
+ %vfn = getelementptr inbounds %vt* (%vt*, %class*)** %vtable, i64 4
+ %1 = load %vt* (%vt*, %class*)** %vfn, align 8
+ %call = tail call %vt* %1(%vt* %Ty, %class* %this)
+ ret %vt* %call
+}
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index 52b074def5..0c732d56b6 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,16 +1,20 @@
-; RUN: llc < %s -tailcallopt -march=x86-64 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
+
+; FIXME: Redundant unused stack allocation could be eliminated.
+; CHECK: subq ${{24|88}}, %rsp
; Check that lowered arguments on the stack do not overwrite each other.
; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: movl 32(%rsp), %eax
+; CHECK: movl [[A1:32|144]](%rsp), %eax
; Move param %in1 to temp register (%r10d).
-; CHECK: movl 40(%rsp), %r10d
+; CHECK: movl [[A2:40|152]](%rsp), %r10d
; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: addl %edi, %eax
+; CHECK: addl {{%edi|%ecx}}, %eax
; Move param %in2 to stack.
-; CHECK: movl %r10d, 32(%rsp)
+; CHECK: movl %r10d, [[A1]](%rsp)
; Move result of addition to stack.
-; CHECK: movl %eax, 40(%rsp)
+; CHECK: movl %eax, [[A2]](%rsp)
; Eventually, do a TAILCALL
; CHECK: TAILCALL