summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2010-02-06 03:28:46 +0000
committerEvan Cheng <evan.cheng@apple.com>2010-02-06 03:28:46 +0000
commitf22f9b35d6e6444028ed288bfae526f53740b5c1 (patch)
treec276c6e63fd5d9948bf7bef438ce697af69738bd
parenta65aa0f0bba1ef2322d63d05c074a92168684c63 (diff)
downloadllvm-f22f9b35d6e6444028ed288bfae526f53740b5c1.tar.gz
llvm-f22f9b35d6e6444028ed288bfae526f53740b5c1.tar.bz2
llvm-f22f9b35d6e6444028ed288bfae526f53740b5c1.tar.xz
Do not emit callseq instructions around sibcalls. This eliminated some unnecessary stack adjustments.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95475 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp45
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h5
-rw-r--r--test/CodeGen/X86/call-push.ll7
-rw-r--r--test/CodeGen/X86/tailcall2.ll8
4 files changed, 41 insertions, 24 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 05419fe3cc..b268e4b9ae 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1749,8 +1749,6 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
SDValue &OutRetAddr, SDValue Chain,
bool IsTailCall, bool Is64Bit,
int FPDiff, DebugLoc dl) {
- if (!IsTailCall || FPDiff==0) return Chain;
-
// Adjust the Return address stack slot.
EVT VT = getPointerTy();
OutRetAddr = getReturnAddressFrameIndex(DAG);
@@ -1796,8 +1794,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Outs, Ins, DAG);
+
+ // Sibcalls are automatically detected tailcalls which do not require
+ // ABI changes.
if (!PerformTailCallOpt && isTailCall)
IsSibcall = true;
+
+ if (isTailCall)
+ ++NumTailCalls;
}
assert(!(isVarArg && CallConv == CallingConv::Fast) &&
@@ -1811,17 +1815,15 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
- if (FuncIsMadeTailCallSafe(CallConv))
- NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
- else if (IsSibcall)
+ if (IsSibcall)
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
+ else if (PerformTailCallOpt && CallConv == CallingConv::Fast)
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
- if (isTailCall) {
- ++NumTailCalls;
-
+ if (isTailCall && !IsSibcall) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
@@ -1833,12 +1835,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
}
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ if (!IsSibcall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue RetAddrFrIdx;
// Load return adress for tail calls.
- Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit,
- FPDiff, dl);
+ if (isTailCall && FPDiff)
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
+ Is64Bit, FPDiff, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
@@ -1888,7 +1892,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else if ((!isTailCall || isByVal) && !IsSibcall) {
+ } else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
@@ -1913,7 +1917,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
}
-
if (Subtarget->isPICStyleGOT()) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
@@ -2110,7 +2113,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDValue, 8> Ops;
- if (isTailCall) {
+ if (!IsSibcall && isTailCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(0, true), InFlag);
InFlag = Chain.getValue(1);
@@ -2179,12 +2182,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
NumBytesForCalleeToPush = 0; // Callee pops nothing.
// Returns a flag for retval copy to use.
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(NumBytesForCalleeToPush,
- true),
- InFlag);
- InFlag = Chain.getValue(1);
+ if (!IsSibcall) {
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPush,
+ true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
// Handle result values, copying them out of physregs into vregs that we
// return.
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index fafcf7e3d7..bb53bf14ce 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -48,9 +48,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// ReturnAddrIndex - FrameIndex for return slot.
int ReturnAddrIndex;
- /// TailCallReturnAddrDelta - Delta the ReturnAddr stack slot is moved
- /// Used for creating an area before the register spill area on the stack
- /// the returnaddr can be savely move to this area
+ /// TailCallReturnAddrDelta - The number of bytes by which return address
+ /// stack slot is moved as the result of tail call optimization.
int TailCallReturnAddrDelta;
/// SRetReturnReg - Some subtargets require that sret lowering includes
diff --git a/test/CodeGen/X86/call-push.ll b/test/CodeGen/X86/call-push.ll
index 81516c1c59..02cbccc1a4 100644
--- a/test/CodeGen/X86/call-push.ll
+++ b/test/CodeGen/X86/call-push.ll
@@ -1,9 +1,14 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | grep subl | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s
%struct.decode_t = type { i8, i8, i8, i8, i16, i8, i8, %struct.range_t** }
%struct.range_t = type { float, float, i32, i32, i32, [0 x i8] }
define i32 @decode_byte(%struct.decode_t* %decode) nounwind {
+; CHECK: decode_byte:
+; CHECK: pushl
+; CHECK: popl
+; CHECK: popl
+; CHECK: jmp
entry:
%tmp2 = getelementptr %struct.decode_t* %decode, i32 0, i32 4 ; <i16*> [#uses=1]
%tmp23 = bitcast i16* %tmp2 to i32* ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/tailcall2.ll b/test/CodeGen/X86/tailcall2.ll
index b2fe9d3bb9..02bf0c00b4 100644
--- a/test/CodeGen/X86/tailcall2.ll
+++ b/test/CodeGen/X86/tailcall2.ll
@@ -146,9 +146,13 @@ define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
; eliminated currently.
; 32: t11:
+; 32-NOT: subl ${{[0-9]+}}, %esp
+; 32-NOT: addl ${{[0-9]+}}, %esp
; 32: jmp {{_?}}foo5
; 64: t11:
+; 64-NOT: subq ${{[0-9]+}}, %esp
+; 64-NOT: addq ${{[0-9]+}}, %esp
; 64: jmp {{_?}}foo5
entry:
%0 = icmp eq i32 %x, 0
@@ -168,9 +172,13 @@ declare i32 @foo5(i32, i32, i32, i32, i32)
define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
; 32: t12:
+; 32-NOT: subl ${{[0-9]+}}, %esp
+; 32-NOT: addl ${{[0-9]+}}, %esp
; 32: jmp {{_?}}foo6
; 64: t12:
+; 64-NOT: subq ${{[0-9]+}}, %esp
+; 64-NOT: addq ${{[0-9]+}}, %esp
; 64: jmp {{_?}}foo6
entry:
%0 = icmp eq i32 %x, 0