summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 22
-rw-r--r-- test/CodeGen/X86/tail-call-got.ll 6
-rw-r--r-- test/CodeGen/X86/tailcallpic2.ll 4
3 files changed, 20 insertions, 12 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0deb18103b..9df0232a34 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2665,15 +2665,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
} else {
- // If we are tail calling a global or external symbol in GOT pic mode, we
- // cannot use a direct jump, since that would make lazy dynamic linking
- // impossible (see PR15086). So pretend this is not a tail call, to
- // prevent the optimization to a jump.
+ // If we are tail calling and generating PIC/GOT style code, load the
+ // address of the callee into ECX. The value in ECX is used as the target
+ // of the tail jump. This is done to circumvent the EBX/callee-saved
+ // problem for tail calls on PIC/GOT architectures. Normally we would just
+ // put the address of the GOT into EBX and then call target@PLT. But for
+ // tail calls EBX would be restored (since EBX is callee-saved) before
+ // jumping to target@PLT.
+
+ // Note: The actual moving to ECX is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if ((G && !G->getGlobal()->hasHiddenVisibility() &&
- !G->getGlobal()->hasProtectedVisibility()) ||
- isa<ExternalSymbolSDNode>(Callee))
- isTailCall = false;
+ if (G && !G->getGlobal()->hasHiddenVisibility() &&
+ !G->getGlobal()->hasProtectedVisibility())
+ Callee = LowerGlobalAddress(Callee, DAG);
+ else if (isa<ExternalSymbolSDNode>(Callee))
+ Callee = LowerExternalSymbol(Callee, DAG);
}
}
diff --git a/test/CodeGen/X86/tail-call-got.ll b/test/CodeGen/X86/tail-call-got.ll
index bdfdeb5987..84d561dcd8 100644
--- a/test/CodeGen/X86/tail-call-got.ll
+++ b/test/CodeGen/X86/tail-call-got.ll
@@ -5,7 +5,8 @@ target triple = "i386-unknown-freebsd9.0"
define double @test1(double %x) nounwind readnone {
; CHECK-LABEL: test1:
-; CHECK: calll foo@PLT
+; CHECK: movl foo@GOT
+; CHECK-NEXT: jmpl
%1 = tail call double @foo(double %x) nounwind readnone
ret double %1
}
@@ -14,7 +15,8 @@ declare double @foo(double) readnone
define double @test2(double %x) nounwind readnone {
; CHECK-LABEL: test2:
-; CHECK: calll sin@PLT
+; CHECK: movl sin@GOT
+; CHECK-NEXT: jmpl
%1 = tail call double @sin(double %x) nounwind readnone
ret double %1
}
diff --git a/test/CodeGen/X86/tailcallpic2.ll b/test/CodeGen/X86/tailcallpic2.ll
index c35cee3a98..1b6bdb7698 100644
--- a/test/CodeGen/X86/tailcallpic2.ll
+++ b/test/CodeGen/X86/tailcallpic2.ll
@@ -9,7 +9,7 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
entry:
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
ret i32 %tmp11
-; Note that this call via PLT could be further optimized into a direct call (no GOT, no PLT):
-; CHECK: calll tailcallee@PLT
+; CHECK: movl tailcallee@GOT
+; CHECK: jmpl
}