summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDuncan Sands <baldrick@free.fr>2011-09-06 13:37:06 +0000
committerDuncan Sands <baldrick@free.fr>2011-09-06 13:37:06 +0000
commit4a544a79bd735967f1d33fe675ae4566dbd17813 (patch)
tree6e349c2fb6f02699e32acd33d232e8f448a6413c
parentecd37bcd5ce27cacfa11c4731c46d9869278745e (diff)
downloadllvm-4a544a79bd735967f1d33fe675ae4566dbd17813.tar.gz
llvm-4a544a79bd735967f1d33fe675ae4566dbd17813.tar.bz2
llvm-4a544a79bd735967f1d33fe675ae4566dbd17813.tar.xz
Split the init.trampoline intrinsic, which currently combines GCC's
init.trampoline and adjust.trampoline intrinsics, into two intrinsics like in GCC. While having one combined intrinsic is tempting, it is not natural because typically the trampoline initialization needs to be done in one function, and the result of adjust trampoline is needed in a different (nested) function. To get around this llvm-gcc hacks the nested function lowering code to insert an additional parent variable holding the adjust.trampoline result that can be accessed from the child function. Dragonegg doesn't have the luxury of tweaking GCC code, so it stored the result of adjust.trampoline in the memory GCC set aside for the trampoline itself (this is always available in the child function), and set up some new memory (using an alloca) to hold the trampoline. Unfortunately this breaks Go which allocates trampoline memory on the heap and wants to use it even after the parent has exited (!). Rather than doing even more hacks to get Go working, it seemed best to just use two intrinsics like in GCC. Patch mostly by Sanjoy Das. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139140 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--docs/LangRef.html71
-rw-r--r--include/llvm/CodeGen/ISDOpcodes.h21
-rw-r--r--include/llvm/Intrinsics.td10
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp13
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp22
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h3
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp23
-rw-r--r--lib/Target/X86/X86ISelLowering.h3
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp17
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h3
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h4
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp97
-rw-r--r--lib/VMCore/AutoUpgrade.cpp46
-rw-r--r--test/Assembler/AutoUpgradeIntrinsics.ll13
-rw-r--r--test/CodeGen/PowerPC/trampoline.ll6
-rw-r--r--test/CodeGen/XCore/trampoline.ll6
-rw-r--r--test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll6
19 files changed, 289 insertions, 81 deletions
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 2b9ee24bf4..0ec08eb0a0 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -275,9 +275,10 @@
</li>
<li><a href="#int_debugger">Debugger intrinsics</a></li>
<li><a href="#int_eh">Exception Handling intrinsics</a></li>
- <li><a href="#int_trampoline">Trampoline Intrinsic</a>
+ <li><a href="#int_trampoline">Trampoline Intrinsics</a>
<ol>
<li><a href="#int_it">'<tt>llvm.init.trampoline</tt>' Intrinsic</a></li>
+ <li><a href="#int_at">'<tt>llvm.adjust.trampoline</tt>' Intrinsic</a></li>
</ol>
</li>
<li><a href="#int_atomics">Atomic intrinsics</a>
@@ -7680,12 +7681,12 @@ LLVM</a>.</p>
<!-- ======================================================================= -->
<h3>
- <a name="int_trampoline">Trampoline Intrinsic</a>
+ <a name="int_trampoline">Trampoline Intrinsics</a>
</h3>
<div>
-<p>This intrinsic makes it possible to excise one parameter, marked with
+<p>These intrinsics make it possible to excise one parameter, marked with
the <a href="#nest"><tt>nest</tt></a> attribute, from a function.
The result is a callable
function pointer lacking the nest parameter - the caller does not need to
@@ -7702,7 +7703,8 @@ LLVM</a>.</p>
<pre class="doc_code">
%tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
%tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
- %p = call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval)
+ call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval)
+ %p = call i8* @llvm.adjust.trampoline(i8* %tramp1)
%fp = bitcast i8* %p to i32 (i32, i32)*
</pre>
@@ -7720,12 +7722,12 @@ LLVM</a>.</p>
<h5>Syntax:</h5>
<pre>
- declare i8* @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;nval&gt;)
+ declare void @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;nval&gt;)
</pre>
<h5>Overview:</h5>
-<p>This fills the memory pointed to by <tt>tramp</tt> with code and returns a
- function pointer suitable for executing it.</p>
+<p>This fills the memory pointed to by <tt>tramp</tt> with executable code,
+ turning it into a trampoline.</p>
<h5>Arguments:</h5>
<p>The <tt>llvm.init.trampoline</tt> intrinsic takes three arguments, all
@@ -7739,17 +7741,50 @@ LLVM</a>.</p>
<h5>Semantics:</h5>
<p>The block of memory pointed to by <tt>tramp</tt> is filled with target
- dependent code, turning it into a function. A pointer to this function is
- returned, but needs to be bitcast to an <a href="#int_trampoline">appropriate
- function pointer type</a> before being called. The new function's signature
- is the same as that of <tt>func</tt> with any arguments marked with
- the <tt>nest</tt> attribute removed. At most one such <tt>nest</tt> argument
- is allowed, and it must be of pointer type. Calling the new function is
- equivalent to calling <tt>func</tt> with the same argument list, but
- with <tt>nval</tt> used for the missing <tt>nest</tt> argument. If, after
- calling <tt>llvm.init.trampoline</tt>, the memory pointed to
- by <tt>tramp</tt> is modified, then the effect of any later call to the
- returned function pointer is undefined.</p>
+ dependent code, turning it into a function. Then <tt>tramp</tt> needs to be
+ passed to <a href="#int_at">llvm.adjust.trampoline</a> to get a pointer
+ which can be <a href="#int_trampoline">bitcast (to a new function) and
+ called</a>. The new function's signature is the same as that of
+ <tt>func</tt> with any arguments marked with the <tt>nest</tt> attribute
+ removed. At most one such <tt>nest</tt> argument is allowed, and it must be of
+ pointer type. Calling the new function is equivalent to calling <tt>func</tt>
+ with the same argument list, but with <tt>nval</tt> used for the missing
+ <tt>nest</tt> argument. If, after calling <tt>llvm.init.trampoline</tt>, the
+ memory pointed to by <tt>tramp</tt> is modified, then the effect of any later call
+ to the returned function pointer is undefined.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="int_at">
+ '<tt>llvm.adjust.trampoline</tt>' Intrinsic
+ </a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<pre>
+ declare i8* @llvm.adjust.trampoline(i8* &lt;tramp&gt;)
+</pre>
+
+<h5>Overview:</h5>
+<p>This performs any required machine-specific adjustment to the address of a
+ trampoline (passed as <tt>tramp</tt>).</p>
+
+<h5>Arguments:</h5>
+<p><tt>tramp</tt> must point to a block of memory which already has trampoline code
+ filled in by a previous call to <a href="#int_it"><tt>llvm.init.trampoline</tt>
+ </a>.</p>
+
+<h5>Semantics:</h5>
+<p>On some architectures the address of the code to be executed needs to be
+ different to the address where the trampoline is actually stored. This
+ intrinsic returns the executable address corresponding to <tt>tramp</tt>
+ after performing the required machine specific adjustments.
+ The pointer returned can then be <a href="#int_trampoline"> bitcast and
+ executed</a>.
+</p>
</div>
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 7f5625cd5e..14b1378e72 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -566,14 +566,19 @@ namespace ISD {
// HANDLENODE node - Used as a handle for various purposes.
HANDLENODE,
- // TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
- // It takes as input a token chain, the pointer to the trampoline,
- // the pointer to the nested function, the pointer to pass for the
- // 'nest' parameter, a SRCVALUE for the trampoline and another for
- // the nested function (allowing targets to access the original
- // Function*). It produces the result of the intrinsic and a token
- // chain as output.
- TRAMPOLINE,
+ // INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic. It
+ // takes as input a token chain, the pointer to the trampoline, the pointer
+ // to the nested function, the pointer to pass for the 'nest' parameter, a
+ // SRCVALUE for the trampoline and another for the nested function (allowing
+ // targets to access the original Function*). It produces a token chain as
+ // output.
+ INIT_TRAMPOLINE,
+
+ // ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
+ // It takes a pointer to the trampoline and produces a (possibly) new
+ // pointer to the same trampoline with platform-specific adjustments
+ // applied. The pointer it returns points to an executable block of code.
+ ADJUST_TRAMPOLINE,
// TRAP - Trapping instruction
TRAP,
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 947cf1be7d..7646eaaf67 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -344,10 +344,14 @@ def int_annotation : Intrinsic<[llvm_anyint_ty],
//===------------------------ Trampoline Intrinsics -----------------------===//
//
-def int_init_trampoline : Intrinsic<[llvm_ptr_ty],
+def int_init_trampoline : Intrinsic<[],
[llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
- [IntrReadWriteArgMem]>,
- GCCBuiltin<"__builtin_init_trampoline">;
+ [IntrReadWriteArgMem, NoCapture<0>]>,
+ GCCBuiltin<"__builtin_init_trampoline">;
+
+def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
+ [IntrReadArgMem]>,
+ GCCBuiltin<"__builtin_adjust_trampoline">;
//===------------------------ Overflow Intrinsics -------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b9d841e9d8..e672512256 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -872,7 +872,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Expand;
break;
- case ISD::TRAMPOLINE:
+ case ISD::INIT_TRAMPOLINE:
+ case ISD::ADJUST_TRAMPOLINE:
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
// These operations lie about being legal: when they claim to be legal,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1c7b93af3b..ec7bfbe495 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6054,7 +6054,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CTLZ: return "ctlz";
// Trampolines
- case ISD::TRAMPOLINE: return "trampoline";
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
case ISD::CONDCODE:
switch (cast<CondCodeSDNode>(this)->get()) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0b0b98d834..d8fa0c93db 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5016,12 +5016,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
Ops[5] = DAG.getSrcValue(F);
- Res = DAG.getNode(ISD::TRAMPOLINE, dl,
- DAG.getVTList(TLI.getPointerTy(), MVT::Other),
- Ops, 6);
+ Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6);
- setValue(&I, Res);
- DAG.setRoot(Res.getValue(1));
+ DAG.setRoot(Res);
+ return 0;
+ }
+ case Intrinsic::adjust_trampoline: {
+ setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl,
+ TLI.getPointerTy(),
+ getValue(I.getArgOperand(0))));
return 0;
}
case Intrinsic::gcroot:
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index be94b08e75..39e6c2412f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -211,7 +211,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// TRAMPOLINE is custom lowered.
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
@@ -1373,8 +1374,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0);
}
-SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -1403,16 +1409,13 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()),
+ LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
Args, DAG, dl);
- SDValue Ops[] =
- { CallResult.first, CallResult.second };
-
- return DAG.getMergeValues(Ops, 2, dl);
+ return CallResult.second;
}
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
@@ -4499,7 +4502,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC");
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG, PPCSubTarget);
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index a4f8e2a839..602f70abfc 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -390,7 +390,8 @@ namespace llvm {
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8005408412..fa5f720ef0 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -504,7 +504,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -9406,8 +9407,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
}
-SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue Root = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -9471,9 +9477,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
MachinePointerInfo(TrmpAddr, 22),
false, false, 0);
- SDValue Ops[] =
- { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6);
} else {
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
@@ -9553,9 +9557,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
MachinePointerInfo(TrmpAddr, 6),
false, false, 1);
- SDValue Ops[] =
- { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4);
}
}
@@ -10356,7 +10358,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
- case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index db59097577..e83fea95cb 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -821,7 +821,8 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index d23cfe0b92..3926a7f87f 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -147,7 +147,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// TRAMPOLINE is custom lowered.
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
maxStoresPerMemmove = maxStoresPerMemmoveOptSize
@@ -180,7 +181,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD:
case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
return SDValue();
@@ -789,7 +791,12 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
}
SDValue XCoreTargetLowering::
-LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue XCoreTargetLowering::
+LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -841,9 +848,7 @@ LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(TrmpAddr, 16), false, false,
0);
- SDValue Ops[] =
- { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5);
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 246da9eee5..d6c5b329a0 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -145,7 +145,8 @@ namespace llvm {
SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
std::pair<unsigned, const TargetRegisterClass*>
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index c6bdb08998..be4454b878 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -11,6 +11,7 @@
#define INSTCOMBINE_INSTCOMBINE_H
#include "InstCombineWorklist.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -214,7 +215,8 @@ private:
Instruction *visitCallSite(CallSite CS);
Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD);
bool transformConstExprCastCall(CallSite CS);
- Instruction *transformCallThroughTrampoline(CallSite CS);
+ Instruction *transformCallThroughTrampoline(CallSite CS,
+ IntrinsicInst *Tramp);
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4c9cbdb4e6..7da3343fb4 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -821,6 +820,83 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
return Simplifier.NewInstruction;
}
+static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
+ // Strip off at most one level of pointer casts, looking for an alloca. This
+ // is good enough in practice and simpler than handling any number of casts.
+ Value *Underlying = TrampMem->stripPointerCasts();
+ if (Underlying != TrampMem &&
+ (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem))
+ return 0;
+ if (!isa<AllocaInst>(Underlying))
+ return 0;
+
+ IntrinsicInst *InitTrampoline = 0;
+ for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end();
+ I != E; I++) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I);
+ if (!II)
+ return 0;
+ if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
+ if (InitTrampoline)
+ // More than one init_trampoline writes to this value. Give up.
+ return 0;
+ InitTrampoline = II;
+ continue;
+ }
+ if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
+ // Allow any number of calls to adjust.trampoline.
+ continue;
+ return 0;
+ }
+
+ // No call to init.trampoline found.
+ if (!InitTrampoline)
+ return 0;
+
+ // Check that the alloca is being used in the expected way.
+ if (InitTrampoline->getOperand(0) != TrampMem)
+ return 0;
+
+ return InitTrampoline;
+}
+
+static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
+ Value *TrampMem) {
+ // Visit all the previous instructions in the basic block, and try to find a
+ // init.trampoline which has a direct path to the adjust.trampoline.
+ for (BasicBlock::iterator I = AdjustTramp,
+ E = AdjustTramp->getParent()->begin(); I != E; ) {
+ Instruction *Inst = --I;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
+ II->getOperand(0) == TrampMem)
+ return II;
+ if (Inst->mayWriteToMemory())
+ return 0;
+ }
+ return 0;
+}
+
+// Given a call to llvm.adjust.trampoline, find and return the corresponding
+// call to llvm.init.trampoline if the call to the trampoline can be optimized
+// to a direct call to a function. Otherwise return NULL.
+//
+static IntrinsicInst *FindInitTrampoline(Value *Callee) {
+ Callee = Callee->stripPointerCasts();
+ IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
+ if (!AdjustTramp ||
+ AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
+ return 0;
+
+ Value *TrampMem = AdjustTramp->getOperand(0);
+
+ if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
+ return IT;
+ if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
+ return IT;
+ return 0;
+}
+
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
@@ -880,10 +956,8 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
return EraseInstFromFunction(*CS.getInstruction());
}
- if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
- if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
- if (In->getIntrinsicID() == Intrinsic::init_trampoline)
- return transformCallThroughTrampoline(CS);
+ if (IntrinsicInst *II = FindInitTrampoline(Callee))
+ return transformCallThroughTrampoline(CS, II);
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
@@ -1164,10 +1238,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return true;
}
-// transformCallThroughTrampoline - Turn a call to a function created by the
-// init_trampoline intrinsic into a direct call to the underlying function.
+// transformCallThroughTrampoline - Turn a call to a function created by
+// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
+// underlying function.
//
-Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
+Instruction *
+InstCombiner::transformCallThroughTrampoline(CallSite CS,
+ IntrinsicInst *Tramp) {
Value *Callee = CS.getCalledValue();
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
@@ -1178,8 +1255,8 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
if (Attrs.hasAttrSomewhere(Attribute::Nest))
return 0;
- IntrinsicInst *Tramp =
- cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
+ assert(Tramp &&
+ "transformCallThroughTrampoline called with incorrect CallSite.");
Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
PointerType *NestFPTy = cast<PointerType>(NestF->getType());
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 572018105a..04221d461d 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -43,6 +43,26 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
switch (Name[0]) {
default: break;
+ case 'i':
+ // This upgrades the old llvm.init.trampoline to the new
+ // llvm.init.trampoline and llvm.adjust.trampoline pair.
+ if (Name == "init.trampoline") {
+ // The new llvm.init.trampoline returns nothing.
+ if (FTy->getReturnType()->isVoidTy())
+ break;
+
+ assert(FTy->getNumParams() == 3 && "old init.trampoline takes 3 args!");
+
+ // Change the name of the old intrinsic so that we can play with its type.
+ std::string NameTmp = F->getName();
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(
+ NameTmp,
+ Type::getVoidTy(M->getContext()),
+ FTy->getParamType(0), FTy->getParamType(1),
+ FTy->getParamType(2), (Type *)0));
+ return true;
+ }
case 'p':
// This upgrades the llvm.prefetch intrinsic to accept one more parameter,
// which is a instruction / data cache identifier. The old version only
@@ -216,6 +236,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
break;
}
+ case Intrinsic::init_trampoline: {
+
+ // Transform
+ // %tramp = call i8* llvm.init.trampoline (i8* x, i8* y, i8* z)
+ // to
+ // call void llvm.init.trampoline (i8* %x, i8* %y, i8* %z)
+ // %tramp = call i8* llvm.adjust.trampoline (i8* %x)
+
+ Function *AdjustTrampolineFn =
+ cast<Function>(Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::adjust_trampoline));
+
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI);
+
+ Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2));
+
+ CallInst *AdjustCall = Builder.CreateCall(AdjustTrampolineFn,
+ CI->getArgOperand(0),
+ CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(AdjustCall);
+ CI->eraseFromParent();
+ break;
+ }
}
}
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
index eb4ac769be..daffa3d3af 100644
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ b/test/Assembler/AutoUpgradeIntrinsics.ll
@@ -40,3 +40,16 @@ define void @p(i8* %ptr) {
tail call void @llvm.prefetch(i8* %ptr, i32 0, i32 1)
ret void
}
+
+declare i32 @nest_f(i8* nest, i32)
+declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
+
+define void @test_trampolines() {
+; CHECK: call void @llvm.init.trampoline(i8* null, i8* bitcast (i32 (i8*, i32)* @nest_f to i8*), i8* null)
+; CHECK: call i8* @llvm.adjust.trampoline(i8* null)
+
+ call i8* @llvm.init.trampoline(i8* null,
+ i8* bitcast (i32 (i8*, i32)* @nest_f to i8*),
+ i8* null)
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/trampoline.ll b/test/CodeGen/PowerPC/trampoline.ll
index bc05bb1763..91b201146b 100644
--- a/test/CodeGen/PowerPC/trampoline.ll
+++ b/test/CodeGen/PowerPC/trampoline.ll
@@ -67,7 +67,8 @@ entry:
store %struct.NSBitmapImageRep* %4, %struct.NSBitmapImageRep** %3, align 4
%TRAMP.91 = bitcast %struct.__builtin_trampoline* %TRAMP.9 to i8* ; <i8*> [#uses=1]
%FRAME.72 = bitcast %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7 to i8* ; <i8*> [#uses=1]
- %tramp = call i8* @llvm.init.trampoline(i8* %TRAMP.91, i8* bitcast (void (%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*, %struct.__block_1*, %struct.CGImage*)* @__helper_1.1632 to i8*), i8* %FRAME.72) ; <i8*> [#uses=1]
+ call void @llvm.init.trampoline(i8* %TRAMP.91, i8* bitcast (void (%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*, %struct.__block_1*, %struct.CGImage*)* @__helper_1.1632 to i8*), i8* %FRAME.72) ; <i8*> [#uses=1]
+ %tramp = call i8* @llvm.adjust.trampoline(i8* %TRAMP.91)
store i8* %tramp, i8** %0, align 4
%5 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 1 ; <void (%struct.__block_1*, %struct.CGImage*)**> [#uses=1]
%6 = load i8** %0, align 4 ; <i8*> [#uses=1]
@@ -113,7 +114,8 @@ return: ; preds = %entry
ret %struct.objc_object* %retval5
}
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
+declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind
+declare i8* @llvm.adjust.trampoline(i8*) nounwind
define internal void @__helper_1.1632(%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* nest %CHAIN.8, %struct.__block_1* %_self, %struct.CGImage* %cgImage) nounwind {
entry:
diff --git a/test/CodeGen/XCore/trampoline.ll b/test/CodeGen/XCore/trampoline.ll
index 4e1aba025b..6b42134997 100644
--- a/test/CodeGen/XCore/trampoline.ll
+++ b/test/CodeGen/XCore/trampoline.ll
@@ -11,7 +11,8 @@ entry:
%FRAME.0 = alloca %struct.FRAME.f, align 4
%TRAMP.23.sub = getelementptr inbounds [20 x i8]* %TRAMP.23, i32 0, i32 0
%FRAME.02 = bitcast %struct.FRAME.f* %FRAME.0 to i8*
- %tramp = call i8* @llvm.init.trampoline(i8* %TRAMP.23.sub, i8* bitcast (i32 (%struct.FRAME.f*)* @g.1101 to i8*), i8* %FRAME.02)
+ call void @llvm.init.trampoline(i8* %TRAMP.23.sub, i8* bitcast (i32 (%struct.FRAME.f*)* @g.1101 to i8*), i8* %FRAME.02)
+ %tramp = call i8* @llvm.adjust.trampoline(i8* %TRAMP.23.sub)
%0 = getelementptr inbounds %struct.FRAME.f* %FRAME.0, i32 0, i32 1
%1 = bitcast i8* %tramp to i32 ()*
store i32 ()* %1, i32 ()** %0, align 4
@@ -32,6 +33,7 @@ entry:
ret i32 %1
}
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
+declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind
+declare i8* @llvm.adjust.trampoline(i8*) nounwind
declare void @h(i32 ()*)
diff --git a/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll b/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
index 9bb9408939..aacea9df5b 100644
--- a/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
+++ b/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
@@ -3,7 +3,8 @@
%struct.FRAME.nest = type { i32, i32 (...)* }
%struct.__builtin_trampoline = type { [10 x i8] }
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
+declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind
+declare i8* @llvm.adjust.trampoline(i8*) nounwind
declare i32 @f(%struct.FRAME.nest* nest , ...)
@@ -15,7 +16,8 @@ entry:
%tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 %n, i32* %tmp3, align 8
%FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8* ; <i8*> [#uses=1]
- %tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest*, ...)* @f to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1]
+ call void @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest*, ...)* @f to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1]
+ %tramp = call i8* @llvm.adjust.trampoline( i8* %TRAMP.216.sub)
%tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; <i32 (...)**> [#uses=1]
%tmp89 = bitcast i8* %tramp to i32 (...)* ; <i32 (...)*> [#uses=2]
store i32 (...)* %tmp89, i32 (...)** %tmp7, align 8