diff options
-rw-r--r-- | docs/LangRef.html | 71 | ||||
-rw-r--r-- | include/llvm/CodeGen/ISDOpcodes.h | 21 | ||||
-rw-r--r-- | include/llvm/Intrinsics.td | 10 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 22 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 23 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 3 | ||||
-rw-r--r-- | lib/Target/XCore/XCoreISelLowering.cpp | 17 | ||||
-rw-r--r-- | lib/Target/XCore/XCoreISelLowering.h | 3 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombine.h | 4 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 97 | ||||
-rw-r--r-- | lib/VMCore/AutoUpgrade.cpp | 46 | ||||
-rw-r--r-- | test/Assembler/AutoUpgradeIntrinsics.ll | 13 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/trampoline.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/XCore/trampoline.ll | 6 | ||||
-rw-r--r-- | test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll | 6 |
19 files changed, 289 insertions, 81 deletions
diff --git a/docs/LangRef.html b/docs/LangRef.html index 2b9ee24bf4..0ec08eb0a0 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -275,9 +275,10 @@ </li> <li><a href="#int_debugger">Debugger intrinsics</a></li> <li><a href="#int_eh">Exception Handling intrinsics</a></li> - <li><a href="#int_trampoline">Trampoline Intrinsic</a> + <li><a href="#int_trampoline">Trampoline Intrinsics</a> <ol> <li><a href="#int_it">'<tt>llvm.init.trampoline</tt>' Intrinsic</a></li> + <li><a href="#int_at">'<tt>llvm.adjust.trampoline</tt>' Intrinsic</a></li> </ol> </li> <li><a href="#int_atomics">Atomic intrinsics</a> @@ -7680,12 +7681,12 @@ LLVM</a>.</p> <!-- ======================================================================= --> <h3> - <a name="int_trampoline">Trampoline Intrinsic</a> + <a name="int_trampoline">Trampoline Intrinsics</a> </h3> <div> -<p>This intrinsic makes it possible to excise one parameter, marked with +<p>These intrinsics make it possible to excise one parameter, marked with the <a href="#nest"><tt>nest</tt></a> attribute, from a function. 
The result is a callable function pointer lacking the nest parameter - the caller does not need to @@ -7702,7 +7703,8 @@ LLVM</a>.</p> <pre class="doc_code"> %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86 %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0 - %p = call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval) + call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval) + %p = call i8* @llvm.adjust.trampoline(i8* %tramp1) %fp = bitcast i8* %p to i32 (i32, i32)* </pre> @@ -7720,12 +7722,12 @@ LLVM</a>.</p> <h5>Syntax:</h5> <pre> - declare i8* @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>) + declare void @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>) </pre> <h5>Overview:</h5> -<p>This fills the memory pointed to by <tt>tramp</tt> with code and returns a - function pointer suitable for executing it.</p> +<p>This fills the memory pointed to by <tt>tramp</tt> with executable code, + turning it into a trampoline.</p> <h5>Arguments:</h5> <p>The <tt>llvm.init.trampoline</tt> intrinsic takes three arguments, all @@ -7739,17 +7741,50 @@ LLVM</a>.</p> <h5>Semantics:</h5> <p>The block of memory pointed to by <tt>tramp</tt> is filled with target - dependent code, turning it into a function. A pointer to this function is - returned, but needs to be bitcast to an <a href="#int_trampoline">appropriate - function pointer type</a> before being called. The new function's signature - is the same as that of <tt>func</tt> with any arguments marked with - the <tt>nest</tt> attribute removed. At most one such <tt>nest</tt> argument - is allowed, and it must be of pointer type. Calling the new function is - equivalent to calling <tt>func</tt> with the same argument list, but - with <tt>nval</tt> used for the missing <tt>nest</tt> argument. 
If, after - calling <tt>llvm.init.trampoline</tt>, the memory pointed to - by <tt>tramp</tt> is modified, then the effect of any later call to the - returned function pointer is undefined.</p> + dependent code, turning it into a function. Then <tt>tramp</tt> needs to be + passed to <a href="#int_at">llvm.adjust.trampoline</a> to get a pointer + which can be <a href="#int_trampoline">bitcast (to a new function) and + called</a>. The new function's signature is the same as that of + <tt>func</tt> with any arguments marked with the <tt>nest</tt> attribute + removed. At most one such <tt>nest</tt> argument is allowed, and it must be of + pointer type. Calling the new function is equivalent to calling <tt>func</tt> + with the same argument list, but with <tt>nval</tt> used for the missing + <tt>nest</tt> argument. If, after calling <tt>llvm.init.trampoline</tt>, the + memory pointed to by <tt>tramp</tt> is modified, then the effect of any later call + to the function pointer obtained from <tt>llvm.adjust.trampoline</tt> is undefined.</p> +</div> + +<!-- _______________________________________________________________________ --> +<h4> + <a name="int_at"> + '<tt>llvm.adjust.trampoline</tt>' Intrinsic + </a> +</h4> + +<div> + +<h5>Syntax:</h5> +<pre> + declare i8* @llvm.adjust.trampoline(i8* <tramp>) +</pre> + +<h5>Overview:</h5> +<p>This performs any required machine-specific adjustment to the address of a + trampoline (passed as <tt>tramp</tt>).</p> + +<h5>Arguments:</h5> +<p><tt>tramp</tt> must point to a block of memory which already has trampoline code + filled in by a previous call to <a href="#int_it"><tt>llvm.init.trampoline</tt> + </a>.</p> + +<h5>Semantics:</h5> +<p>On some architectures the address of the code to be executed needs to be + different from the address where the trampoline is actually stored. This + intrinsic returns the executable address corresponding to <tt>tramp</tt> + after performing the required machine-specific adjustments. 
+ The pointer returned can then be <a href="#int_trampoline"> bitcast and + executed</a>. +</p> </div> diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 7f5625cd5e..14b1378e72 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -566,14 +566,19 @@ namespace ISD { // HANDLENODE node - Used as a handle for various purposes. HANDLENODE, - // TRAMPOLINE - This corresponds to the init_trampoline intrinsic. - // It takes as input a token chain, the pointer to the trampoline, - // the pointer to the nested function, the pointer to pass for the - // 'nest' parameter, a SRCVALUE for the trampoline and another for - // the nested function (allowing targets to access the original - // Function*). It produces the result of the intrinsic and a token - // chain as output. - TRAMPOLINE, + // INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic. It + // takes as input a token chain, the pointer to the trampoline, the pointer + // to the nested function, the pointer to pass for the 'nest' parameter, a + // SRCVALUE for the trampoline and another for the nested function (allowing + // targets to access the original Function*). It produces a token chain as + // output. + INIT_TRAMPOLINE, + + // ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic. + // It takes a pointer to the trampoline and produces a (possibly) new + // pointer to the same trampoline with platform-specific adjustments + // applied. The pointer it returns points to an executable block of code. 
+ ADJUST_TRAMPOLINE, // TRAP - Trapping instruction TRAP, diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index 947cf1be7d..7646eaaf67 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -344,10 +344,14 @@ def int_annotation : Intrinsic<[llvm_anyint_ty], //===------------------------ Trampoline Intrinsics -----------------------===// // -def int_init_trampoline : Intrinsic<[llvm_ptr_ty], +def int_init_trampoline : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], - [IntrReadWriteArgMem]>, - GCCBuiltin<"__builtin_init_trampoline">; + [IntrReadWriteArgMem, NoCapture<0>]>, + GCCBuiltin<"__builtin_init_trampoline">; + +def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], + [IntrReadArgMem]>, + GCCBuiltin<"__builtin_adjust_trampoline">; //===------------------------ Overflow Intrinsics -------------------------===// // diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b9d841e9d8..e672512256 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -872,7 +872,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Expand; break; - case ISD::TRAMPOLINE: + case ISD::INIT_TRAMPOLINE: + case ISD::ADJUST_TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: // These operations lie about being legal: when they claim to be legal, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1c7b93af3b..ec7bfbe495 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6054,7 +6054,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CTLZ: return "ctlz"; // Trampolines - case ISD::TRAMPOLINE: return "trampoline"; + case ISD::INIT_TRAMPOLINE: return "init_trampoline"; + case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; case 
ISD::CONDCODE: switch (cast<CondCodeSDNode>(this)->get()) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 0b0b98d834..d8fa0c93db 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5016,12 +5016,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); - Res = DAG.getNode(ISD::TRAMPOLINE, dl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), - Ops, 6); + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6); - setValue(&I, Res); - DAG.setRoot(Res.getValue(1)); + DAG.setRoot(Res); + return 0; + } + case Intrinsic::adjust_trampoline: { + setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl, + TLI.getPointerTy(), + getValue(I.getArgOperand(0)))); return 0; } case Intrinsic::gcroot: diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index be94b08e75..39e6c2412f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -211,7 +211,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::TRAP, MVT::Other, Legal); // TRAMPOLINE is custom lowered. 
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); @@ -1373,8 +1374,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0); } -SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, - SelectionDAG &DAG) const { +SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -1403,16 +1409,13 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) std::pair<SDValue, SDValue> CallResult = - LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()), + LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); - SDValue Ops[] = - { CallResult.first, CallResult.second }; - - return DAG.getMergeValues(Ops, 2, dl); + return CallResult.second; } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, @@ -4499,7 +4502,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC"); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, 
DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, PPCSubTarget); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index a4f8e2a839..602f70abfc 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -390,7 +390,8 @@ namespace llvm { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8005408412..fa5f720ef0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -504,7 +504,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); - setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::TRAP, MVT::Other, Legal); @@ -9406,8 +9407,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); } -SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, + 
SelectionDAG &DAG) const { SDValue Root = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -9471,9 +9477,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, MachinePointerInfo(TrmpAddr, 22), false, false, 0); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6); } else { const Function *Func = cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); @@ -9553,9 +9557,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, MachinePointerInfo(TrmpAddr, 6), false, false, 1); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4); } } @@ -10356,7 +10358,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::CTLZ: return LowerCTLZ(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index db59097577..e83fea95cb 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -821,7 +821,8 @@ namespace llvm { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, 
SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index d23cfe0b92..3926a7f87f 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -147,7 +147,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // TRAMPOLINE is custom lowered. - setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); maxStoresPerMemset = maxStoresPerMemsetOptSize = 4; maxStoresPerMemmove = maxStoresPerMemmoveOptSize @@ -180,7 +181,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADD: case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); default: llvm_unreachable("unimplemented operand"); return SDValue(); @@ -789,7 +791,12 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, } SDValue XCoreTargetLowering:: -LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { +LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue XCoreTargetLowering:: +LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -841,9 +848,7 @@ 
LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(TrmpAddr, 16), false, false, 0); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 246da9eee5..d6c5b329a0 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -145,7 +145,8 @@ namespace llvm { SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; // Inline asm support std::pair<unsigned, const TargetRegisterClass*> diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index c6bdb08998..be4454b878 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -11,6 +11,7 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" @@ -214,7 +215,8 @@ private: Instruction *visitCallSite(CallSite CS); Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD); bool transformConstExprCastCall(CallSite CS); - Instruction *transformCallThroughTrampoline(CallSite CS); + Instruction *transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform = true); Instruction *transformSExtICmp(ICmpInst *ICI, 
Instruction &CI); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4c9cbdb4e6..7da3343fb4 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -821,6 +820,83 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { return Simplifier.NewInstruction; } +static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { + // Strip off at most one level of pointer casts, looking for an alloca. This + // is good enough in practice and simpler than handling any number of casts. + Value *Underlying = TrampMem->stripPointerCasts(); + if (Underlying != TrampMem && + (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem)) + return 0; + if (!isa<AllocaInst>(Underlying)) + return 0; + + IntrinsicInst *InitTrampoline = 0; + for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end(); + I != E; I++) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I); + if (!II) + return 0; + if (II->getIntrinsicID() == Intrinsic::init_trampoline) { + if (InitTrampoline) + // More than one init_trampoline writes to this value. Give up. + return 0; + InitTrampoline = II; + continue; + } + if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) + // Allow any number of calls to adjust.trampoline. + continue; + return 0; + } + + // No call to init.trampoline found. + if (!InitTrampoline) + return 0; + + // Check that the alloca is being used in the expected way. 
+ if (InitTrampoline->getOperand(0) != TrampMem) + return 0; + + return InitTrampoline; +} + +static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, + Value *TrampMem) { + // Visit all the previous instructions in the basic block, and try to find a + // init.trampoline which has a direct path to the adjust.trampoline. + for (BasicBlock::iterator I = AdjustTramp, + E = AdjustTramp->getParent()->begin(); I != E; ) { + Instruction *Inst = --I; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (II->getIntrinsicID() == Intrinsic::init_trampoline && + II->getOperand(0) == TrampMem) + return II; + if (Inst->mayWriteToMemory()) + return 0; + } + return 0; +} + +// Given a call to llvm.adjust.trampoline, find and return the corresponding +// call to llvm.init.trampoline if the call to the trampoline can be optimized +// to a direct call to a function. Otherwise return NULL. +// +static IntrinsicInst *FindInitTrampoline(Value *Callee) { + Callee = Callee->stripPointerCasts(); + IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee); + if (!AdjustTramp || + AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) + return 0; + + Value *TrampMem = AdjustTramp->getOperand(0); + + if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem)) + return IT; + if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem)) + return IT; + return 0; +} + // visitCallSite - Improvements for call and invoke instructions. 
// Instruction *InstCombiner::visitCallSite(CallSite CS) { @@ -880,10 +956,8 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { return EraseInstFromFunction(*CS.getInstruction()); } - if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee)) - if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0))) - if (In->getIntrinsicID() == Intrinsic::init_trampoline) - return transformCallThroughTrampoline(CS); + if (IntrinsicInst *II = FindInitTrampoline(Callee)) + return transformCallThroughTrampoline(CS, II); PointerType *PTy = cast<PointerType>(Callee->getType()); FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); @@ -1164,10 +1238,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return true; } -// transformCallThroughTrampoline - Turn a call to a function created by the -// init_trampoline intrinsic into a direct call to the underlying function. +// transformCallThroughTrampoline - Turn a call to a function created by +// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the +// underlying function. 
// -Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { +Instruction * +InstCombiner::transformCallThroughTrampoline(CallSite CS, + IntrinsicInst *Tramp) { Value *Callee = CS.getCalledValue(); PointerType *PTy = cast<PointerType>(Callee->getType()); FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); @@ -1178,8 +1255,8 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { if (Attrs.hasAttrSomewhere(Attribute::Nest)) return 0; - IntrinsicInst *Tramp = - cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); + assert(Tramp && + "transformCallThroughTrampoline called with incorrect CallSite."); Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); PointerType *NestFPTy = cast<PointerType>(NestF->getType()); diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 572018105a..04221d461d 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -43,6 +43,26 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { switch (Name[0]) { default: break; + case 'i': + // This upgrades the old llvm.init.trampoline to the new + // llvm.init.trampoline and llvm.adjust.trampoline pair. + if (Name == "init.trampoline") { + // The new llvm.init.trampoline returns nothing. + if (FTy->getReturnType()->isVoidTy()) + break; + + assert(FTy->getNumParams() == 3 && "old init.trampoline takes 3 args!"); + + // Change the name of the old intrinsic so that we can play with its type. + std::string NameTmp = F->getName(); + F->setName(""); + NewFn = cast<Function>(M->getOrInsertFunction( + NameTmp, + Type::getVoidTy(M->getContext()), + FTy->getParamType(0), FTy->getParamType(1), + FTy->getParamType(2), (Type *)0)); + return true; + } case 'p': // This upgrades the llvm.prefetch intrinsic to accept one more parameter, // which is a instruction / data cache identifier. 
The old version only @@ -216,6 +236,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); break; } + case Intrinsic::init_trampoline: { + + // Transform + // %tramp = call i8* llvm.init.trampoline (i8* x, i8* y, i8* z) + // to + // call void llvm.init.trampoline (i8* %x, i8* %y, i8* %z) + // %tramp = call i8* llvm.adjust.trampoline (i8* %x) + + Function *AdjustTrampolineFn = + cast<Function>(Intrinsic::getDeclaration(F->getParent(), + Intrinsic::adjust_trampoline)); + + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI); + + Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2)); + + CallInst *AdjustCall = Builder.CreateCall(AdjustTrampolineFn, + CI->getArgOperand(0), + CI->getName()); + if (!CI->use_empty()) + CI->replaceAllUsesWith(AdjustCall); + CI->eraseFromParent(); + break; + } } } diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll index eb4ac769be..daffa3d3af 100644 --- a/test/Assembler/AutoUpgradeIntrinsics.ll +++ b/test/Assembler/AutoUpgradeIntrinsics.ll @@ -40,3 +40,16 @@ define void @p(i8* %ptr) { tail call void @llvm.prefetch(i8* %ptr, i32 0, i32 1) ret void } + +declare i32 @nest_f(i8* nest, i32) +declare i8* @llvm.init.trampoline(i8*, i8*, i8*) + +define void @test_trampolines() { +; CHECK: call void @llvm.init.trampoline(i8* null, i8* bitcast (i32 (i8*, i32)* @nest_f to i8*), i8* null) +; CHECK: call i8* @llvm.adjust.trampoline(i8* null) + + call i8* @llvm.init.trampoline(i8* null, + i8* bitcast (i32 (i8*, i32)* @nest_f to i8*), + i8* null) + ret void +} diff --git a/test/CodeGen/PowerPC/trampoline.ll b/test/CodeGen/PowerPC/trampoline.ll index bc05bb1763..91b201146b 100644 --- a/test/CodeGen/PowerPC/trampoline.ll +++ b/test/CodeGen/PowerPC/trampoline.ll @@ -67,7 +67,8 @@ entry: store %struct.NSBitmapImageRep* %4, %struct.NSBitmapImageRep** %3, align 4 %TRAMP.91 = bitcast %struct.__builtin_trampoline* %TRAMP.9 to i8* ; <i8*> 
[#uses=1] %FRAME.72 = bitcast %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7 to i8* ; <i8*> [#uses=1] - %tramp = call i8* @llvm.init.trampoline(i8* %TRAMP.91, i8* bitcast (void (%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*, %struct.__block_1*, %struct.CGImage*)* @__helper_1.1632 to i8*), i8* %FRAME.72) ; <i8*> [#uses=1] + call void @llvm.init.trampoline(i8* %TRAMP.91, i8* bitcast (void (%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*, %struct.__block_1*, %struct.CGImage*)* @__helper_1.1632 to i8*), i8* %FRAME.72) ; <i8*> [#uses=1] + %tramp = call i8* @llvm.adjust.trampoline(i8* %TRAMP.91) store i8* %tramp, i8** %0, align 4 %5 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 1 ; <void (%struct.__block_1*, %struct.CGImage*)**> [#uses=1] %6 = load i8** %0, align 4 ; <i8*> [#uses=1] @@ -113,7 +114,8 @@ return: ; preds = %entry ret %struct.objc_object* %retval5 } -declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare i8* @llvm.adjust.trampoline(i8*) nounwind define internal void @__helper_1.1632(%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* nest %CHAIN.8, %struct.__block_1* %_self, %struct.CGImage* %cgImage) nounwind { entry: diff --git a/test/CodeGen/XCore/trampoline.ll b/test/CodeGen/XCore/trampoline.ll index 4e1aba025b..6b42134997 100644 --- a/test/CodeGen/XCore/trampoline.ll +++ b/test/CodeGen/XCore/trampoline.ll @@ -11,7 +11,8 @@ entry: %FRAME.0 = alloca %struct.FRAME.f, align 4 %TRAMP.23.sub = getelementptr inbounds [20 x i8]* %TRAMP.23, i32 0, i32 0 %FRAME.02 = bitcast %struct.FRAME.f* %FRAME.0 to i8* - %tramp = call i8* @llvm.init.trampoline(i8* %TRAMP.23.sub, i8* bitcast (i32 (%struct.FRAME.f*)* @g.1101 to i8*), i8* %FRAME.02) + call void @llvm.init.trampoline(i8* %TRAMP.23.sub, i8* bitcast (i32 (%struct.FRAME.f*)* @g.1101 to i8*), i8* %FRAME.02) + %tramp = call i8* @llvm.adjust.trampoline(i8* %TRAMP.23.sub) %0 = 
getelementptr inbounds %struct.FRAME.f* %FRAME.0, i32 0, i32 1 %1 = bitcast i8* %tramp to i32 ()* store i32 ()* %1, i32 ()** %0, align 4 @@ -32,6 +33,7 @@ entry: ret i32 %1 } -declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare i8* @llvm.adjust.trampoline(i8*) nounwind declare void @h(i32 ()*) diff --git a/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll b/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll index 9bb9408939..aacea9df5b 100644 --- a/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll +++ b/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll @@ -3,7 +3,8 @@ %struct.FRAME.nest = type { i32, i32 (...)* } %struct.__builtin_trampoline = type { [10 x i8] } -declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind +declare i8* @llvm.adjust.trampoline(i8*) nounwind declare i32 @f(%struct.FRAME.nest* nest , ...) @@ -15,7 +16,8 @@ entry: %tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0 ; <i32*> [#uses=1] store i32 %n, i32* %tmp3, align 8 %FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8* ; <i8*> [#uses=1] - %tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest*, ...)* @f to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1] + call void @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest*, ...)* @f to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1] + %tramp = call i8* @llvm.adjust.trampoline( i8* %TRAMP.216.sub) %tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; <i32 (...)**> [#uses=1] %tmp89 = bitcast i8* %tramp to i32 (...)* ; <i32 (...)*> [#uses=2] store i32 (...)* %tmp89, i32 (...)** %tmp7, align 8 |