From 8fa89291774a29ee30adb9d0fd01655c84eaac13 Mon Sep 17 00:00:00 2001 From: Gordon Henriksen Date: Mon, 7 Jan 2008 01:30:53 +0000 Subject: With this patch, the LowerGC transformation becomes the ShadowStackCollector, which additionally has reduced overhead with no sacrifice in portability. Considering a function @fun with 8 loop-local roots, ShadowStackCollector introduces the following overhead (x86): ; shadowstack prologue movl L_llvm_gc_root_chain$non_lazy_ptr, %eax movl (%eax), %ecx movl $___gc_fun, 20(%esp) movl $0, 24(%esp) movl $0, 28(%esp) movl $0, 32(%esp) movl $0, 36(%esp) movl $0, 40(%esp) movl $0, 44(%esp) movl $0, 48(%esp) movl $0, 52(%esp) movl %ecx, 16(%esp) leal 16(%esp), %ecx movl %ecx, (%eax) ; shadowstack loop overhead (none) ; shadowstack epilogue movl 48(%esp), %edx movl %edx, (%ecx) ; shadowstack metadata .align 3 ___gc_fun: # __gc_fun .long 8 .space 4 In comparison to LowerGC: ; lowergc prologue movl L_llvm_gc_root_chain$non_lazy_ptr, %eax movl (%eax), %ecx movl %ecx, 48(%esp) movl $8, 52(%esp) movl $0, 60(%esp) movl $0, 56(%esp) movl $0, 68(%esp) movl $0, 64(%esp) movl $0, 76(%esp) movl $0, 72(%esp) movl $0, 84(%esp) movl $0, 80(%esp) movl $0, 92(%esp) movl $0, 88(%esp) movl $0, 100(%esp) movl $0, 96(%esp) movl $0, 108(%esp) movl $0, 104(%esp) movl $0, 116(%esp) movl $0, 112(%esp) ; lowergc loop overhead leal 44(%esp), %eax movl %eax, 56(%esp) leal 40(%esp), %eax movl %eax, 64(%esp) leal 36(%esp), %eax movl %eax, 72(%esp) leal 32(%esp), %eax movl %eax, 80(%esp) leal 28(%esp), %eax movl %eax, 88(%esp) leal 24(%esp), %eax movl %eax, 96(%esp) leal 20(%esp), %eax movl %eax, 104(%esp) leal 16(%esp), %eax movl %eax, 112(%esp) ; lowergc epilogue movl 48(%esp), %edx movl %edx, (%ecx) ; lowergc metadata (none) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45670 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/LinkAllCodegenComponents.h | 3 + include/llvm/LinkAllPasses.h | 1 - include/llvm/Transforms/Scalar.h | 7 - lib/CodeGen/ShadowStackCollector.cpp | 441 ++++++++++++++++++++++++ lib/Transforms/Scalar/LowerGC.cpp | 350 ------------------- runtime/GC/SemiSpace/semispace.c | 32 +- test/CodeGen/Generic/GC/redundant_init.ll | 17 + 7 files changed, 478 insertions(+), 373 deletions(-) create mode 100644 lib/CodeGen/ShadowStackCollector.cpp create mode 100644 test/CodeGen/Generic/GC/redundant_init.ll diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h index 05d97cf943..48be5411bc 100644 --- a/include/llvm/CodeGen/LinkAllCodegenComponents.h +++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h @@ -17,6 +17,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/Collectors.h" namespace { struct ForceCodegenLinking { @@ -35,6 +36,8 @@ namespace { (void) llvm::createSimpleRegisterCoalescer(); + (void) llvm::createShadowStackCollector(); + (void) llvm::createBURRListDAGScheduler(NULL, NULL, NULL); (void) llvm::createTDRRListDAGScheduler(NULL, NULL, NULL); (void) llvm::createTDListDAGScheduler(NULL, NULL, NULL); diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 75ab81e83a..9555dbc76f 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -84,7 +84,6 @@ namespace { (void) llvm::createLoopRotatePass(); (void) llvm::createLoopIndexSplitPass(); (void) llvm::createLowerAllocationsPass(); - (void) llvm::createLowerGCPass(); (void) llvm::createLowerInvokePass(); (void) llvm::createLowerPackedPass(); (void) llvm::createLowerSelectPass(); diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index f1a101f602..4e3b21af6c 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -294,13 +294,6 @@ FunctionPass *createLowerPackedPass(); FunctionPass *createLowerInvokePass(const TargetLowering *TLI = NULL); extern const PassInfo *LowerInvokePassID; -//===----------------------------------------------------------------------===// -// -// LowerGCPass - This function returns an instance of the "lowergc" pass, which -// lowers garbage collection intrinsics to normal LLVM code. -// -FunctionPass *createLowerGCPass(); - //===----------------------------------------------------------------------===// // // BlockPlacement - This pass reorders basic blocks in order to increase the diff --git a/lib/CodeGen/ShadowStackCollector.cpp b/lib/CodeGen/ShadowStackCollector.cpp new file mode 100644 index 0000000000..1b619c9668 --- /dev/null +++ b/lib/CodeGen/ShadowStackCollector.cpp @@ -0,0 +1,441 @@ +//===-- ShadowStackCollector.cpp - GC support for uncooperative targets ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering for the llvm.gc* intrinsics for targets that do +// not natively support them (which includes the C backend). Note that the code +// generated is not quite as efficient as collectors which generate stack maps +// to identify roots. +// +// This pass implements the code transformation described in this paper: +// "Accurate Garbage Collection in an Uncooperative Environment" +// Fergus Henderson, ISMM, 2002 +// +// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with +// this collector. +// +// In order to support this particular transformation, all stack roots are +// coallocated in the stack. This allows a fully target-independent stack map +// while introducing only minor runtime overhead. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "shadowstackgc" +#include "llvm/CodeGen/Collectors.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/Collector.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/LLVMBuilder.h" +#include "llvm/Analysis/Verifier.h" +#include + +using namespace llvm; + +namespace { + + class VISIBILITY_HIDDEN ShadowStackCollector : public Collector { + /// RootChain - This is the global linked-list that contains the chain of GC + /// roots. + GlobalVariable *Head; + + /// StackEntryTy - Abstract type of a link in the shadow stack. + /// + const StructType *StackEntryTy; + + /// Roots - GC roots in the current function. Each is a pair of the + /// intrinsic call and its corresponding alloca. + std::vector > Roots; + + public: + ShadowStackCollector(); + + bool initializeCustomLowering(Module &M); + bool performCustomLowering(Function &F); + + private: + bool IsNullValue(Value *V); + Constant *GetFrameMap(Function &F); + const Type* GetConcreteStackEntryType(Function &F); + void CollectRoots(Function &F); + static GetElementPtrInst *CreateGEP(LLVMBuilder &B, Value *BasePtr, + int Idx1, const char *Name); + static GetElementPtrInst *CreateGEP(LLVMBuilder &B, Value *BasePtr, + int Idx1, int Idx2, const char *Name); + }; + + CollectorRegistry::Add + Y("shadow-stack", + "Very portable collector for uncooperative code generators"); + + /// EscapeEnumerator - This is a little algorithm to find all escape points + /// from a function so that "finally"-style code can be inserted. In addition + /// to finding the existing return and unwind instructions, it also (if + /// necessary) transforms any call instructions into invokes and sends them to + /// a landing pad. + /// + /// It's wrapped up in a state machine using the same transform C# uses for + /// 'yield return' enumerators, This transform allows it to be non-allocating. + class VISIBILITY_HIDDEN EscapeEnumerator { + Function &F; + const char *CleanupBBName; + + // State. + int State; + Function::iterator StateBB, StateE; + LLVMBuilder Builder; + + public: + EscapeEnumerator(Function &F, const char *N = "cleanup") + : F(F), CleanupBBName(N), State(0) {} + + LLVMBuilder *Next() { + switch (State) { + default: + return 0; + + case 0: + StateBB = F.begin(); + StateE = F.end(); + State = 1; + + case 1: + // Find all 'return' and 'unwind' instructions. + while (StateBB != StateE) { + BasicBlock *CurBB = StateBB++; + + // Branches and invokes do not escape, only unwind and return do. + TerminatorInst *TI = CurBB->getTerminator(); + if (!isa(TI) && !isa(TI)) + continue; + + Builder.SetInsertPoint(TI->getParent(), TI); + return &Builder; + } + + State = 2; + + // Find all 'call' instructions. + SmallVector Calls; + for (Function::iterator BB = F.begin(), + E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), + EE = BB->end(); II != EE; ++II) + if (CallInst *CI = dyn_cast(II)) + if (!CI->getCalledFunction() || + !CI->getCalledFunction()->getIntrinsicID()) + Calls.push_back(CI); + + if (Calls.empty()) + return 0; + + // Create a cleanup block. + BasicBlock *CleanupBB = new BasicBlock(CleanupBBName, &F); + UnwindInst *UI = new UnwindInst(CleanupBB); + + // Transform the 'call' instructions into 'invoke's branching to the + // cleanup block. Go in reverse order to make prettier BB names. + SmallVector Args; + for (unsigned I = Calls.size(); I != 0; ) { + CallInst *CI = cast(Calls[--I]); + + // Split the basic block containing the function call. + BasicBlock *CallBB = CI->getParent(); + BasicBlock *NewBB = + CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); + + // Remove the unconditional branch inserted at the end of CallBB. + CallBB->getInstList().pop_back(); + NewBB->getInstList().remove(CI); + + // Create a new invoke instruction. + Args.clear(); + Args.append(CI->op_begin() + 1, CI->op_end()); + + InvokeInst *II = new InvokeInst(CI->getOperand(0), + NewBB, CleanupBB, + Args.begin(), Args.end(), + CI->getName(), CallBB); + II->setCallingConv(CI->getCallingConv()); + II->setParamAttrs(CI->getParamAttrs()); + CI->replaceAllUsesWith(II); + delete CI; + } + + Builder.SetInsertPoint(UI->getParent(), UI); + return &Builder; + } + } + }; + +} + +// ----------------------------------------------------------------------------- + +Collector *llvm::createShadowStackCollector() { + return new ShadowStackCollector(); +} + +ShadowStackCollector::ShadowStackCollector() : Head(0), StackEntryTy(0) { + InitRoots = true; + CustomRoots = true; +} + +Constant *ShadowStackCollector::GetFrameMap(Function &F) { + // doInitialization creates the abstract type of this value. + + Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty); + + // Truncate the ShadowStackDescriptor if some metadata is null. + unsigned NumMeta = 0; + SmallVector Metadata; + for (unsigned I = 0; I != Roots.size(); ++I) { + Constant *C = cast(Roots[I].first->getOperand(2)); + if (!C->isNullValue()) + NumMeta = I + 1; + Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); + } + + Constant *BaseElts[] = { + ConstantInt::get(Type::Int32Ty, Roots.size(), false), + ConstantInt::get(Type::Int32Ty, NumMeta, false), + }; + + Constant *DescriptorElts[] = { + ConstantStruct::get(BaseElts, 2), + ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), + Metadata.begin(), NumMeta) + }; + + Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2); + + std::string TypeName("gc_map."); + TypeName += utostr(NumMeta); + F.getParent()->addTypeName(TypeName, FrameMap->getType()); + + // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems + // that, short of multithreaded LLVM, it should be safe; all that is + // necessary is that a simple Module::iterator loop not be invalidated. + // Appending to the GlobalVariable list is safe in that sense. + // + // All of the output passes emit globals last. The ExecutionEngine + // explicitly supports adding globals to the module after + // initialization. + // + // Still, if it isn't deemed acceptable, then this transformation needs + // to be a ModulePass (which means it cannot be in the 'llc' pipeline + // (which uses a FunctionPassManager (which segfaults (not asserts) if + // provided a ModulePass))). + Constant *GV = new GlobalVariable(FrameMap->getType(), true, + GlobalVariable::InternalLinkage, + FrameMap, "__gc_" + F.getName(), + F.getParent()); + + Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0), + ConstantInt::get(Type::Int32Ty, 0) }; + return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2); +} + +const Type* ShadowStackCollector::GetConcreteStackEntryType(Function &F) { + // doInitialization creates the generic version of this type. + std::vector EltTys; + EltTys.push_back(StackEntryTy); + for (size_t I = 0; I != Roots.size(); I++) + EltTys.push_back(Roots[I].second->getAllocatedType()); + Type *Ty = StructType::get(EltTys); + + std::string TypeName("gc_stackentry."); + TypeName += F.getName(); + F.getParent()->addTypeName(TypeName, Ty); + + return Ty; +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. If +/// not, exit fast. +bool ShadowStackCollector::initializeCustomLowering(Module &M) { + // struct FrameMap { + // int32_t NumRoots; // Number of roots in stack frame. + // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. + // void *Meta[]; // May be absent for roots without metadata. + // }; + std::vector EltTys; + EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array. + StructType *FrameMapTy = StructType::get(EltTys); + M.addTypeName("gc_map", FrameMapTy); + PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); + + // struct StackEntry { + // ShadowStackEntry *Next; // Caller's stack entry. + // FrameMap *Map; // Pointer to constant FrameMap. + // void *Roots[]; // Stack roots (in-place array, so we pretend). + // }; + OpaqueType *RecursiveTy = OpaqueType::get(); + + EltTys.clear(); + EltTys.push_back(PointerType::getUnqual(RecursiveTy)); + EltTys.push_back(FrameMapPtrTy); + PATypeHolder LinkTyH = StructType::get(EltTys); + + RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); + StackEntryTy = cast(LinkTyH.get()); + const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from + // a FunctionPass? + + // Get the root chain if it already exists. + Head = M.getGlobalVariable("llvm_gc_root_chain"); + if (!Head) { + // If the root chain does not exist, insert a new one with linkonce + // linkage! + Head = new GlobalVariable(StackEntryPtrTy, false, + GlobalValue::LinkOnceLinkage, + Constant::getNullValue(StackEntryPtrTy), + "llvm_gc_root_chain", &M); + } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { + Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); + Head->setLinkage(GlobalValue::LinkOnceLinkage); + } + + return true; +} + +bool ShadowStackCollector::IsNullValue(Value *V) { + if (Constant *C = dyn_cast(V)) + return C->isNullValue(); + return false; +} + +void ShadowStackCollector::CollectRoots(Function &F) { + // FIXME: Account for original alignment. Could fragment the root array. + // Approach 1: Null initialize empty slots at runtime. Yuck. + // Approach 2: Emit a map of the array instead of just a count. + + assert(Roots.empty() && "Not cleaned up?"); + + SmallVector,16> MetaRoots; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast(II++)) + if (Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::gcroot) { + std::pair Pair = std::make_pair( + CI, cast( + IntrinsicInst::StripPointerCasts(CI->getOperand(1)))); + if (IsNullValue(CI->getOperand(2))) + Roots.push_back(Pair); + else + MetaRoots.push_back(Pair); + } + + // Number roots with metadata (usually empty) at the beginning, so that the + // FrameMap::Meta array can be elided. + Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); +} + +GetElementPtrInst * +ShadowStackCollector::CreateGEP(LLVMBuilder &B, Value *BasePtr, + int Idx, int Idx2, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), + ConstantInt::get(Type::Int32Ty, Idx), + ConstantInt::get(Type::Int32Ty, Idx2) }; + return B.CreateGEP(BasePtr, Indices, Indices + 3, Name); +} + +GetElementPtrInst * +ShadowStackCollector::CreateGEP(LLVMBuilder &B, Value *BasePtr, + int Idx, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), + ConstantInt::get(Type::Int32Ty, Idx) }; + return B.CreateGEP(BasePtr, Indices, Indices + 2, Name); +} + +/// runOnFunction - Insert code to maintain the shadow stack. +bool ShadowStackCollector::performCustomLowering(Function &F) { + // Find calls to llvm.gcroot. + CollectRoots(F); + + // If there are no roots in this function, then there is no need to add a + // stack map entry for it. + if (Roots.empty()) + return false; + + // Build the constant map and figure the type of the shadow stack entry. + Value *FrameMap = GetFrameMap(F); + const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); + + // Build the shadow stack entry at the very start of the function. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + LLVMBuilder AtEntry(IP->getParent(), IP); + + Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0, + "gc_frame"); + + while (isa(IP)) ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Initialize the map pointer and load the current head of the shadow stack. + Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); + Instruction *EntryMapPtr = CreateGEP(AtEntry, StackEntry,0,1,"gc_frame.map"); + AtEntry.CreateStore(FrameMap, EntryMapPtr); + + // After all the allocas... + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + // For each root, find the corresponding slot in the aggregate... + Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root"); + + // And use it in lieu of the alloca. + AllocaInst *OriginalAlloca = Roots[I].second; + SlotPtr->takeName(OriginalAlloca); + OriginalAlloca->replaceAllUsesWith(SlotPtr); + } + + // Move past the original stores inserted by Collector::InitRoots. This isn't + // really necessary (the collector would never see the intermediate state), + // but it's nicer not to push the half-initialized entry onto the stack. + while (isa(IP)) ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Push the entry onto the shadow stack. + Instruction *EntryNextPtr = CreateGEP(AtEntry,StackEntry,0,0,"gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead"); + AtEntry.CreateStore(CurrentHead, EntryNextPtr); + AtEntry.CreateStore(NewHeadVal, Head); + + // For each instruction that escapes... + EscapeEnumerator EE(F, "gc_cleanup"); + while (LLVMBuilder *AtExit = EE.Next()) { + // Pop the entry from the shadow stack. Don't reuse CurrentHead from + // AtEntry, since that would make the value live for the entire function. + Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0, + "gc_frame.next"); + Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); + AtExit->CreateStore(SavedHead, Head); + } + + // Delete the original allocas (which are no longer used) and the intrinsic + // calls (which are no longer valid). Doing this last avoids invalidating + // iterators. + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + Roots[I].first->eraseFromParent(); + Roots[I].second->eraseFromParent(); + } + + F.dump(); + + Roots.clear(); + return true; +} diff --git a/lib/Transforms/Scalar/LowerGC.cpp b/lib/Transforms/Scalar/LowerGC.cpp index 89749862d2..e69de29bb2 100644 --- a/lib/Transforms/Scalar/LowerGC.cpp +++ b/lib/Transforms/Scalar/LowerGC.cpp @@ -1,350 +0,0 @@ -//===-- LowerGC.cpp - Provide GC support for targets that don't -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements lowering for the llvm.gc* intrinsics for targets that do -// not natively support them (which includes the C backend). Note that the code -// generated is not as efficient as it would be for targets that natively -// support the GC intrinsics, but it is useful for getting new targets -// up-and-running quickly. -// -// This pass implements the code transformation described in this paper: -// "Accurate Garbage Collection in an Uncooperative Environment" -// Fergus Henderson, ISMM, 2002 -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "lowergc" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/Compiler.h" -#include "llvm/ADT/SmallVector.h" -using namespace llvm; - -namespace { - class VISIBILITY_HIDDEN LowerGC : public FunctionPass { - /// GCRootInt, GCReadInt, GCWriteInt - The function prototypes for the - /// llvm.gcread/llvm.gcwrite/llvm.gcroot intrinsics. - Function *GCRootInt, *GCReadInt, *GCWriteInt; - - /// GCRead/GCWrite - These are the functions provided by the garbage - /// collector for read/write barriers. - Constant *GCRead, *GCWrite; - - /// RootChain - This is the global linked-list that contains the chain of GC - /// roots. - GlobalVariable *RootChain; - - /// MainRootRecordType - This is the type for a function root entry if it - /// had zero roots. - const Type *MainRootRecordType; - public: - static char ID; // Pass identification, replacement for typeid - LowerGC() : FunctionPass((intptr_t)&ID), - GCRootInt(0), GCReadInt(0), GCWriteInt(0), - GCRead(0), GCWrite(0), RootChain(0), MainRootRecordType(0) {} - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - private: - const StructType *getRootRecordType(unsigned NumRoots); - }; - - char LowerGC::ID = 0; - RegisterPass - X("lowergc", "Lower GC intrinsics, for GCless code generators"); -} - -/// createLowerGCPass - This function returns an instance of the "lowergc" -/// pass, which lowers garbage collection intrinsics to normal LLVM code. -FunctionPass *llvm::createLowerGCPass() { - return new LowerGC(); -} - -/// getRootRecordType - This function creates and returns the type for a root -/// record containing 'NumRoots' roots. -const StructType *LowerGC::getRootRecordType(unsigned NumRoots) { - // Build a struct that is a type used for meta-data/root pairs. - std::vector ST; - ST.push_back(GCRootInt->getFunctionType()->getParamType(0)); - ST.push_back(GCRootInt->getFunctionType()->getParamType(1)); - StructType *PairTy = StructType::get(ST); - - // Build the array of pairs. - ArrayType *PairArrTy = ArrayType::get(PairTy, NumRoots); - - // Now build the recursive list type. - PATypeHolder RootListH = - MainRootRecordType ? (Type*)MainRootRecordType : (Type*)OpaqueType::get(); - ST.clear(); - ST.push_back(PointerType::getUnqual(RootListH)); // Prev pointer - ST.push_back(Type::Int32Ty); // NumElements in array - ST.push_back(PairArrTy); // The pairs - StructType *RootList = StructType::get(ST); - if (MainRootRecordType) - return RootList; - - assert(NumRoots == 0 && "The main struct type should have zero entries!"); - cast((Type*)RootListH.get())->refineAbstractTypeTo(RootList); - MainRootRecordType = RootListH; - return cast(RootListH.get()); -} - -/// doInitialization - If this module uses the GC intrinsics, find them now. If -/// not, this pass does not do anything. -bool LowerGC::doInitialization(Module &M) { - GCRootInt = M.getFunction("llvm.gcroot"); - GCReadInt = M.getFunction("llvm.gcread"); - GCWriteInt = M.getFunction("llvm.gcwrite"); - if (!GCRootInt && !GCReadInt && !GCWriteInt) return false; - - PointerType *VoidPtr = PointerType::getUnqual(Type::Int8Ty); - PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr); - - // If the program is using read/write barriers, find the implementations of - // them from the GC runtime library. - if (GCReadInt) // Make: sbyte* %llvm_gc_read(sbyte**) - GCRead = M.getOrInsertFunction("llvm_gc_read", VoidPtr, VoidPtr, VoidPtrPtr, - (Type *)0); - if (GCWriteInt) // Make: void %llvm_gc_write(sbyte*, sbyte**) - GCWrite = M.getOrInsertFunction("llvm_gc_write", Type::VoidTy, - VoidPtr, VoidPtr, VoidPtrPtr, (Type *)0); - - // If the program has GC roots, get or create the global root list. - if (GCRootInt) { - const StructType *RootListTy = getRootRecordType(0); - const Type *PRLTy = PointerType::getUnqual(RootListTy); - M.addTypeName("llvm_gc_root_ty", RootListTy); - - // Get the root chain if it already exists. - RootChain = M.getGlobalVariable("llvm_gc_root_chain", PRLTy); - if (RootChain == 0) { - // If the root chain does not exist, insert a new one with linkonce - // linkage! - RootChain = new GlobalVariable(PRLTy, false, - GlobalValue::LinkOnceLinkage, - Constant::getNullValue(PRLTy), - "llvm_gc_root_chain", &M); - } else if (RootChain->hasExternalLinkage() && RootChain->isDeclaration()) { - RootChain->setInitializer(Constant::getNullValue(PRLTy)); - RootChain->setLinkage(GlobalValue::LinkOnceLinkage); - } - } - return true; -} - -/// Coerce - If the specified operand number of the specified instruction does -/// not have the specified type, insert a cast. Note that this only uses BitCast -/// because the types involved are all pointers. -static void Coerce(Instruction *I, unsigned OpNum, Type *Ty) { - if (I->getOperand(OpNum)->getType() != Ty) { - if (Constant *C = dyn_cast(I->getOperand(OpNum))) - I->setOperand(OpNum, ConstantExpr::getBitCast(C, Ty)); - else { - CastInst *CI = new BitCastInst(I->getOperand(OpNum), Ty, "", I); - I->setOperand(OpNum, CI); - } - } -} - -/// runOnFunction - If the program is using GC intrinsics, replace any -/// read/write intrinsics with the appropriate read/write barrier calls, then -/// inline them. Finally, build the data structures for -bool LowerGC::runOnFunction(Function &F) { - // Quick exit for programs that are not using GC mechanisms. - if (!GCRootInt && !GCReadInt && !GCWriteInt) return false; - - PointerType *VoidPtr = PointerType::getUnqual(Type::Int8Ty); - PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr); - - // If there are read/write barriers in the program, perform a quick pass over - // the function eliminating them. While we are at it, remember where we see - // calls to llvm.gcroot. - std::vector GCRoots; - std::vector NormalCalls; - - bool MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) - if (CallInst *CI = dyn_cast(II++)) { - if (!CI->getCalledFunction() || - !CI->getCalledFunction()->isIntrinsic()) - NormalCalls.push_back(CI); // Remember all normal function calls. - - if (Function *F = CI->getCalledFunction()) - if (F == GCRootInt) - GCRoots.push_back(CI); - else if (F == GCReadInt || F == GCWriteInt) { - if (F == GCWriteInt) { - // Change a llvm.gcwrite call to call llvm_gc_write instead. - CI->setOperand(0, GCWrite); - // Insert casts of the operands as needed. - Coerce(CI, 1, VoidPtr); - Coerce(CI, 2, VoidPtr); - Coerce(CI, 3, VoidPtrPtr); - } else { - Coerce(CI, 1, VoidPtr); - Coerce(CI, 2, VoidPtrPtr); - if (CI->getType() == VoidPtr) { - CI->setOperand(0, GCRead); - } else { - // Create a whole new call to replace the old one. - - // It sure would be nice to pass op_begin()+1, - // op_begin()+2 but it runs into trouble with - // CallInst::init's &*iterator, which requires a - // conversion from Use* to Value*. The conversion - // from Use to Value * is not useful because the - // memory for Value * won't be contiguous. - Value* Args[] = { - CI->getOperand(1), - CI->getOperand(2) - }; - CallInst *NC = new CallInst(GCRead, Args, Args + 2, - CI->getName(), CI); - // These functions only deal with ptr type results so BitCast - // is the correct kind of cast (no-op cast). - Value *NV = new BitCastInst(NC, CI->getType(), "", CI); - CI->replaceAllUsesWith(NV); - BB->getInstList().erase(CI); - CI = NC; - } - } - - MadeChange = true; - } - } - - // If there are no GC roots in this function, then there is no need to create - // a GC list record for it. - if (GCRoots.empty()) return MadeChange; - - // Okay, there are GC roots in this function. On entry to the function, add a - // record to the llvm_gc_root_chain, and remove it on exit. - - // Create the alloca, and zero it out. - const StructType *RootListTy = getRootRecordType(GCRoots.size()); - AllocaInst *AI = new AllocaInst(RootListTy, 0, "gcroots", F.begin()->begin()); - - // Insert the memset call after all of the allocas in the function. - BasicBlock::iterator IP = AI; - while (isa(IP)) ++IP; - - Constant *Zero = ConstantInt::get(Type::Int32Ty, 0); - Constant *One = ConstantInt::get(Type::Int32Ty, 1); - - Value *Idx[2] = { Zero, Zero }; - - // Get a pointer to the prev pointer. - Value *PrevPtrPtr = new GetElementPtrInst(AI, Idx, Idx + 2, - "prevptrptr", IP); - - // Load the previous pointer. - Value *PrevPtr = new LoadInst(RootChain, "prevptr", IP); - // Store the previous pointer into the prevptrptr - new StoreInst(PrevPtr, PrevPtrPtr, IP); - - // Set the number of elements in this record. - Idx[1] = One; - Value *NumEltsPtr = new GetElementPtrInst(AI, Idx, Idx + 2, - "numeltsptr", IP); - new StoreInst(ConstantInt::get(Type::Int32Ty, GCRoots.size()), NumEltsPtr,IP); - - Value* Par[4]; - Par[0] = Zero; - Par[1] = ConstantInt::get(Type::Int32Ty, 2); - - const PointerType *PtrLocTy = - cast(GCRootInt->getFunctionType()->getParamType(0)); - Constant *Null = ConstantPointerNull::get(PtrLocTy); - - // Initialize all of the gcroot records now. - for (unsigned i = 0, e = GCRoots.size(); i != e; ++i) { - // Initialize the meta-data pointer. - Par[2] = ConstantInt::get(Type::Int32Ty, i); - Par[3] = One; - Value *MetaDataPtr = new GetElementPtrInst(AI, Par, Par + 4, - "MetaDataPtr", IP); - assert(isa(GCRoots[i]->getOperand(2)) && "Must be a constant"); - new StoreInst(GCRoots[i]->getOperand(2), MetaDataPtr, IP); - - // Initialize the root pointer to null on entry to the function. - Par[3] = Zero; - Value *RootPtrPtr = new GetElementPtrInst(AI, Par, Par + 4, - "RootEntPtr", IP); - new StoreInst(Null, RootPtrPtr, IP); - - // Each occurrance of the llvm.gcroot intrinsic now turns into an - // initialization of the slot with the address. - new StoreInst(GCRoots[i]->getOperand(1), RootPtrPtr, GCRoots[i]); - } - - // Now that the record is all initialized, store the pointer into the global - // pointer. - Value *C = new BitCastInst(AI, PointerType::getUnqual(MainRootRecordType), "", IP); - new StoreInst(C, RootChain, IP); - - // Eliminate all the gcroot records now. - for (unsigned i = 0, e = GCRoots.size(); i != e; ++i) - GCRoots[i]->getParent()->getInstList().erase(GCRoots[i]); - - // On exit from the function we have to remove the entry from the GC root - // chain. Doing this is straight-forward for return and unwind instructions: - // just insert the appropriate copy. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (isa(BB->getTerminator()) || - isa(BB->getTerminator())) { - // We could reuse the PrevPtr loaded on entry to the function, but this - // would make the value live for the whole function, which is probably a - // bad idea. Just reload the value out of our stack entry. - PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", BB->getTerminator()); - new StoreInst(PrevPtr, RootChain, BB->getTerminator()); - } - - // If an exception is thrown from a callee we have to make sure to - // unconditionally take the record off the stack. For this reason, we turn - // all call instructions into invoke whose cleanup pops the entry off the - // stack. We only insert one cleanup block, which is shared by all invokes. - if (!NormalCalls.empty()) { - // Create the shared cleanup block. - BasicBlock *Cleanup = new BasicBlock("gc_cleanup", &F); - UnwindInst *UI = new UnwindInst(Cleanup); - PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", UI); - new StoreInst(PrevPtr, RootChain, UI); - - // Loop over all of the function calls, turning them into invokes. - while (!NormalCalls.empty()) { - CallInst *CI = NormalCalls.back(); - BasicBlock *CBB = CI->getParent(); - NormalCalls.pop_back(); - - // Split the basic block containing the function call. - BasicBlock *NewBB = CBB->splitBasicBlock(CI, CBB->getName()+".cont"); - - // Remove the unconditional branch inserted at the end of the CBB. - CBB->getInstList().pop_back(); - NewBB->getInstList().remove(CI); - - // Create a new invoke instruction. - std::vector Args(CI->op_begin()+1, CI->op_end()); - - Value *II = new InvokeInst(CI->getCalledValue(), NewBB, Cleanup, - Args.begin(), Args.end(), CI->getName(), CBB); - cast(II)->setCallingConv(CI->getCallingConv()); - cast(II)->setParamAttrs(CI->getParamAttrs()); - CI->replaceAllUsesWith(II); - delete CI; - } - } - - return true; -} diff --git a/runtime/GC/SemiSpace/semispace.c b/runtime/GC/SemiSpace/semispace.c index b8ef188fed..40af1cb2e3 100644 --- a/runtime/GC/SemiSpace/semispace.c +++ b/runtime/GC/SemiSpace/semispace.c @@ -97,24 +97,26 @@ void llvm_gc_write(void *V, void *ObjPtr, void **FieldPtr) { *FieldPtr = V; } * FIXME: This should be in a code-generator specific library, but for now this * will work for all code generators. */ -typedef struct GCRoot { - void **RootPtr; - void *Meta; -} GCRoot; - -typedef struct GCRoots { - struct GCRoots *Next; - unsigned NumRoots; - GCRoot RootRecords[]; -} GCRoots; -GCRoots *llvm_gc_root_chain; +struct FrameMap { + int32_t NumRoots; // Number of roots in stack frame. + int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. + void *Meta[]; // May be absent for roots without metadata. +}; + +struct StackEntry { + ShadowStackEntry *Next; // Caller's stack entry. + const FrameMap *Map; // Pointer to constant FrameMap. + void *Roots[]; // Stack roots (in-place array). +}; +StackEntry *llvm_gc_root_chain; void llvm_cg_walk_gcroots(void (*FP)(void **Root, void *Meta)) { - GCRoots *R = llvm_gc_root_chain; - for (; R; R = R->Next) { + for (StackEntry *R; R; R = R->Next) { unsigned i, e; - for (i = 0, e = R->NumRoots; i != e; ++i) - FP(R->RootRecords[i].RootPtr, R->RootRecords[i].Meta); + for (i = 0, e = R->NumMeta; i != e; ++i) + FP(&R->Roots[i], R->Map->Meta[i]); + for (e = R->NumRoots; i != e; ++i) + FP(&R->Roots[i], NULL); } } /* END FIXME! */ diff --git a/test/CodeGen/Generic/GC/redundant_init.ll b/test/CodeGen/Generic/GC/redundant_init.ll new file mode 100644 index 0000000000..4499603474 --- /dev/null +++ b/test/CodeGen/Generic/GC/redundant_init.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as < %s | llc -march=x86 | \ +; RUN: ignore grep {movl..0} | count 0 + +%struct.obj = type { i8*, %struct.obj* } + +declare void @g() gc "shadow-stack" + +define void @f(i8* %o) gc "shadow-stack" { +entry: + %root = alloca i8* + call void @llvm.gcroot(i8** %root, i8* null) + store i8* %o, i8** %root + call void @g() + ret void +} + +declare void @llvm.gcroot(i8**, i8*) -- cgit v1.2.3