author    Gordon Henriksen <gordonhenriksen@mac.com>  2007-09-29 02:13:43 +0000
committer Gordon Henriksen <gordonhenriksen@mac.com>  2007-09-29 02:13:43 +0000
commit    364caf0e19e570d00cfd03d9dd3fcda21fb2e459 (patch)
tree      3112ac858755fdb62fa0c94bed97f99676a67507
parent    d070d1e56fbfdad752342838dda39e14582ccad5 (diff)
Collector is the base class for garbage collection code generators.
This version enhances the previous patch to add root initialization as
discussed here:

  http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20070910/053455.html

Collector gives its subclasses control over generic algorithms:

    unsigned NeededSafePoints; //< Bitmask of required safe points.
    bool CustomReadBarriers;   //< Default is to insert loads.
    bool CustomWriteBarriers;  //< Default is to insert stores.
    bool CustomRoots;          //< Default is to pass through to backend.
    bool InitRoots;            //< If set, roots are nulled during lowering.

It also has callbacks which collectors can hook:

    /// If any of the actions are set to Custom, this is expected to
    /// be overridden to create a transform to lower those actions to
    /// LLVM IR.
    virtual Pass *createCustomLoweringPass() const;

    /// beginAssembly/finishAssembly - Emit module metadata as
    /// assembly code.
    virtual void beginAssembly(Module &M, std::ostream &OS, AsmPrinter &AP,
                               const TargetAsmInfo &TAI) const;
    virtual void finishAssembly(Module &M, CollectorModuleMetadata &CMM,
                                std::ostream &OS, AsmPrinter &AP,
                                const TargetAsmInfo &TAI) const;

Various other independent algorithms could be implemented, but were not
necessary for the initial two collectors. Some examples are listed here:

  http://llvm.org/docs/GarbageCollection.html#collector-algos

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42466 91177308-0d34-0410-b5e6-96231b3b80d8
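
As an illustration of how a collector plugin is expected to use this interface,
a hypothetical subclass might look like the following (a sketch only;
MyCollector and its configuration choices are assumptions, not part of this
patch):

    #include "llvm/CodeGen/Collector.h"
    using namespace llvm;

    class MyCollector : public Collector {
    public:
      MyCollector() {
        NeededSafePoints = 1 << GC::PostCall; // record return addresses only
        InitRoots = true;                     // null roots during lowering
      }

      virtual void finishAssembly(Module &M, CollectorModuleMetadata &CMM,
                                  std::ostream &OS, AsmPrinter &AP,
                                  const TargetAsmInfo &TAI) const {
        // Sketch: walk CMM here and emit per-function stack maps.
      }
    };

A front end would then point TheCollector at an instance of such a class so
the code generator applies its properties.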
-rw-r--r--  include/llvm/CodeGen/Collector.h  133
-rw-r--r--  lib/CodeGen/Collector.cpp         353
-rw-r--r--  lib/CodeGen/README.txt             38
3 files changed, 524 insertions, 0 deletions
diff --git a/include/llvm/CodeGen/Collector.h b/include/llvm/CodeGen/Collector.h
new file mode 100644
index 0000000000..664e13c62b
--- /dev/null
+++ b/include/llvm/CodeGen/Collector.h
@@ -0,0 +1,133 @@
+//===-- llvm/CodeGen/Collector.h - Garbage collection -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Gordon Henriksen and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// CollectorMetadata records sufficient information about a machine function
+// to enable accurate garbage collectors. Specifically:
+//
+// - Safe points
+// Garbage collection is only possible at certain points in code. Code
+// generators should record points:
+//
+// - At and after any call to a subroutine
+// - Before returning from the current function
+// - Before backwards branches (loops)
+//
+// - Roots
+// When a reference to a GC-allocated object exists on the stack, it must be
+// stored in an alloca registered with llvm.gcroot.
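+//
+// For example (an illustrative sketch only; the exact types and casts are
+// chosen by the front end), a root might be registered as:
+//
+//   %x.addr = alloca %Object*
+//   %x.tmp = bitcast %Object** %x.addr to i8**
+//   call void @llvm.gcroot(i8** %x.tmp, i8* null)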
+//
+// This generic information should be used by ABI-specific passes to emit support
+// tables for the runtime garbage collector.
+//
+// The MachineCodeAnalysis pass identifies the GC safe points in the machine
+// code. (Roots are identified in SelectionDAGISel.)
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_COLLECTOR_H
+#define LLVM_CODEGEN_COLLECTOR_H
+
+#include "llvm/CodeGen/CollectorMetadata.h"
+#include <iosfwd>
+
+namespace llvm {
+
+ class AsmPrinter;
+ class FunctionPassManager;
+ class PassManager;
+ class TargetAsmInfo;
+
+
+ /// Collector describes a garbage collector's code generation requirements,
+ /// and provides overridable hooks for those needs which cannot be abstractly
+ /// described.
+ class Collector {
+ protected:
+ unsigned NeededSafePoints; //< Bitmask of required safe points.
+ bool CustomReadBarriers; //< Default is to insert loads.
+ bool CustomWriteBarriers; //< Default is to insert stores.
+ bool CustomRoots; //< Default is to pass through to backend.
+ bool InitRoots; //< If set, roots are nulled during lowering.
+
+ /// If any of the actions are set to Custom, this is expected to be
+ /// overridden to create a transform to lower those actions to LLVM IR.
+ virtual Pass *createCustomLoweringPass() const;
+
+ public:
+ Collector();
+
+ virtual ~Collector();
+
+
+ /// True if this collector requires safe points of any kind. By default,
+ /// none are recorded.
+ bool needsSafePoints() const { return NeededSafePoints != 0; }
+
+ /// True if the collector requires the given kind of safe point. By default,
+ /// none are recorded.
+ bool needsSafePoint(GC::PointKind Kind) const {
+ return (NeededSafePoints & 1 << Kind) != 0;
+ }
+
+ /// By default, write barriers are replaced with simple store instructions.
+ /// If true, then the pass returned by createCustomLoweringPass must
+ /// instead process them.
+ bool customWriteBarrier() const { return CustomWriteBarriers; }
+
+ /// By default, read barriers are replaced with simple load instructions.
+ /// If true, then the pass returned by createCustomLoweringPass must
+ /// instead process them.
+ bool customReadBarrier() const { return CustomReadBarriers; }
+
+ /// By default, roots are left for the code generator. If Custom, then the
+ /// pass returned by createCustomLoweringPass must delete them.
+ bool customRoots() const { return CustomRoots; }
+
+ /// If set, gcroot intrinsics should initialize their allocas to null. This
+ /// is necessary for most collectors.
+ bool initializeRoots() const { return InitRoots; }
+
+
+ /// Adds LLVM IR transforms to handle collection intrinsics. By default,
+ /// read and write barriers are replaced with direct memory accesses, and
+ /// roots are passed on to the code generator.
+ void addLoweringPasses(FunctionPassManager &PM) const;
+
+ /// Same as addLoweringPasses(FunctionPassManager &), except uses a
+ /// PassManager for compatibility with unusual backends (such as MSIL or
+ /// CBackend).
+ void addLoweringPasses(PassManager &PM) const;
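+
+ // A hypothetical front end might wire these passes up roughly as follows
+ // (sketch only; the module provider and driver loop are assumptions):
+ //
+ //   FunctionPassManager FPM(new ExistingModuleProvider(&M));
+ //   if (TheCollector)
+ //     TheCollector->addLoweringPasses(FPM);
+ //   FPM.doInitialization();
+ //   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ //     if (!I->isDeclaration())
+ //       FPM.run(*I);
+ //   FPM.doFinalization();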
+
+ /// Adds a target-independent MachineFunction pass to mark safe points. This
+ /// is added very late during code generation, just prior to output, and
+ /// importantly after all CFG transformations (like branch folding).
+ void addGenericMachineCodePass(FunctionPassManager &PM,
+ const TargetMachine &TM, bool Fast) const;
+
+ /// beginAssembly/finishAssembly - Emit module metadata as assembly code.
+ virtual void beginAssembly(Module &M, std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) const;
+ virtual void finishAssembly(Module &M, CollectorModuleMetadata &CMM,
+ std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) const;
+
+ private:
+ bool NeedsDefaultLoweringPass() const;
+ bool NeedsCustomLoweringPass() const;
+
+ };
+
+
+ /// If set, the code generator should generate garbage collection support as
+ /// specified by the collector properties.
+ extern const Collector *TheCollector; // FIXME: Find a better home!
+
+}
+
+#endif
diff --git a/lib/CodeGen/Collector.cpp b/lib/CodeGen/Collector.cpp
new file mode 100644
index 0000000000..29dc50420b
--- /dev/null
+++ b/lib/CodeGen/Collector.cpp
@@ -0,0 +1,353 @@
+//===-- Collector.cpp - Garbage collection infrastructure -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Gordon Henriksen and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target- and collector-independent garbage collection
+// infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Collector.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+
+ /// This pass rewrites calls to the llvm.gcread or llvm.gcwrite intrinsics,
+ /// replacing them with simple loads and stores as directed by the Collector.
+ /// This is useful for most garbage collectors.
+ class VISIBILITY_HIDDEN LowerIntrinsics : public FunctionPass {
+ const Collector &Coll;
+
+ /// GCRootInt, GCReadInt, GCWriteInt - The function prototypes for the
+ /// llvm.gc* intrinsics.
+ Function *GCRootInt, *GCReadInt, *GCWriteInt;
+
+ static bool CouldBecomeSafePoint(Instruction *I);
+ static void InsertRootInitializers(Function &F,
+ AllocaInst **Roots, unsigned Count);
+
+ public:
+ static char ID;
+
+ LowerIntrinsics(const Collector &GC);
+ const char *getPassName() const;
+
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+ };
+
+
+ /// This is a target-independent pass over the machine function representation
+ /// to identify safe points for the garbage collector in the machine code. It
+ /// inserts labels at safe points and populates the CollectorMetadata.
+ class VISIBILITY_HIDDEN MachineCodeAnalysis : public MachineFunctionPass {
+ const Collector &Coll;
+ const TargetMachine &Targ;
+
+ CollectorMetadata *MD;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+ MachineFrameInfo *MFI;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ unsigned InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+ public:
+ static char ID;
+
+ MachineCodeAnalysis(const Collector &C, const TargetMachine &T);
+ const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &MF);
+ };
+
+}
+
+// -----------------------------------------------------------------------------
+
+const Collector *llvm::TheCollector = 0;
+
+Collector::Collector() :
+ NeededSafePoints(0),
+ CustomReadBarriers(false),
+ CustomWriteBarriers(false),
+ CustomRoots(false),
+ InitRoots(true)
+{}
+
+Collector::~Collector() {}
+
+void Collector::addLoweringPasses(FunctionPassManager &PM) const {
+ if (NeedsDefaultLoweringPass())
+ PM.add(new LowerIntrinsics(*this));
+
+ if (NeedsCustomLoweringPass())
+ PM.add(createCustomLoweringPass());
+}
+
+void Collector::addLoweringPasses(PassManager &PM) const {
+ if (NeedsDefaultLoweringPass())
+ PM.add(new LowerIntrinsics(*this));
+
+ if (NeedsCustomLoweringPass())
+ PM.add(createCustomLoweringPass());
+}
+
+void Collector::addGenericMachineCodePass(FunctionPassManager &PM,
+ const TargetMachine &TM,
+ bool Fast) const {
+ if (needsSafePoints())
+ PM.add(new MachineCodeAnalysis(*this, TM));
+}
+
+bool Collector::NeedsDefaultLoweringPass() const {
+ // Default lowering is necessary only if read or write barriers have a
+ // default action or if roots must be initialized to null. The default
+ // action for roots themselves is no action.
+ return !customWriteBarrier()
+ || !customReadBarrier()
+ || initializeRoots();
+}
+
+bool Collector::NeedsCustomLoweringPass() const {
+ // Custom lowering is only necessary if enabled for some action.
+ return customWriteBarrier()
+ || customReadBarrier()
+ || customRoots();
+}
+
+Pass *Collector::createCustomLoweringPass() const {
+ cerr << "Collector must override createCustomLoweringPass.\n";
+ abort();
+ return 0;
+}
+
+void Collector::beginAssembly(Module &M, std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) const {
+ // Default is no action.
+}
+
+void Collector::finishAssembly(Module &M, CollectorModuleMetadata &CMM,
+ std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) const {
+ // Default is no action.
+}
+
+// -----------------------------------------------------------------------------
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics(const Collector &C)
+ : FunctionPass((intptr_t)&ID), Coll(C),
+ GCRootInt(0), GCReadInt(0), GCWriteInt(0) {}
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now. If
+/// not, this pass does not do anything.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ GCReadInt = M.getFunction("llvm.gcread");
+ GCWriteInt = M.getFunction("llvm.gcwrite");
+ GCRootInt = M.getFunction("llvm.gcroot");
+ return false;
+}
+
+void LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP)) ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst*,16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(
+ IntrinsicInst::StripPointerCasts(SI->getOperand(1))))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I))
+ new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I, IP);
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+ // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I) || isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (unsigned IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for programs that do not declare the intrinsics.
+ if (!GCReadInt && !GCWriteInt && !GCRootInt) return false;
+
+ bool LowerWr = !Coll.customWriteBarrier();
+ bool LowerRd = !Coll.customReadBarrier();
+ bool InitRoots = Coll.initializeRoots();
+
+ SmallVector<AllocaInst*,32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (CallInst *CI = dyn_cast<CallInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ if (F == GCWriteInt && LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getOperand(1), CI->getOperand(3), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ } else if (F == GCReadInt && LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getOperand(2), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ } else if (F == GCRootInt && InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(cast<AllocaInst>(
+ IntrinsicInst::StripPointerCasts(CI->getOperand(1))));
+ } else {
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (Roots.size())
+ InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
+
+// -----------------------------------------------------------------------------
+
+char MachineCodeAnalysis::ID = 0;
+
+MachineCodeAnalysis::MachineCodeAnalysis(const Collector &C, const TargetMachine &T)
+ : MachineFunctionPass(intptr_t(&ID)), Coll(C), Targ(T) {}
+
+const char *MachineCodeAnalysis::getPassName() const {
+ return "Analyze Machine Code For Garbage Collection";
+}
+
+void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<CollectorModuleMetadata>();
+}
+
+unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ unsigned Label = MMI->NextLabelID();
+ BuildMI(MBB, MI, TII->get(TargetInstrInfo::LABEL)).addImm(Label);
+ return Label;
+}
+
+void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (Coll.needsSafePoint(GC::PreCall))
+ MD->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI));
+
+ if (Coll.needsSafePoint(GC::PostCall))
+ MD->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI));
+}
+
+void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(),
+ BBE = MF.end(); BBI != BBE; ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(),
+ ME = BBI->end(); MI != ME; ++MI)
+ if (TII->isCall(MI->getOpcode()))
+ VisitCallPoint(*MI);
+}
+
+void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ uint64_t StackSize = MFI->getStackSize();
+ uint64_t OffsetAdjustment = MFI->getOffsetAdjustment();
+ uint64_t OffsetOfLocalArea = Targ.getFrameInfo()->getOffsetOfLocalArea();
+
+ for (CollectorMetadata::roots_iterator RI = MD->roots_begin(),
+ RE = MD->roots_end(); RI != RE; ++RI)
+ RI->StackOffset = MFI->getObjectOffset(RI->Num) + StackSize
+ - OffsetOfLocalArea + OffsetAdjustment;
+}
+
+bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ if (!Coll.needsSafePoints())
+ return false;
+
+ MD = getAnalysis<CollectorModuleMetadata>().get(MF.getFunction());
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = MF.getTarget().getInstrInfo();
+ MFI = MF.getFrameInfo();
+
+ // Find the size of the stack frame.
+ MD->setFrameSize(MFI->getStackSize());
+
+ // Find all safe points.
+ FindSafePoints(MF);
+
+ // Find the stack offsets for all roots.
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
index 8767191320..8bd9a7af0d 100644
--- a/lib/CodeGen/README.txt
+++ b/lib/CodeGen/README.txt
@@ -148,3 +148,41 @@ It's not always a good idea to choose rematerialization over spilling. If all
the load / store instructions would be folded then spilling is cheaper because
it won't require new live intervals / registers. See 2003-05-31-LongShifts for
an example.
+
+//===---------------------------------------------------------------------===//
+
+Instead of unconditionally inserting a null initializer for every GC root when
+Collector::InitRoots is set, the collector infrastructure should get a little
+bit smarter and perform a trivial DSE of the initial basic block up to the
+first safe point.
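+
+For instance (an illustrative sketch; the names are invented), when the front
+end stores its own initial value before the first safe point, the inserted
+null store is trivially dead:
+
+ %root = alloca %obj*
+ %tmp = bitcast %obj** %root to i8**
+ call void @llvm.gcroot(i8** %tmp, i8* null)
+ store %obj* null, %obj** %root         ; inserted by root lowering (dead)
+ store %obj* %val, %obj** %root         ; front end's own initializer
+ call void @use(%obj* %val)             ; first safe point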
+
+//===---------------------------------------------------------------------===//
+
+With a copying garbage collector, derived pointers must not be retained across
+collector safe points; the collector could move the objects and invalidate the
+derived pointer. This is bad enough in the first place, but safe points can
+crop up unpredictably. Consider:
+
+ %array = load { i32, [0 x %obj] }** %array_addr
+ %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n
+ %old = load %obj** %nth_el
+ %z = div i64 %x, %y
+ store %obj* %new, %obj** %nth_el
+
+If the i64 division is lowered to a libcall, then a safe point will (must)
+appear for the call site. If a collection occurs, %array and %nth_el no longer
+point into the correct object.
+
+The fix for this is to copy address calculations so that dependent pointers
+are never live across safe point boundaries. But the loads cannot be copied
+like this if there is an intervening store, so this may be hard to get right.
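+
+One possible rewrite (an illustrative sketch, not implemented here) recomputes
+the derived pointer after the potential safe point:
+
+ %array = load { i32, [0 x %obj] }** %array_addr
+ %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n
+ %old = load %obj** %nth_el
+ %z = div i64 %x, %y
+ %array2 = load { i32, [0 x %obj] }** %array_addr
+ %nth_el2 = getelementptr { i32, [0 x %obj] }* %array2, i32 0, i32 %n
+ store %obj* %new, %obj** %nth_el2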
+
+Only a concurrent mutator can trigger a collection at the libcall safe point.
+So single-threaded programs do not have this requirement, even with a copying
+collector. Still, LLVM optimizations would probably undo a front-end's careful
+work.
+
+//===---------------------------------------------------------------------===//
+
+The ocaml frametable structure supports liveness information. It would be good
+to support it.