22 files changed, 1047 insertions, 683 deletions
diff --git a/include/llvm/MC/MCAtom.h b/include/llvm/MC/MCAtom.h
index ae5bf0bc20..6a937986fd 100644
--- a/include/llvm/MC/MCAtom.h
+++ b/include/llvm/MC/MCAtom.h
@@ -1,4 +1,4 @@
-//===-- llvm/MC/MCAtom.h - MCAtom class ---------------------*- C++ -*-===//
+//===-- llvm/MC/MCAtom.h ----------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -9,7 +9,7 @@
 //
 // This file contains the declaration of the MCAtom class, which is used to
 // represent a contiguous region in a decoded object that is uniformly data or
-// instructions;
+// instructions.
 //
 //===----------------------------------------------------------------------===//
 
@@ -24,45 +24,169 @@ namespace llvm {
 
 class MCModule;
 
-/// MCData - An entry in a data MCAtom.
-// NOTE: This may change to a more complex type in the future.
-typedef uint8_t MCData;
+class MCAtom;
+class MCTextAtom;
+class MCDataAtom;
 
 /// MCAtom - Represents a contiguous range of either instructions (a TextAtom)
 /// or data (a DataAtom).  Address ranges are expressed as _closed_ intervals.
 class MCAtom {
-  friend class MCModule;
-  typedef enum { TextAtom, DataAtom } AtomType;
-
-  AtomType Type;
+public:
+  virtual ~MCAtom() {}
+
+  enum AtomKind { TextAtom, DataAtom };
+  AtomKind getKind() const { return Kind; }
+
+  /// \brief Get the start address of the atom.
+  uint64_t getBeginAddr() const { return Begin; }
+  /// \brief Get the end address, i.e. the last one inside the atom.
+  uint64_t getEndAddr() const { return End; }
+
+  /// \name Atom modification methods:
+  /// When modifying a TextAtom, keep instruction boundaries in mind.
+  /// For instance, split must be given the start address of an instruction.
+  /// @{
+
+  /// \brief Splits the atom in two at a given address.
+  /// \param SplitPt Address at which to start a new atom, splitting this one.
+  /// \returns The newly created atom starting at \p SplitPt.
+  virtual MCAtom *split(uint64_t SplitPt) = 0;
+
+  /// \brief Truncates an atom, discarding everything after \p TruncPt.
+  /// \param TruncPt Last byte address to be contained in this atom.
+  virtual void truncate(uint64_t TruncPt) = 0;
+  /// @}
+
+  /// \name Naming:
+  ///
+  /// This is mostly for display purposes, and may contain anything that hints
+  /// at what the atom contains: section or symbol name, BB start address, ..
+  /// @{
+  StringRef getName() const { return Name; }
+  void setName(StringRef NewName) { Name = NewName.str(); }
+  /// @}
+
+protected:
+  const AtomKind Kind;
+  std::string Name;
   MCModule *Parent;
   uint64_t Begin, End;
 
-  std::vector<std::pair<uint64_t, MCInst> > Text;
-  std::vector<MCData> Data;
+  friend class MCModule;
+  MCAtom(AtomKind K, MCModule *P, uint64_t B, uint64_t E)
+    : Kind(K), Name("(unknown)"), Parent(P), Begin(B), End(E) { }
+
+  /// \name Atom remapping helpers
+  /// @{
+
+  /// \brief Remap the atom, using the given range, updating Begin/End.
+  /// One or both of the bounds can remain the same, but overlapping with other
+  /// atoms in the module is still forbidden.
+  void remap(uint64_t NewBegin, uint64_t NewEnd);
+
+  /// \brief Remap the atom to prepare for a truncation at TruncPt.
+  /// Equivalent to:
+  /// \code
+  ///   // Bound checks
+  ///   remap(Begin, TruncPt);
+  /// \endcode
+  void remapForTruncate(uint64_t TruncPt);
+
+  /// \brief Remap the atom to prepare for a split at SplitPt.
+  /// The bounds for the resulting atoms are returned in {L,R}{Begin,End}.
+  /// The current atom is truncated to \p LEnd.
+  void remapForSplit(uint64_t SplitPt,
+                     uint64_t &LBegin, uint64_t &LEnd,
+                     uint64_t &RBegin, uint64_t &REnd);
+  /// @}
+};
 
-  // Private constructor - only callable by MCModule
-  MCAtom(AtomType T, MCModule *P, uint64_t B, uint64_t E)
-    : Type(T), Parent(P), Begin(B), End(E) { }
+/// \name Text atom
+/// @{
+
+/// \brief An entry in an MCTextAtom: a disassembled instruction.
+/// NOTE: Both the Address and Size field are actually redundant when taken in
+/// the context of the text atom, and may better be exposed in an iterator
+/// instead of stored in the atom, which would replace this class.
+class MCDecodedInst {
+public:
+  MCInst Inst;      // The disassembled instruction.
+  uint64_t Address; // Address of the instruction.
+  uint64_t Size;    // Size of the instruction.
+  MCDecodedInst(const MCInst &I, uint64_t Addr, uint64_t Sz)
+    : Inst(I), Address(Addr), Size(Sz) {}
+};
+
+/// \brief An atom consisting of disassembled instructions.
+class MCTextAtom : public MCAtom {
+private:
+  typedef std::vector<MCDecodedInst> InstListTy;
+  InstListTy Insts;
 
+  /// \brief The address of the next appended instruction, i.e., the
+  /// address immediately after the last instruction in the atom.
+  uint64_t NextInstAddress;
 public:
-  bool isTextAtom() const { return Type == TextAtom; }
-  bool isDataAtom() const { return Type == DataAtom; }
+  /// Append an instruction, expanding the atom if necessary.
+  void addInst(const MCInst &Inst, uint64_t Size);
+
+  /// \name Instruction list access
+  /// @{
+  typedef InstListTy::const_iterator const_iterator;
+  const_iterator begin() const { return Insts.begin(); }
+  const_iterator end()   const { return Insts.end(); }
+
+  const MCDecodedInst &back() const { return Insts.back(); }
+  const MCDecodedInst &at(size_t n) const { return Insts.at(n); }
+  uint64_t size() const { return Insts.size(); }
+  /// @}
+
+  /// \name Atom type specific split/truncate logic.
+  /// @{
+  MCTextAtom *split(uint64_t SplitPt) LLVM_OVERRIDE;
+  void     truncate(uint64_t TruncPt) LLVM_OVERRIDE;
+  /// @}
+
+  // Class hierarchy.
+  static bool classof(const MCAtom *A) { return A->getKind() == TextAtom; }
+private:
+  friend class MCModule;
+  // Private constructor - only callable by MCModule
+  MCTextAtom(MCModule *P, uint64_t Begin, uint64_t End)
+    : MCAtom(TextAtom, P, Begin, End), NextInstAddress(Begin) {}
+};
+/// @}
+
+/// \name Data atom
+/// @{
+
+/// \brief An entry in an MCDataAtom.
+// NOTE: This may change to a more complex type in the future.
+typedef uint8_t MCData;
 
-  void addInst(const MCInst &I, uint64_t Address, unsigned Size);
+/// \brief An atom consisting of a sequence of bytes.
+class MCDataAtom : public MCAtom {
+  std::vector<MCData> Data; // Entry i is the data at address Begin + i.
+
+public:
+  /// Append a data entry, expanding the atom if necessary.
   void addData(const MCData &D);
 
-  /// split - Splits the atom in two at a given address, which must align with
-  /// and instruction boundary if this is a TextAtom.  Returns the newly created
-  /// atom representing the high part of the split.
-  MCAtom *split(uint64_t SplitPt);
+  /// \name Atom type specific split/truncate logic.
+  /// @{
+  MCDataAtom *split(uint64_t SplitPt) LLVM_OVERRIDE;
+  void     truncate(uint64_t TruncPt) LLVM_OVERRIDE;
+  /// @}
 
-  /// truncate - Truncates an atom so that TruncPt is the last byte address
-  /// contained in the atom.
-  void truncate(uint64_t TruncPt);
+  // Class hierarchy.
+  static bool classof(const MCAtom *A) { return A->getKind() == DataAtom; }
+private:
+  friend class MCModule;
+  // Private, MCModule-only ctor; reserves room for [Begin, End], adds no data.
+  MCDataAtom(MCModule *P, uint64_t Begin, uint64_t End)
+    : MCAtom(DataAtom, P, Begin, End) { Data.reserve(End + 1 - Begin); }
 };
 
 }
 
 #endif
-
diff --git a/include/llvm/MC/MCFunction.h b/include/llvm/MC/MCFunction.h
new file mode 100644
index 0000000000..b85011eda7
--- /dev/null
+++ b/include/llvm/MC/MCFunction.h
@@ -0,0 +1,122 @@
+//===-- llvm/MC/MCFunction.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the data structures to hold a CFG reconstructed from
+// machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCFUNCTION_H
+#define LLVM_MC_MCFUNCTION_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInst.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class MCFunction;
+class MCModule;
+class MCTextAtom;
+
+/// \brief Basic block containing a sequence of disassembled instructions.
+/// The basic block is backed by an MCTextAtom, which holds the instructions,
+/// and the address range it covers.
+/// Create a basic block using MCFunction::createBlock.
+class MCBasicBlock {
+  const MCTextAtom *Insts;
+
+  // MCFunction owns the basic block.
+  MCFunction *Parent;
+  friend class MCFunction;
+  MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent);
+
+  /// \name Predecessors/Successors, to represent the CFG.
+  /// @{
+  typedef std::vector<const MCBasicBlock *> BasicBlockListTy;
+  BasicBlockListTy Successors;
+  BasicBlockListTy Predecessors;
+  /// @}
+public:
+
+  /// \brief Get the backing MCTextAtom, containing the instruction sequence.
+  const MCTextAtom *getInsts() const { return Insts; }
+
+  /// \name Get the owning MCFunction.
+  /// @{
+  const MCFunction *getParent() const { return Parent; }
+        MCFunction *getParent()       { return Parent; }
+  /// @}
+
+  /// \name MC CFG access: Predecessors/Successors.
+  /// @{
+  typedef BasicBlockListTy::const_iterator succ_const_iterator;
+  succ_const_iterator succ_begin() const { return Successors.begin(); }
+  succ_const_iterator succ_end()   const { return Successors.end(); }
+
+  typedef BasicBlockListTy::const_iterator pred_const_iterator;
+  pred_const_iterator pred_begin() const { return Predecessors.begin(); }
+  pred_const_iterator pred_end()   const { return Predecessors.end(); }
+
+  void addSuccessor(const MCBasicBlock *MCBB);
+  bool isSuccessor(const MCBasicBlock *MCBB) const;
+
+  void addPredecessor(const MCBasicBlock *MCBB);
+  bool isPredecessor(const MCBasicBlock *MCBB) const;
+  /// @}
+};
+
+/// \brief Represents a function in machine code, containing MCBasicBlocks.
+/// MCFunctions are created using MCModule::createFunction.
+class MCFunction {
+  MCFunction           (const MCFunction&) LLVM_DELETED_FUNCTION;
+  MCFunction& operator=(const MCFunction&) LLVM_DELETED_FUNCTION;
+
+  std::string Name;
+  typedef std::vector<MCBasicBlock*> BasicBlockListTy;
+  BasicBlockListTy Blocks;
+
+  // MCModule owns the function.
+  friend class MCModule;
+  MCFunction(StringRef Name);
+public:
+  ~MCFunction();
+
+  /// \brief Create an MCBasicBlock backed by Insts and add it to this function.
+  /// \param Insts Sequence of straight-line code backing the basic block.
+  /// \returns The newly created basic block.
+  MCBasicBlock &createBlock(const MCTextAtom &Insts);
+
+  StringRef getName() const { return Name; }
+
+  /// \name Access to the function's basic blocks. No ordering is enforced.
+  /// @{
+  /// \brief Get the entry point basic block.
+  const MCBasicBlock *getEntryBlock() const { return front(); }
+        MCBasicBlock *getEntryBlock()       { return front(); }
+
+  // NOTE: Dereferencing iterators gives pointers, so maybe a list is best here.
+  typedef BasicBlockListTy::const_iterator const_iterator;
+  typedef BasicBlockListTy::      iterator       iterator;
+  const_iterator begin() const { return Blocks.begin(); }
+        iterator begin()       { return Blocks.begin(); }
+  const_iterator   end() const { return Blocks.end(); }
+        iterator   end()       { return Blocks.end(); }
+
+  const MCBasicBlock* front() const { return Blocks.front(); }
+        MCBasicBlock* front()       { return Blocks.front(); }
+  const MCBasicBlock*  back() const { return Blocks.back(); }
+        MCBasicBlock*  back()       { return Blocks.back(); }
+  /// @}
+};
+
+}
+
+#endif
diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h
index acad6336ac..17bfd1582a 100644
--- a/include/llvm/MC/MCInstrAnalysis.h
+++ b/include/llvm/MC/MCInstrAnalysis.h
@@ -52,10 +52,15 @@ public:
     return Info->get(Inst.getOpcode()).isReturn();
   }
 
+  virtual bool isTerminator(const MCInst &Inst) const {
+    return Info->get(Inst.getOpcode()).isTerminator();
+  }
+
   /// evaluateBranch - Given a branch instruction try to get the address the
-  /// branch targets. Otherwise return -1.
-  virtual uint64_t
-  evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size) const;
+  /// branch targets. Return true on success, and the address in Target.
+  virtual bool
+  evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+                 uint64_t &Target) const;
 };
 
 }
diff --git a/include/llvm/MC/MCModule.h b/include/llvm/MC/MCModule.h
index 755fa025fb..02f8ca05b4 100644
--- a/include/llvm/MC/MCModule.h
+++ b/include/llvm/MC/MCModule.h
@@ -15,44 +15,93 @@
 #ifndef LLVM_MC_MCMODULE_H
 #define LLVM_MC_MCMODULE_H
 
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IntervalMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataTypes.h"
+#include <vector>
 
 namespace llvm {
 
 class MCAtom;
+class MCDataAtom;
+class MCFunction;
+class MCObjectDisassembler;
+class MCTextAtom;
 
-/// MCModule - This class represent a completely disassembled object file or
-/// executable.  It comprises a list of MCAtom's, and a branch target table.
-/// Each atom represents a contiguous range of either instructions or data.
+/// \brief A completely disassembled object file or executable.
+/// It comprises a list of MCAtom's, each representing a contiguous range of
+/// either instructions or data.
+/// An MCModule is created using MCObjectDisassembler::buildModule.
 class MCModule {
-  /// AtomAllocationTracker - An MCModule owns its component MCAtom's, so it
-  /// must track them in order to ensure they are properly freed as atoms are
-  /// merged or otherwise manipulated.
-  SmallPtrSet<MCAtom*, 8> AtomAllocationTracker;
+  /// \name Atom tracking
+  /// @{
 
-  /// OffsetMap - Efficiently maps offset ranges to MCAtom's.
-  IntervalMap<uint64_t, MCAtom*> OffsetMap;
-
-  /// BranchTargetMap - Maps offsets that are determined to be branches and
-  /// can be statically resolved to their target offsets.
-  DenseMap<uint64_t, MCAtom*> BranchTargetMap;
+  /// \brief Atoms in this module, sorted by begin address.
+  /// FIXME: This doesn't handle overlapping atoms (which happen when a basic
+  /// block starts in the middle of an instruction of another basic block.)
+  typedef std::vector<MCAtom*> AtomListTy;
+  AtomListTy Atoms;
 
   friend class MCAtom;
-
-  /// remap - Update the interval mapping for an MCAtom.
+  /// \brief Remap \p Atom to the given range, and update its Begin/End fields.
+  /// \param Atom An atom belonging to this module.
+  /// An atom should always use this method to update its bounds, because this
+  /// enables the owning MCModule to keep track of its atoms.
   void remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd);
 
+  /// \brief Insert an atom in the module, using its Begin and End addresses.
+  void map(MCAtom *NewAtom);
+  /// @}
+
+  /// \name Function tracking
+  /// @{
+  typedef std::vector<MCFunction*> FunctionListTy;
+  FunctionListTy Functions;
+  /// @}
+
+  MCModule           (const MCModule &) LLVM_DELETED_FUNCTION;
+  MCModule& operator=(const MCModule &) LLVM_DELETED_FUNCTION;
+
+  // MCObjectDisassembler creates MCModules.
+  friend class MCObjectDisassembler;
+  MCModule() : Atoms() { }
+
 public:
-  MCModule(IntervalMap<uint64_t, MCAtom*>::Allocator &A) : OffsetMap(A) { }
+  ~MCModule();
 
-  /// createAtom - Creates a new MCAtom covering the specified offset range.
-  MCAtom *createAtom(MCAtom::AtomType Type, uint64_t Begin, uint64_t End);
+  /// \name Create a new MCAtom covering the specified offset range.
+  /// @{
+  MCTextAtom *createTextAtom(uint64_t Begin, uint64_t End);
+  MCDataAtom *createDataAtom(uint64_t Begin, uint64_t End);
+  /// @}
+
+  /// \name Access to the owned atom list, ordered by begin address.
+  /// @{
+  const MCAtom *findAtomContaining(uint64_t Addr) const;
+        MCAtom *findAtomContaining(uint64_t Addr);
+
+  typedef AtomListTy::const_iterator const_atom_iterator;
+  typedef AtomListTy::      iterator       atom_iterator;
+  const_atom_iterator atom_begin() const { return Atoms.begin(); }
+        atom_iterator atom_begin()       { return Atoms.begin(); }
+  const_atom_iterator atom_end()   const { return Atoms.end(); }
+        atom_iterator atom_end()         { return Atoms.end(); }
+  /// @}
+
+  /// \name Create a new MCFunction.
+  MCFunction *createFunction(const StringRef &Name);
+
+  /// \name Access to the owned function list.
+  /// @{
+  typedef FunctionListTy::const_iterator const_func_iterator;
+  typedef FunctionListTy::      iterator       func_iterator;
+  const_func_iterator func_begin() const { return Functions.begin(); }
+        func_iterator func_begin()       { return Functions.begin(); }
+  const_func_iterator func_end()   const { return Functions.end(); }
+        func_iterator func_end()         { return Functions.end(); }
+  /// @}
 };
 
 }
 
 #endif
-
diff --git a/include/llvm/MC/MCObjectDisassembler.h b/include/llvm/MC/MCObjectDisassembler.h
new file mode 100644
index 0000000000..749a54e7f4
--- /dev/null
+++ b/include/llvm/MC/MCObjectDisassembler.h
@@ -0,0 +1,69 @@
+//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCObjectDisassembler class, which
+// can be used to construct an MCModule and an MC CFG from an ObjectFile.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H
+#define LLVM_MC_MCOBJECTDISASSEMBLER_H
+
+namespace llvm {
+
+namespace object {
+  class ObjectFile;
+}
+
+class MCBasicBlock;
+class MCDisassembler;
+class MCFunction;
+class MCInstrAnalysis;
+class MCModule;
+
+/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions.
+/// This class builds on MCDisassembler to disassemble whole sections, creating
+/// MCAtom (MCTextAtom for disassembled sections and MCDataAtom for raw data).
+/// It can also be used to create a control flow graph consisting of MCFunctions
+/// and MCBasicBlocks.
+class MCObjectDisassembler {
+  const object::ObjectFile &Obj;
+  const MCDisassembler &Dis;
+  const MCInstrAnalysis &MIA;
+
+public:
+  MCObjectDisassembler(const object::ObjectFile &Obj,
+                       const MCDisassembler &Dis,
+                       const MCInstrAnalysis &MIA);
+
+  /// \brief Build an MCModule, creating atoms and optionally functions.
+  /// \param withCFG Also build a CFG by adding MCFunctions to the Module.
+  /// If withCFG is false, the MCModule built only contains atoms, representing
+  /// what was found in the object file. If withCFG is true, MCFunctions are
+  /// created, containing MCBasicBlocks. All text atoms are split to form basic
+  /// block atoms, which then each back an MCBasicBlock.
+  MCModule *buildModule(bool withCFG = false);
+
+private:
+  /// \brief Fill \p Module by creating an atom for each section.
+  /// This could be made much smarter, using information like symbols, but also
+  /// format-specific features, like mach-o function_start or data_in_code LCs.
+  void buildSectionAtoms(MCModule *Module);
+
+  /// \brief Enrich \p Module with a CFG consisting of MCFunctions.
+  /// \param Module An MCModule returned by buildModule, with no CFG.
+  /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom.
+  /// When the CFG is built, contiguous instructions that were previously in a
+  /// single MCTextAtom will be split in multiple basic block atoms.
+  void buildCFG(MCModule *Module);
+};
+
+}
+
+#endif
diff --git a/include/llvm/Support/StringRefMemoryObject.h b/include/llvm/Support/StringRefMemoryObject.h
new file mode 100644
index 0000000000..a0ef35a9e1
--- /dev/null
+++ b/include/llvm/Support/StringRefMemoryObject.h
@@ -0,0 +1,42 @@
+//===- llvm/Support/StringRefMemoryObject.h ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the StringRefMemoryObject class, a
+// simple wrapper around StringRef implementing the MemoryObject interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_STRINGREFMEMORYOBJECT_H
+#define LLVM_SUPPORT_STRINGREFMEMORYOBJECT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryObject.h"
+
+namespace llvm {
+
+/// StringRefMemoryObject - Simple StringRef-backed MemoryObject
+class StringRefMemoryObject : public MemoryObject {
+  StringRef Bytes;
+  uint64_t Base;
+public:
+  StringRefMemoryObject(StringRef Bytes, uint64_t Base = 0)
+    : Bytes(Bytes), Base(Base) {}
+
+  uint64_t getBase() const { return Base; }
+  uint64_t getExtent() const { return Bytes.size(); }
+
+  int readByte(uint64_t Addr, uint8_t *Byte) const;
+  int readBytes(uint64_t Addr, uint64_t Size,
+                uint8_t *Buf, uint64_t *Copied) const;
+
+};
+
+}
+
+#endif
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 5377c5c8d8..89e2aaf48b 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_library(LLVMMC
   MCELF.cpp
   MCELFObjectTargetWriter.cpp
   MCELFStreamer.cpp
+  MCFunction.cpp
   MCExpr.cpp
   MCExternalSymbolizer.cpp
   MCInst.cpp
@@ -26,6 +27,7 @@ add_llvm_library(LLVMMC
   MCModule.cpp
   MCNullStreamer.cpp
   MCObjectFileInfo.cpp
+  MCObjectDisassembler.cpp
   MCObjectStreamer.cpp
   MCObjectSymbolizer.cpp
   MCObjectWriter.cpp
diff --git a/lib/MC/MCAtom.cpp b/lib/MC/MCAtom.cpp
index d71444324f..2626b39db4 100644
--- a/lib/MC/MCAtom.cpp
+++ b/lib/MC/MCAtom.cpp
@@ -10,88 +10,101 @@
 #include "llvm/MC/MCAtom.h"
 #include "llvm/MC/MCModule.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <iterator>
 
 using namespace llvm;
 
-void MCAtom::addInst(const MCInst &I, uint64_t Address, unsigned Size) {
-  assert(Type == TextAtom && "Trying to add MCInst to a non-text atom!");
-
-  assert(Address < End+Size &&
-         "Instruction not contiguous with end of atom!");
-  if (Address > End)
-    Parent->remap(this, Begin, End+Size);
-
-  Text.push_back(std::make_pair(Address, I));
+void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) {
+  Parent->remap(this, NewBegin, NewEnd);
 }
 
-void MCAtom::addData(const MCData &D) {
-  assert(Type == DataAtom && "Trying to add MCData to a non-data atom!");
-  Parent->remap(this, Begin, End+1);
-
-  Data.push_back(D);
+void MCAtom::remapForTruncate(uint64_t TruncPt) {
+  assert((TruncPt >= Begin && TruncPt < End) &&
+         "Truncation point not contained in atom!");
+  remap(Begin, TruncPt);
 }
 
-MCAtom *MCAtom::split(uint64_t SplitPt) {
+void MCAtom::remapForSplit(uint64_t SplitPt,
+                           uint64_t &LBegin, uint64_t &LEnd,
+                           uint64_t &RBegin, uint64_t &REnd) {
   assert((SplitPt > Begin && SplitPt <= End) &&
          "Splitting at point not contained in atom!");
 
   // Compute the new begin/end points.
-  uint64_t LeftBegin = Begin;
-  uint64_t LeftEnd = SplitPt - 1;
-  uint64_t RightBegin = SplitPt;
-  uint64_t RightEnd = End;
+  LBegin = Begin;
+  LEnd = SplitPt - 1;
+  RBegin = SplitPt;
+  REnd = End;
 
   // Remap this atom to become the lower of the two new ones.
-  Parent->remap(this, LeftBegin, LeftEnd);
+  remap(LBegin, LEnd);
+}
 
-  // Create a new atom for the higher atom.
-  MCAtom *RightAtom = Parent->createAtom(Type, RightBegin, RightEnd);
+// MCDataAtom
 
-  // Split the contents of the original atom between it and the new one.  The
-  // precise method depends on whether this is a data or a text atom.
-  if (isDataAtom()) {
-    std::vector<MCData>::iterator I = Data.begin() + (RightBegin - LeftBegin);
+void MCDataAtom::addData(const MCData &D) {
+  Data.push_back(D);
+  if (Data.size() > End + 1 - Begin) // Closed [Begin, End] holds End+1-Begin.
+    remap(Begin, End + 1);
+}
 
-    assert(I != Data.end() && "Split point not found in range!");
+void MCDataAtom::truncate(uint64_t TruncPt) {
+  remapForTruncate(TruncPt);
 
-    std::copy(I, Data.end(), RightAtom->Data.end());
-    Data.erase(I, Data.end());
-  } else if (isTextAtom()) {
-    std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+  Data.resize(TruncPt - Begin + 1);
+}
 
-    while (I != Text.end() && I->first < SplitPt) ++I;
+MCDataAtom *MCDataAtom::split(uint64_t SplitPt) {
+  uint64_t LBegin, LEnd, RBegin, REnd;
+  remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd);
 
-    assert(I != Text.end() && "Split point not found in disassembly!");
-    assert(I->first == SplitPt &&
-           "Split point does not fall on instruction boundary!");
+  MCDataAtom *RightAtom = Parent->createDataAtom(RBegin, REnd);
+  RightAtom->setName(getName());
 
-    std::copy(I, Text.end(), RightAtom->Text.end());
-    Text.erase(I, Text.end());
-  } else
-    llvm_unreachable("Unknown atom type!");
+  std::vector<MCData>::iterator I = Data.begin() + (RBegin - LBegin);
+  assert(I != Data.end() && "Split point not found in range!");
 
+  std::copy(I, Data.end(), std::back_inserter(RightAtom->Data));
+  Data.erase(I, Data.end());
   return RightAtom;
 }
 
-void MCAtom::truncate(uint64_t TruncPt) {
-  assert((TruncPt >= Begin && TruncPt < End) &&
-         "Truncation point not contained in atom!");
+// MCTextAtom
 
-  Parent->remap(this, Begin, TruncPt);
+void MCTextAtom::addInst(const MCInst &I, uint64_t Size) {
+  if (NextInstAddress + Size - 1 > End) // End is inclusive: last byte of I.
+    remap(Begin, NextInstAddress + Size - 1);
+  Insts.push_back(MCDecodedInst(I, NextInstAddress, Size));
+  NextInstAddress += Size;
+}
 
-  if (isDataAtom()) {
-    Data.resize(TruncPt - Begin + 1);
-  } else if (isTextAtom()) {
-    std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+void MCTextAtom::truncate(uint64_t TruncPt) {
+  remapForTruncate(TruncPt);
 
-    while (I != Text.end() && I->first <= TruncPt) ++I;
+  InstListTy::iterator I = Insts.begin();
+  while (I != Insts.end() && I->Address <= TruncPt) ++I;
 
-    assert(I != Text.end() && "Truncation point not found in disassembly!");
-    assert(I->first == TruncPt+1 &&
-           "Truncation point does not fall on instruction boundary");
+  assert(I != Insts.end() && "Truncation point not found in disassembly!");
+  assert(I->Address == TruncPt + 1 &&
+         "Truncation point does not fall on instruction boundary");
 
-    Text.erase(I, Text.end());
-  } else
-    llvm_unreachable("Unknown atom type!");
+  Insts.erase(I, Insts.end());
 }
 
+MCTextAtom *MCTextAtom::split(uint64_t SplitPt) {
+  // Shrink this atom to the left half; fetch the bounds of both halves.
+  uint64_t LeftLo, LeftHi, RightLo, RightHi;
+  remapForSplit(SplitPt, LeftLo, LeftHi, RightLo, RightHi);
+  MCTextAtom *Tail = Parent->createTextAtom(RightLo, RightHi);
+  Tail->setName(getName());
+  // Find the first instruction that belongs to the right half.
+  InstListTy::iterator Cut = Insts.begin();
+  for (; Cut != Insts.end() && Cut->Address < SplitPt; ++Cut) {}
+  assert(Cut != Insts.end() && "Split point not found in disassembly!");
+  assert(Cut->Address == SplitPt &&
+         "Split point does not fall on instruction boundary!");
+  // Hand the trailing instructions over to the new atom.
+  Tail->Insts.insert(Tail->Insts.end(), Cut, Insts.end());
+  Insts.erase(Cut, Insts.end());
+  return Tail;
+}
diff --git a/lib/MC/MCFunction.cpp b/lib/MC/MCFunction.cpp
new file mode 100644
index 0000000000..2665d3e167
--- /dev/null
+++ b/lib/MC/MCFunction.cpp
@@ -0,0 +1,55 @@
+//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+// MCFunction
+
+MCFunction::MCFunction(StringRef Name)
+  : Name(Name) // Stores its own copy of the name in a std::string member.
+{}
+
+MCFunction::~MCFunction() {
+  for (size_t Idx = 0, NumBlocks = Blocks.size(); Idx != NumBlocks; ++Idx)
+    delete Blocks[Idx]; // Blocks were allocated with new in createBlock.
+}
+
+MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) {
+  Blocks.push_back(new MCBasicBlock(TA, this)); // Freed by ~MCFunction.
+  return *Blocks.back();
+}
+
+// MCBasicBlock
+
+MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent)
+  : Insts(&Insts), Parent(Parent)
+{} // Stores the atom's address only; the atom itself is not copied.
+
+void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) {
+  Successors.push_back(MCBB); // No duplicate check; MCBB is untouched.
+}
+
+bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const {
+  const succ_const_iterator Last = succ_end(); // Linear scan of the list.
+  return std::find(succ_begin(), Last, MCBB) != Last;
+}
+
+void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) {
+  Predecessors.push_back(MCBB); // No duplicate check; MCBB is untouched.
+}
+
+bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const {
+  const pred_const_iterator Last = pred_end(); // Linear scan of the list.
+  return std::find(pred_begin(), Last, MCBB) != Last;
+}
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
index 7736702f35..2d8336d77a 100644
--- a/lib/MC/MCInstrAnalysis.cpp
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -10,12 +10,13 @@
 #include "llvm/MC/MCInstrAnalysis.h"
 using namespace llvm;
 
-uint64_t MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
-                                         uint64_t Size) const {
+bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
+                                     uint64_t Size, uint64_t &Target) const {
   if (Inst.getNumOperands() == 0 ||
       Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL)
-    return -1ULL;
+    return false; // Target is left unmodified on failure.
 
   int64_t Imm = Inst.getOperand(0).getImm();
-  return Addr+Size+Imm;
+  Target = Addr+Size+Imm; // Operand 0 passed the PC-relative check above.
+  return true;
 }
diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp
index f563160833..50bac476fa 100644
--- a/lib/MC/MCModule.cpp
+++ b/lib/MC/MCModule.cpp
@@ -7,39 +7,92 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCAtom.h"
 #include "llvm/MC/MCModule.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCFunction.h"
+#include <algorithm>
 
 using namespace llvm;
 
-MCAtom *MCModule::createAtom(MCAtom::AtomType Type,
-                             uint64_t Begin, uint64_t End) {
+static bool AtomComp(const MCAtom *L, uint64_t Addr) {
+  return L->getEndAddr() < Addr; // True when L ends strictly before Addr.
+}
+
+void MCModule::map(MCAtom *NewAtom) {
+  uint64_t Begin = NewAtom->Begin,
+           End = NewAtom->End;
+  // [Begin, End] is a closed interval, so a one-byte atom has Begin == End.
-  assert(Begin < End && "Creating MCAtom with endpoints reversed?");
+  assert(Begin <= End && "Creating MCAtom with endpoints reversed?");
 
   // Check for atoms already covering this range.
-  IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Begin);
-  assert((!I.valid() || I.start() < End) && "Offset range already occupied!");
+  AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
+                                            Begin, AtomComp);
+  assert((I == atom_end() || (*I)->getBeginAddr() > End)
+         && "Offset range already occupied!");
 
-  // Create the new atom and add it to our maps.
-  MCAtom *NewAtom = new MCAtom(Type, this, Begin, End);
-  AtomAllocationTracker.insert(NewAtom);
-  OffsetMap.insert(Begin, End, NewAtom);
+  // Insert before I, the first atom that ends at or after Begin.
+  Atoms.insert(I, NewAtom);
+}
+
+MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) {
+  MCTextAtom *NewAtom = new MCTextAtom(this, Begin, End);
+  map(NewAtom); // Places the atom in the module's atom list.
+  return NewAtom;
+}
+
+MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) {
+  MCDataAtom *NewAtom = new MCDataAtom(this, Begin, End);
+  map(NewAtom); // Places the atom in the module's atom list.
   return NewAtom;
 }
 
 // remap - Update the interval mapping for an atom.
 void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) {
   // Find and erase the old mapping.
-  IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Atom->Begin);
-  assert(I.valid() && "Atom offset not found in module!");
+  AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
+                                            Atom->Begin, AtomComp);
+  assert(I != atom_end() && "Atom offset not found in module!");
   assert(*I == Atom && "Previous atom mapping was invalid!");
-  I.erase();
+  Atoms.erase(I); // The atom is put back below at its new position.
 
   // Insert the new mapping.
-  OffsetMap.insert(NewBegin, NewEnd, Atom);
+  AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(),
+                                               NewBegin, AtomComp);
+  Atoms.insert(NewI, Atom); // Before the first atom ending at/after NewBegin.
 
   // Update the atom internal bounds.
   Atom->Begin = NewBegin;
   Atom->End = NewEnd;
 }
 
+const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const {
+  // The first atom ending at or after Addr holds it iff it begins by Addr.
+  AtomListTy::const_iterator Pos =
+    std::lower_bound(atom_begin(), atom_end(), Addr, AtomComp);
+  bool Found = Pos != atom_end() && (*Pos)->getBeginAddr() <= Addr;
+  return Found ? *Pos : 0;
+}
+
+MCAtom *MCModule::findAtomContaining(uint64_t Addr) {
+  // The first atom ending at or after Addr holds it iff it begins by Addr.
+  AtomListTy::iterator Pos =
+    std::lower_bound(atom_begin(), atom_end(), Addr, AtomComp);
+  bool Found = Pos != atom_end() && (*Pos)->getBeginAddr() <= Addr;
+  return Found ? *Pos : 0;
+}
+
+MCFunction *MCModule::createFunction(const StringRef &Name) {
+  Functions.push_back(new MCFunction(Name)); // Pointer is kept in Functions.
+  return Functions.back(); // The function that was just appended.
+}
+
+MCModule::~MCModule() {
+  // Delete every atom in the range atom_begin()..atom_end().
+  for (AtomListTy::iterator Atom = atom_begin(), LastAtom = atom_end();
+       Atom != LastAtom; ++Atom)
+    delete *Atom;
+  // Delete every function in the range func_begin()..func_end().
+  for (FunctionListTy::iterator Fn = func_begin(), LastFn = func_end();
+       Fn != LastFn; ++Fn)
+    delete *Fn;
+}
diff --git a/lib/MC/MCObjectDisassembler.cpp b/lib/MC/MCObjectDisassembler.cpp
new file mode 100644
index 0000000000..bb3de1779e
--- /dev/null
+++ b/lib/MC/MCObjectDisassembler.cpp
@@ -0,0 +1,216 @@
+//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectDisassembler.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCModule.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/StringRefMemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <set>
+
+using namespace llvm;
+using namespace object;
+
+MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
+                                           const MCDisassembler &Dis,
+                                           const MCInstrAnalysis &MIA)
+  : Obj(Obj), Dis(Dis), MIA(MIA) {} // Only stores its three arguments.
+
+// Returns a new module of section atoms, plus a CFG when withCFG is set.
+MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
+  MCModule *Result = new MCModule;
+  buildSectionAtoms(Result);
+  if (withCFG) buildCFG(Result);
+  return Result;
+}
+
+// Creates one atom per text or data section of Obj whose address, size and
+// file-backed contents are all available; text sections are decoded in full.
+void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
+  error_code ec;
+  for (section_iterator SI = Obj.begin_sections(), SE = Obj.end_sections();
+       SI != SE; SI.increment(ec)) {
+    if (ec) break;
+
+    // Pre-set the out-parameters so a failed query skips the section cleanly.
+    bool isText = false; SI->isText(isText);
+    bool isData = false; SI->isData(isData);
+    if (!isData && !isText)
+      continue;
+
+    uint64_t StartAddr = UnknownAddressOrSize; SI->getAddress(StartAddr);
+    uint64_t SecSize = UnknownAddressOrSize; SI->getSize(SecSize);
+    if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
+      continue;
+
+    StringRef Contents; SI->getContents(Contents);
+    StringRefMemoryObject memoryObject(Contents);
+
+    // We don't care about things like non-file-backed sections yet.
+    if (Contents.size() != SecSize || !SecSize)
+      continue;
+    uint64_t EndAddr = StartAddr + SecSize - 1; // Last address in the section.
+
+    StringRef SecName; SI->getName(SecName);
+
+    if (isText) {
+      MCTextAtom *Text = Module->createTextAtom(StartAddr, EndAddr);
+      Text->setName(SecName);
+      uint64_t InstSize;
+      for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
+        MCInst Inst;
+        if (Dis.getInstruction(Inst, InstSize, memoryObject, Index,
+                               nulls(), nulls()))
+          Text->addInst(Inst, InstSize);
+        else
+          // We don't care about splitting mixed atoms either.
+          llvm_unreachable("Couldn't disassemble instruction in atom.");
+      }
+    } else {
+      MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
+      Data->setName(SecName);
+      for (uint64_t Index = 0; Index < SecSize; ++Index)
+        Data->addData(Contents[Index]);
+    }
+  }
+}
+
+namespace {
+  struct BBInfo;
+  typedef std::set<BBInfo*> BBInfoSetTy;
+  /// Per-address scratch record used while buildCFG reconstructs the CFG.
+  struct BBInfo {
+    MCTextAtom *Atom;   ///< Text atom starting at this address, or null.
+    MCBasicBlock *BB;   ///< Block created for Atom, or null until then.
+    BBInfoSetTy Succs;
+    BBInfoSetTy Preds;
+    BBInfo() : Atom(0), BB(0) {} // buildCFG null-checks both before use.
+    void addSucc(BBInfo &Succ) { // Records the edge on both endpoints.
+      Succs.insert(&Succ);
+      Succ.Preds.insert(this);
+    }
+  };
+}
+
+// Builds the CFG of Module: text atoms are split into basic-block atoms, linked
+// by their last instruction, then grouped into functions (see steps below).
+void MCObjectDisassembler::buildCFG(MCModule *Module) {
+  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
+  BBInfoByAddrTy BBInfos;
+  typedef std::set<uint64_t> AddressSetTy;
+  AddressSetTy Splits;
+  AddressSetTy Calls;
+
+  assert(Module->func_begin() == Module->func_end()
+         && "Module already has a CFG!");
+
+  // First, determine the basic block boundaries and call targets.
+  for (MCModule::atom_iterator AI = Module->atom_begin(),
+       AE = Module->atom_end(); AI != AE; ++AI) {
+    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+    if (!TA) continue;
+    Calls.insert(TA->getBeginAddr());
+    // Register every text atom, even one that is never split below.
+    BBInfos[TA->getBeginAddr()].Atom = TA;
+    for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
+         II != IE; ++II) {
+      if (MIA.isTerminator(II->Inst))
+        Splits.insert(II->Address + II->Size);
+      uint64_t Target;
+      if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
+        if (MIA.isCall(II->Inst))
+          Calls.insert(Target);
+        Splits.insert(Target);
+      }
+    }
+  }
+
+  // Split text atoms into basic block atoms.
+  for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
+       SI != SE; ++SI) {
+    MCAtom *A = Module->findAtomContaining(*SI);
+    if (!A) continue;
+    MCTextAtom *TA = cast<MCTextAtom>(A);
+    BBInfos[TA->getBeginAddr()].Atom = TA;
+    if (TA->getBeginAddr() == *SI) continue; // Already an atom boundary.
+    MCTextAtom *NewAtom = TA->split(*SI);
+    BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
+    StringRef BBName = TA->getName();
+    BBName = BBName.substr(0, BBName.find_last_of(':'));
+    NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
+  }
+
+  // Compute succs/preds.
+  for (MCModule::atom_iterator AI = Module->atom_begin(),
+       AE = Module->atom_end(); AI != AE; ++AI) {
+    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+    if (!TA) continue;
+    BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
+    const MCDecodedInst &LI = TA->back();
+    if (MIA.isBranch(LI.Inst)) {
+      uint64_t Target;
+      if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
+        CurBB.addSucc(BBInfos[Target]);
+      if (MIA.isConditionalBranch(LI.Inst))
+        CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+    } else if (!MIA.isTerminator(LI.Inst))
+      CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+  }
+
+  // Create functions and basic blocks.
+  for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
+       CI != CE; ++CI) {
+    BBInfo &Entry = BBInfos[*CI];
+    if (!Entry.Atom) continue;
+
+    MCFunction &MCFN = *Module->createFunction(Entry.Atom->getName());
+
+    // Create MCBBs.
+    SmallSetVector<BBInfo*, 16> Worklist;
+    Worklist.insert(&Entry);
+    for (size_t WI = 0; WI < Worklist.size(); ++WI) {
+      BBInfo *BBI = Worklist[WI];
+      if (!BBI->Atom) continue; // No atom was registered for this address.
+      BBI->BB = &MCFN.createBlock(*BBI->Atom);
+      // Add all predecessors and successors to the worklist.
+      for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
+                                 SI != SE; ++SI)
+        Worklist.insert(*SI);
+      for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
+                                 PI != PE; ++PI)
+        Worklist.insert(*PI);
+    }
+
+    // Set preds/succs, leaving out neighbours for which no block was created.
+    for (size_t WI = 0; WI < Worklist.size(); ++WI) {
+      BBInfo *BBI = Worklist[WI];
+      MCBasicBlock *MCBB = BBI->BB;
+      if (!MCBB) continue;
+      for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
+                                 SI != SE; ++SI)
+        if ((*SI)->BB)
+          MCBB->addSuccessor((*SI)->BB);
+      for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
+                                 PI != PE; ++PI)
+        if ((*PI)->BB)
+          MCBB->addPredecessor((*PI)->BB);
+    }
+  }
+}
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index fbc38183bc..6cc8ab236a 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -48,6 +48,7 @@ add_llvm_library(LLVMSupport
   StringMap.cpp
   StringPool.cpp
   StringRef.cpp
+  StringRefMemoryObject.cpp
   SystemUtils.cpp
   Timer.cpp
   ToolOutputFile.cpp
diff --git a/lib/Support/StringRefMemoryObject.cpp b/lib/Support/StringRefMemoryObject.cpp
new file mode 100644
index 0000000000..5db11e918c
--- /dev/null
+++ b/lib/Support/StringRefMemoryObject.cpp
@@ -0,0 +1,34 @@
+//===- lib/Support/StringRefMemoryObject.cpp --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StringRefMemoryObject.h"
+
+using namespace llvm;
+
+int StringRefMemoryObject::readByte(uint64_t Addr, uint8_t *Byte) const {
+  // Compare offsets: Base + getExtent() can wrap, rejecting valid addresses.
+  if (Addr < Base || Addr - Base >= getExtent()) return -1;
+  *Byte = Bytes[Addr - Base];
+  return 0;
+}
+
+// Copies up to Size bytes at Addr into Buf (count in *Copied); -1 on bad Addr.
+int StringRefMemoryObject::readBytes(uint64_t Addr, uint64_t Size,
+                                     uint8_t *Buf, uint64_t *Copied) const {
+  // Compare offsets: Base + getExtent() can wrap, rejecting valid addresses.
+  if (Addr < Base || Addr - Base >= getExtent())
+    return -1;
+  uint64_t Offset = Addr - Base;
+  if (Size > getExtent() - Offset)
+    Size = getExtent() - Offset; // Clamp the read to the bytes available.
+  memcpy(Buf, Bytes.data() + Offset, Size);
+  if (Copied)
+    *Copied = Size;
+  return 0;
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index eeec608820..48d48190fd 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -136,17 +136,17 @@ public:
     return MCInstrAnalysis::isConditionalBranch(Inst);
   }
 
-  uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
-                          uint64_t Size) const {
+  bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
+                      uint64_t Size, uint64_t &Target) const {
     unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0;
     // FIXME: We only handle PCRel branches for now.
     if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType
         != MCOI::OPERAND_PCREL)
-      return -1ULL;
+      return false; // Target is not written in this case.
 
     int64_t Imm = Inst.getOperand(LblOperand).getImm();
-
-    return Addr + Imm;
+    Target = Addr + Imm; // Size is unused: the offset is added to Addr itself.
+    return true;
   }
 };
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 52fc28d11d..c092801a67 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -240,15 +240,16 @@ public:
     return MCInstrAnalysis::isConditionalBranch(Inst);
   }
 
-  uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
-                          uint64_t Size) const {
+  bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
+                      uint64_t Size, uint64_t &Target) const {
     // We only handle PCRel branches for now.
     if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
-      return -1ULL;
+      return false; // Target is not written in this case.
 
     int64_t Imm = Inst.getOperand(0).getImm();
     // FIXME: This is not right for thumb.
-    return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+    Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
+    return true;
   }
 };
 
diff --git a/tools/llvm-objdump/CMakeLists.txt b/tools/llvm-objdump/CMakeLists.txt
index 0c49d0b457..e983ec92fb 100644
--- a/tools/llvm-objdump/CMakeLists.txt
+++ b/tools/llvm-objdump/CMakeLists.txt
@@ -12,5 +12,4 @@ add_llvm_tool(llvm-objdump
   COFFDump.cpp
   ELFDump.cpp
   MachODump.cpp
-  MCFunction.cpp
   )
diff --git a/tools/llvm-objdump/MCFunction.cpp b/tools/llvm-objdump/MCFunction.cpp
deleted file mode 100644
index 5c67f1b70a..0000000000
--- a/tools/llvm-objdump/MCFunction.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-//===-- MCFunction.cpp ----------------------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the algorithm to break down a region of machine code
-// into basic blocks and try to reconstruct a CFG from it.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCFunction.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCDisassembler.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCInstrAnalysis.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/Support/MemoryObject.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-#include <set>
-using namespace llvm;
-
-MCFunction
-MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
-                                 const MemoryObject &Region, uint64_t Start,
-                                 uint64_t End, const MCInstrAnalysis *Ana,
-                                 raw_ostream &DebugOut,
-                                 SmallVectorImpl<uint64_t> &Calls) {
-  std::vector<MCDecodedInst> Instructions;
-  std::set<uint64_t> Splits;
-  Splits.insert(Start);
-  uint64_t Size;
-
-  MCFunction f(Name);
-
-  {
-  DenseSet<uint64_t> VisitedInsts;
-  SmallVector<uint64_t, 16> WorkList;
-  WorkList.push_back(Start);
-  // Disassemble code and gather basic block split points.
-  while (!WorkList.empty()) {
-    uint64_t Index = WorkList.pop_back_val();
-    if (VisitedInsts.find(Index) != VisitedInsts.end())
-      continue; // Already visited this location.
-
-    for (;Index < End; Index += Size) {
-      VisitedInsts.insert(Index);
-
-      MCInst Inst;
-      if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
-        Instructions.push_back(MCDecodedInst(Index, Size, Inst));
-        if (Ana->isBranch(Inst)) {
-          uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
-          if (targ != -1ULL && targ == Index+Size)
-            continue; // Skip nop jumps.
-
-          // If we could determine the branch target, make a note to start a
-          // new basic block there and add the target to the worklist.
-          if (targ != -1ULL) {
-            Splits.insert(targ);
-            WorkList.push_back(targ);
-            WorkList.push_back(Index+Size);
-          }
-          Splits.insert(Index+Size);
-          break;
-        } else if (Ana->isReturn(Inst)) {
-          // Return instruction. This basic block ends here.
-          Splits.insert(Index+Size);
-          break;
-        } else if (Ana->isCall(Inst)) {
-          uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
-          // Add the call to the call list if the destination is known.
-          if (targ != -1ULL && targ != Index+Size)
-            Calls.push_back(targ);
-        }
-      } else {
-        errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
-        if (Size == 0)
-          Size = 1; // skip illegible bytes
-      }
-    }
-  }
-  }
-
-  // Make sure the instruction list is sorted.
-  std::sort(Instructions.begin(), Instructions.end());
-
-  // Create basic blocks.
-  unsigned ii = 0, ie = Instructions.size();
-  for (std::set<uint64_t>::iterator spi = Splits.begin(),
-       spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
-    MCBasicBlock BB;
-    uint64_t BlockEnd = *llvm::next(spi);
-    // Add instructions to the BB.
-    for (; ii != ie; ++ii) {
-      if (Instructions[ii].Address < *spi ||
-          Instructions[ii].Address >= BlockEnd)
-        break;
-      BB.addInst(Instructions[ii]);
-    }
-    f.addBlock(*spi, BB);
-  }
-
-  std::sort(f.Blocks.begin(), f.Blocks.end());
-
-  // Calculate successors of each block.
-  for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
-    MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second);
-    if (BB.getInsts().empty()) continue;
-    const MCDecodedInst &Inst = BB.getInsts().back();
-
-    if (Ana->isBranch(Inst.Inst)) {
-      uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size);
-      if (targ == -1ULL) {
-        // Indirect branch. Bail and add all blocks of the function as a
-        // successor.
-        for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
-          BB.addSucc(i->first);
-      } else if (targ != Inst.Address+Inst.Size)
-        BB.addSucc(targ);
-      // Conditional branches can also fall through to the next block.
-      if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e)
-        BB.addSucc(llvm::next(i)->first);
-    } else {
-      // No branch. Fall through to the next block.
-      if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e)
-        BB.addSucc(llvm::next(i)->first);
-    }
-  }
-
-  return f;
-}
diff --git a/tools/llvm-objdump/MCFunction.h b/tools/llvm-objdump/MCFunction.h
deleted file mode 100644
index 6d3a548d48..0000000000
--- a/tools/llvm-objdump/MCFunction.h
+++ /dev/null
@@ -1,100 +0,0 @@
-//===-- MCFunction.h ------------------------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the data structures to hold a CFG reconstructed from
-// machine code.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_OBJECTDUMP_MCFUNCTION_H
-#define LLVM_OBJECTDUMP_MCFUNCTION_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/MC/MCInst.h"
-#include <map>
-
-namespace llvm {
-
-class MCDisassembler;
-class MCInstrAnalysis;
-class MemoryObject;
-class raw_ostream;
-
-/// MCDecodedInst - Small container to hold an MCInst and associated info like
-/// address and size.
-struct MCDecodedInst {
-  uint64_t Address;
-  uint64_t Size;
-  MCInst Inst;
-
-  MCDecodedInst() {}
-  MCDecodedInst(uint64_t Address, uint64_t Size, MCInst Inst)
-    : Address(Address), Size(Size), Inst(Inst) {}
-
-  bool operator<(const MCDecodedInst &RHS) const {
-    return Address < RHS.Address;
-  }
-};
-
-/// MCBasicBlock - Consists of multiple MCDecodedInsts and a list of successing
-/// MCBasicBlocks.
-class MCBasicBlock {
-  std::vector<MCDecodedInst> Insts;
-  typedef DenseSet<uint64_t> SetTy;
-  SetTy Succs;
-public:
-  ArrayRef<MCDecodedInst> getInsts() const { return Insts; }
-
-  typedef SetTy::const_iterator succ_iterator;
-  succ_iterator succ_begin() const { return Succs.begin(); }
-  succ_iterator succ_end() const { return Succs.end(); }
-
-  bool contains(uint64_t Addr) const { return Succs.count(Addr); }
-
-  void addInst(const MCDecodedInst &Inst) { Insts.push_back(Inst); }
-  void addSucc(uint64_t Addr) { Succs.insert(Addr); }
-
-  bool operator<(const MCBasicBlock &RHS) const {
-    return Insts.size() < RHS.Insts.size();
-  }
-};
-
-/// MCFunction - Represents a named function in machine code, containing
-/// multiple MCBasicBlocks.
-class MCFunction {
-  const StringRef Name;
-  // Keep BBs sorted by address.
-  typedef std::vector<std::pair<uint64_t, MCBasicBlock> > MapTy;
-  MapTy Blocks;
-public:
-  MCFunction(StringRef Name) : Name(Name) {}
-
-  // Create an MCFunction from a region of binary machine code.
-  static MCFunction
-  createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
-                       const MemoryObject &Region, uint64_t Start, uint64_t End,
-                       const MCInstrAnalysis *Ana, raw_ostream &DebugOut,
-                       SmallVectorImpl<uint64_t> &Calls);
-
-  typedef MapTy::const_iterator iterator;
-  iterator begin() const { return Blocks.begin(); }
-  iterator end() const { return Blocks.end(); }
-
-  StringRef getName() const { return Name; }
-
-  MCBasicBlock &addBlock(uint64_t Address, const MCBasicBlock &BB) {
-    Blocks.push_back(std::make_pair(Address, BB));
-    return Blocks.back().second;
-  }
-};
-
-}
-
-#endif
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index c7e5cc1ede..03a383eb12 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm-objdump.h"
-#include "MCFunction.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -44,10 +44,6 @@ using namespace llvm;
 using namespace object;
 
 static cl::opt<bool>
-  CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and"
-                      " write it to a graphviz file (MachO-only)"));
-
-static cl::opt<bool>
   UseDbg("g", cl::desc("Print line information from debug info if available"));
 
 static cl::opt<std::string>
@@ -91,99 +87,6 @@ struct SymbolSorter {
   }
 };
 
-// Print additional information about an address, if available.
-static void DumpAddress(uint64_t Address, ArrayRef<SectionRef> Sections,
-                        const MachOObjectFile *MachOObj, raw_ostream &OS) {
-  for (unsigned i = 0; i != Sections.size(); ++i) {
-    uint64_t SectAddr = 0, SectSize = 0;
-    Sections[i].getAddress(SectAddr);
-    Sections[i].getSize(SectSize);
-    uint64_t addr = SectAddr;
-    if (SectAddr <= Address &&
-        SectAddr + SectSize > Address) {
-      StringRef bytes, name;
-      Sections[i].getContents(bytes);
-      Sections[i].getName(name);
-      // Print constant strings.
-      if (!name.compare("__cstring"))
-        OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"';
-      // Print constant CFStrings.
-      if (!name.compare("__cfstring"))
-        OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"';
-    }
-  }
-}
-
-typedef std::map<uint64_t, MCFunction*> FunctionMapTy;
-typedef SmallVector<MCFunction, 16> FunctionListTy;
-static void createMCFunctionAndSaveCalls(StringRef Name,
-                                         const MCDisassembler *DisAsm,
-                                         MemoryObject &Object, uint64_t Start,
-                                         uint64_t End,
-                                         MCInstrAnalysis *InstrAnalysis,
-                                         uint64_t Address,
-                                         raw_ostream &DebugOut,
-                                         FunctionMapTy &FunctionMap,
-                                         FunctionListTy &Functions) {
-  SmallVector<uint64_t, 16> Calls;
-  MCFunction f =
-    MCFunction::createFunctionFromMC(Name, DisAsm, Object, Start, End,
-                                     InstrAnalysis, DebugOut, Calls);
-  Functions.push_back(f);
-  FunctionMap[Address] = &Functions.back();
-
-  // Add the gathered callees to the map.
-  for (unsigned i = 0, e = Calls.size(); i != e; ++i)
-    FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0));
-}
-
-// Write a graphviz file for the CFG inside an MCFunction.
-static void emitDOTFile(const char *FileName, const MCFunction &f,
-                        MCInstPrinter *IP) {
-  // Start a new dot file.
-  std::string Error;
-  raw_fd_ostream Out(FileName, Error);
-  if (!Error.empty()) {
-    errs() << "llvm-objdump: warning: " << Error << '\n';
-    return;
-  }
-
-  Out << "digraph " << f.getName() << " {\n";
-  Out << "graph [ rankdir = \"LR\" ];\n";
-  for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
-    bool hasPreds = false;
-    // Only print blocks that have predecessors.
-    // FIXME: Slow.
-    for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
-        ++pi)
-      if (pi->second.contains(i->first)) {
-        hasPreds = true;
-        break;
-      }
-
-    if (!hasPreds && i != f.begin())
-      continue;
-
-    Out << '"' << i->first << "\" [ label=\"<a>";
-    // Print instructions.
-    for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
-        ++ii) {
-      // Escape special chars and print the instruction in mnemonic form.
-      std::string Str;
-      raw_string_ostream OS(Str);
-      IP->printInst(&i->second.getInsts()[ii].Inst, OS, "");
-      Out << DOT::EscapeString(OS.str()) << '|';
-    }
-    Out << "<o>\" shape=\"record\" ];\n";
-
-    // Add edges.
-    for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
-        se = i->second.succ_end(); si != se; ++si)
-      Out << i->first << ":o -> " << *si <<":a\n";
-  }
-  Out << "}\n";
-}
-
 static void
 getSectionsAndSymbols(const macho::Header Header,
                       MachOObjectFile *MachOObj,
@@ -272,6 +175,12 @@ static void DisassembleInputMachO2(StringRef Filename,
 
   macho::Header Header = MachOOF->getHeader();
 
+  // FIXME: FoundFns isn't used anymore. Using symbols/LC_FUNCTION_STARTS to
+  // determine function locations will eventually go in MCObjectDisassembler.
+  // FIXME: Using the -cfg command line option, this code used to be able to
+  // annotate relocations with the referenced symbol's name, and if this was
+  // inside a __[cf]string section, the data it points to. This is now replaced
+  // by the upcoming MCSymbolizer, which needs the appropriate setup done above.
   std::vector<SectionRef> Sections;
   std::vector<SymbolRef> Symbols;
   SmallVector<uint64_t, 8> FoundFns;
@@ -308,31 +217,24 @@ static void DisassembleInputMachO2(StringRef Filename,
     diContext.reset(DIContext::getDWARFContext(DbgObj));
   }
 
-  FunctionMapTy FunctionMap;
-  FunctionListTy Functions;
-
   for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
+
+    bool SectIsText = false;
+    Sections[SectIdx].isText(SectIsText);
+    if (SectIsText == false)
+      continue;
+
     StringRef SectName;
     if (Sections[SectIdx].getName(SectName) ||
         SectName != "__text")
       continue; // Skip non-text sections
 
     DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl();
+
     StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR);
     if (SegmentName != "__TEXT")
       continue;
 
-    // Insert the functions from the function starts segment into our map.
-    uint64_t VMAddr;
-    Sections[SectIdx].getAddress(VMAddr);
-    for (unsigned i = 0, e = FoundFns.size(); i != e; ++i) {
-      StringRef SectBegin;
-      Sections[SectIdx].getContents(SectBegin);
-      uint64_t Offset = (uint64_t)SectBegin.data();
-      FunctionMap.insert(std::make_pair(VMAddr + FoundFns[i]-Offset,
-                                        (MCFunction*)0));
-    }
-
     StringRef Bytes;
     Sections[SectIdx].getContents(Bytes);
     StringRefMemoryObject memoryObject(Bytes);
@@ -403,52 +305,39 @@ static void DisassembleInputMachO2(StringRef Filename,
 
       symbolTableWorked = true;
 
-      if (!CFG) {
-        // Normal disassembly, print addresses, bytes and mnemonic form.
-        StringRef SymName;
-        Symbols[SymIdx].getName(SymName);
-
-        outs() << SymName << ":\n";
-        DILineInfo lastLine;
-        for (uint64_t Index = Start; Index < End; Index += Size) {
-          MCInst Inst;
-
-          if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
-                                     DebugOut, nulls())) {
-            uint64_t SectAddress = 0;
-            Sections[SectIdx].getAddress(SectAddress);
-            outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
-
-            DumpBytes(StringRef(Bytes.data() + Index, Size));
-            IP->printInst(&Inst, outs(), "");
-
-            // Print debug info.
-            if (diContext) {
-              DILineInfo dli =
-                diContext->getLineInfoForAddress(SectAddress + Index);
-              // Print valid line info if it changed.
-              if (dli != lastLine && dli.getLine() != 0)
-                outs() << "\t## " << dli.getFileName() << ':'
-                       << dli.getLine() << ':' << dli.getColumn();
-              lastLine = dli;
-            }
-            outs() << "\n";
-          } else {
-            errs() << "llvm-objdump: warning: invalid instruction encoding\n";
-            if (Size == 0)
-              Size = 1; // skip illegible bytes
+      outs() << SymName << ":\n";
+      DILineInfo lastLine;
+      for (uint64_t Index = Start; Index < End; Index += Size) {
+        MCInst Inst;
+
+        if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
+                                   DebugOut, nulls())) {
+          uint64_t SectAddress = 0;
+          Sections[SectIdx].getAddress(SectAddress);
+          outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
+
+          DumpBytes(StringRef(Bytes.data() + Index, Size));
+          IP->printInst(&Inst, outs(), "");
+
+          // Print debug info.
+          if (diContext) {
+            DILineInfo dli =
+              diContext->getLineInfoForAddress(SectAddress + Index);
+            // Print valid line info if it changed.
+            if (dli != lastLine && dli.getLine() != 0)
+              outs() << "\t## " << dli.getFileName() << ':'
+                << dli.getLine() << ':' << dli.getColumn();
+            lastLine = dli;
           }
+          outs() << "\n";
+        } else {
+          errs() << "llvm-objdump: warning: invalid instruction encoding\n";
+          if (Size == 0)
+            Size = 1; // skip illegible bytes
         }
-      } else {
-        // Create CFG and use it for disassembly.
-        StringRef SymName;
-        Symbols[SymIdx].getName(SymName);
-        createMCFunctionAndSaveCalls(
-            SymName, DisAsm.get(), memoryObject, Start, End,
-            InstrAnalysis.get(), Start, DebugOut, FunctionMap, Functions);
       }
     }
-    if (!CFG && !symbolTableWorked) {
+    if (!symbolTableWorked) {
       // Reading the symbol table didn't work, disassemble the whole section. 
       uint64_t SectAddress;
       Sections[SectIdx].getAddress(SectAddress);
@@ -471,142 +360,5 @@ static void DisassembleInputMachO2(StringRef Filename,
         }
       }
     }
-
-    if (CFG) {
-      if (!symbolTableWorked) {
-        // Reading the symbol table didn't work, create a big __TEXT symbol.
-        uint64_t SectSize = 0, SectAddress = 0;
-        Sections[SectIdx].getSize(SectSize);
-        Sections[SectIdx].getAddress(SectAddress);
-        createMCFunctionAndSaveCalls("__TEXT", DisAsm.get(), memoryObject,
-                                     0, SectSize,
-                                     InstrAnalysis.get(),
-                                     SectAddress, DebugOut,
-                                     FunctionMap, Functions);
-      }
-      for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(),
-           me = FunctionMap.end(); mi != me; ++mi)
-        if (mi->second == 0) {
-          // Create functions for the remaining callees we have gathered,
-          // but we didn't find a name for them.
-          uint64_t SectSize = 0;
-          Sections[SectIdx].getSize(SectSize);
-
-          SmallVector<uint64_t, 16> Calls;
-          MCFunction f =
-            MCFunction::createFunctionFromMC("unknown", DisAsm.get(),
-                                             memoryObject, mi->first,
-                                             SectSize,
-                                             InstrAnalysis.get(), DebugOut,
-                                             Calls);
-          Functions.push_back(f);
-          mi->second = &Functions.back();
-          for (unsigned i = 0, e = Calls.size(); i != e; ++i) {
-            std::pair<uint64_t, MCFunction*> p(Calls[i], (MCFunction*)0);
-            if (FunctionMap.insert(p).second)
-              mi = FunctionMap.begin();
-          }
-        }
-
-      DenseSet<uint64_t> PrintedBlocks;
-      for (unsigned ffi = 0, ffe = Functions.size(); ffi != ffe; ++ffi) {
-        MCFunction &f = Functions[ffi];
-        for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){
-          if (!PrintedBlocks.insert(fi->first).second)
-            continue; // We already printed this block.
-
-          // We assume a block has predecessors when it's the first block after
-          // a symbol.
-          bool hasPreds = FunctionMap.find(fi->first) != FunctionMap.end();
-
-          // See if this block has predecessors.
-          // FIXME: Slow.
-          for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
-              ++pi)
-            if (pi->second.contains(fi->first)) {
-              hasPreds = true;
-              break;
-            }
-
-          uint64_t SectSize = 0, SectAddress;
-          Sections[SectIdx].getSize(SectSize);
-          Sections[SectIdx].getAddress(SectAddress);
-
-          // No predecessors, this is a data block. Print as .byte directives.
-          if (!hasPreds) {
-            uint64_t End = llvm::next(fi) == fe ? SectSize :
-                                                  llvm::next(fi)->first;
-            outs() << "# " << End-fi->first << " bytes of data:\n";
-            for (unsigned pos = fi->first; pos != End; ++pos) {
-              outs() << format("%8x:\t", SectAddress + pos);
-              DumpBytes(StringRef(Bytes.data() + pos, 1));
-              outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]);
-            }
-            continue;
-          }
-
-          if (fi->second.contains(fi->first)) // Print a header for simple loops
-            outs() << "# Loop begin:\n";
-
-          DILineInfo lastLine;
-          // Walk over the instructions and print them.
-          for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie;
-               ++ii) {
-            const MCDecodedInst &Inst = fi->second.getInsts()[ii];
-
-            // If there's a symbol at this address, print its name.
-            if (FunctionMap.find(SectAddress + Inst.Address) !=
-                FunctionMap.end())
-              outs() << FunctionMap[SectAddress + Inst.Address]-> getName()
-                     << ":\n";
-
-            outs() << format("%8" PRIx64 ":\t", SectAddress + Inst.Address);
-            DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size));
-
-            if (fi->second.contains(fi->first)) // Indent simple loops.
-              outs() << '\t';
-
-            IP->printInst(&Inst.Inst, outs(), "");
-
-            // Look for relocations inside this instructions, if there is one
-            // print its target and additional information if available.
-            for (unsigned j = 0; j != Relocs.size(); ++j)
-              if (Relocs[j].first >= SectAddress + Inst.Address &&
-                  Relocs[j].first < SectAddress + Inst.Address + Inst.Size) {
-                StringRef SymName;
-                uint64_t Addr;
-                Relocs[j].second.getAddress(Addr);
-                Relocs[j].second.getName(SymName);
-
-                outs() << "\t# " << SymName << ' ';
-                DumpAddress(Addr, Sections, MachOOF, outs());
-              }
-
-            // If this instructions contains an address, see if we can evaluate
-            // it and print additional information.
-            uint64_t targ = InstrAnalysis->evaluateBranch(Inst.Inst,
-                                                          Inst.Address,
-                                                          Inst.Size);
-            if (targ != -1ULL)
-              DumpAddress(targ, Sections, MachOOF, outs());
-
-            // Print debug info.
-            if (diContext) {
-              DILineInfo dli =
-                diContext->getLineInfoForAddress(SectAddress + Inst.Address);
-              // Print valid line info if it changed.
-              if (dli != lastLine && dli.getLine() != 0)
-                outs() << "\t## " << dli.getFileName() << ':'
-                       << dli.getLine() << ':' << dli.getColumn();
-              lastLine = dli;
-            }
-
-            outs() << '\n';
-          }
-        }
-
-        emitDOTFile((f.getName().str() + ".dot").c_str(), f, IP.get());
-      }
-    }
   }
 }
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 570ec7ed6f..d8611d8b3d 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -17,22 +17,26 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm-objdump.h"
-#include "MCFunction.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAtom.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFunction.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
 #include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCModule.h"
+#include "llvm/MC/MCObjectDisassembler.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCObjectSymbolizer.h"
 #include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCRelocationInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/MachO.h"
@@ -131,6 +135,10 @@ static cl::opt<bool>
 Symbolize("symbolize", cl::desc("When disassembling instructions, "
                                 "try to symbolize operands."));
 
+static cl::opt<bool>
+CFG("cfg", cl::desc("Create a CFG for every function found in the object"
+                      " and write it to a graphviz file"));
+
 static StringRef ToolName;
 
 bool llvm::error(error_code ec) {
@@ -169,7 +177,51 @@ static const Target *getTarget(const ObjectFile *Obj = NULL) {
   return TheTarget;
 }
 
-void llvm::StringRefMemoryObject::anchor() { }
+// Write the CFG of \p f to \p FileName as a graphviz (DOT) digraph.
+static void emitDOTFile(const char *FileName, const MCFunction &f,
+                        MCInstPrinter *IP) {
+  // Open the output file; on failure, warn on stderr and emit nothing.
+  std::string Error;
+  raw_fd_ostream Out(FileName, Error);
+  if (!Error.empty()) {
+    errs() << "llvm-objdump: warning: " << Error << '\n';
+    return;
+  }
+
+  Out << "digraph \"" << f.getName() << "\" {\n";
+  Out << "graph [ rankdir = \"LR\" ];\n";
+  for (MCFunction::const_iterator i = f.begin(), e = f.end(); i != e; ++i) {
+    // Skip blocks without predecessors, except the first block of f.
+    bool hasPreds = (*i)->pred_begin() != (*i)->pred_end();
+
+    if (!hasPreds && i != f.begin())
+      continue;
+
+    Out << '"' << (*i)->getInsts()->getBeginAddr() << "\" [ label=\"<a>";
+    // Emit one record field per instruction, separated by '|'.
+    for (unsigned ii = 0, ie = (*i)->getInsts()->size(); ii != ie;
+        ++ii) {
+      if (ii != 0) // Not the first instruction: start a new field.
+        Out << '|';
+      if (ii + 1 == ie) // Last instruction: tag it with port <o>.
+        Out << "<o>";
+
+      // Print the instruction to a string, then DOT-escape it for the label.
+      std::string Str;
+      raw_string_ostream OS(Str);
+      IP->printInst(&(*i)->getInsts()->at(ii).Inst, OS, "");
+      Out << DOT::EscapeString(OS.str());
+    }
+    Out << "\" shape=\"record\" ];\n";
+
+    // Add edges from this block's <o> port to each successor's <a> port.
+    for (MCBasicBlock::succ_const_iterator si = (*i)->succ_begin(),
+        se = (*i)->succ_end(); si != se; ++si)
+      Out << (*i)->getInsts()->getBeginAddr() << ":o -> "
+          << (*si)->getInsts()->getBeginAddr() << ":a\n";
+  }
+  Out << "}\n";
+}
 
 void llvm::DumpBytes(StringRef bytes) {
   static const char hex_rep[] = "0123456789abcdef";
@@ -269,6 +321,9 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
     }
   }
 
+  OwningPtr<const MCInstrAnalysis>
+    MIA(TheTarget->createMCInstrAnalysis(MII.get()));
+
   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
   OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
       AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
@@ -278,6 +333,34 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
     return;
   }
 
+  if (CFG) {
+    OwningPtr<MCObjectDisassembler> OD(
+      new MCObjectDisassembler(*Obj, *DisAsm, *MIA));
+    OwningPtr<MCModule> Mod(OD->buildModule(/* withCFG */ true));
+    for (MCModule::const_atom_iterator AI = Mod->atom_begin(),
+                                       AE = Mod->atom_end();
+                                       AI != AE; ++AI) {
+      outs() << "Atom " << (*AI)->getName() << ": \n";
+      if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI)) {
+        for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
+             II != IE;
+             ++II) {
+          IP->printInst(&II->Inst, outs(), "");
+          outs() << "\n";
+        }
+      }
+    }
+    for (MCModule::const_func_iterator FI = Mod->func_begin(),
+                                       FE = Mod->func_end();
+                                       FI != FE; ++FI) {
+      static int filenum = 0;
+      emitDOTFile((Twine((*FI)->getName()) + "_" +
+                   utostr(filenum++) + ".dot").str().c_str(),
+                    **FI, IP.get());
+    }
+  }
+
+
   error_code ec;
   for (section_iterator i = Obj->begin_sections(),
                         e = Obj->end_sections();
diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h
index 3c62240f8f..87f19ba257 100644
--- a/tools/llvm-objdump/llvm-objdump.h
+++ b/tools/llvm-objdump/llvm-objdump.h
@@ -13,7 +13,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DataTypes.h"
-#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/StringRefMemoryObject.h"
 
 namespace llvm {
 
@@ -35,25 +35,6 @@ void DisassembleInputMachO(StringRef Filename);
 void printCOFFUnwindInfo(const object::COFFObjectFile* o);
 void printELFFileHeader(const object::ObjectFile *o);
 
-class StringRefMemoryObject : public MemoryObject {
-  virtual void anchor();
-  StringRef Bytes;
-  uint64_t Base;
-public:
-  StringRefMemoryObject(StringRef bytes, uint64_t Base = 0)
-    : Bytes(bytes), Base(Base) {}
-
-  uint64_t getBase() const { return Base; }
-  uint64_t getExtent() const { return Bytes.size(); }
-
-  int readByte(uint64_t Addr, uint8_t *Byte) const {
-    if (Addr >= Base + getExtent() || Addr < Base)
-      return -1;
-    *Byte = Bytes[Addr - Base];
-    return 0;
-  }
-};
-
 }
 
 #endif