6 files changed, 411 insertions, 71 deletions
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 5377c5c8d8..89e2aaf48b 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_library(LLVMMC
   MCELF.cpp
   MCELFObjectTargetWriter.cpp
   MCELFStreamer.cpp
+  MCFunction.cpp
   MCExpr.cpp
   MCExternalSymbolizer.cpp
   MCInst.cpp
@@ -26,6 +27,7 @@ add_llvm_library(LLVMMC
   MCModule.cpp
   MCNullStreamer.cpp
   MCObjectFileInfo.cpp
+  MCObjectDisassembler.cpp
   MCObjectStreamer.cpp
   MCObjectSymbolizer.cpp
   MCObjectWriter.cpp
diff --git a/lib/MC/MCAtom.cpp b/lib/MC/MCAtom.cpp
index d71444324f..2626b39db4 100644
--- a/lib/MC/MCAtom.cpp
+++ b/lib/MC/MCAtom.cpp
@@ -10,88 +10,101 @@
 #include "llvm/MC/MCAtom.h"
 #include "llvm/MC/MCModule.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <iterator>
 
 using namespace llvm;
 
-void MCAtom::addInst(const MCInst &I, uint64_t Address, unsigned Size) {
-  assert(Type == TextAtom && "Trying to add MCInst to a non-text atom!");
-
-  assert(Address < End+Size &&
-         "Instruction not contiguous with end of atom!");
-  if (Address > End)
-    Parent->remap(this, Begin, End+Size);
-
-  Text.push_back(std::make_pair(Address, I));
+void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) {
+  Parent->remap(this, NewBegin, NewEnd);
 }
 
-void MCAtom::addData(const MCData &D) {
-  assert(Type == DataAtom && "Trying to add MCData to a non-data atom!");
-  Parent->remap(this, Begin, End+1);
-
-  Data.push_back(D);
+void MCAtom::remapForTruncate(uint64_t TruncPt) {
+  assert((TruncPt >= Begin && TruncPt < End) &&
+         "Truncation point not contained in atom!");
+  remap(Begin, TruncPt);
 }
 
-MCAtom *MCAtom::split(uint64_t SplitPt) {
+void MCAtom::remapForSplit(uint64_t SplitPt,
+                           uint64_t &LBegin, uint64_t &LEnd,
+                           uint64_t &RBegin, uint64_t &REnd) {
   assert((SplitPt > Begin && SplitPt <= End) &&
          "Splitting at point not contained in atom!");
 
   // Compute the new begin/end points.
-  uint64_t LeftBegin = Begin;
-  uint64_t LeftEnd = SplitPt - 1;
-  uint64_t RightBegin = SplitPt;
-  uint64_t RightEnd = End;
+  LBegin = Begin;
+  LEnd = SplitPt - 1;
+  RBegin = SplitPt;
+  REnd = End;
 
   // Remap this atom to become the lower of the two new ones.
-  Parent->remap(this, LeftBegin, LeftEnd);
+  remap(LBegin, LEnd);
+}
 
-  // Create a new atom for the higher atom.
-  MCAtom *RightAtom = Parent->createAtom(Type, RightBegin, RightEnd);
+// MCDataAtom
 
-  // Split the contents of the original atom between it and the new one.  The
-  // precise method depends on whether this is a data or a text atom.
-  if (isDataAtom()) {
-    std::vector<MCData>::iterator I = Data.begin() + (RightBegin - LeftBegin);
+void MCDataAtom::addData(const MCData &D) { // Append one data element.
+  Data.push_back(D);
+  if (Data.size() > End + 1 - Begin) // [Begin, End] is an inclusive range.
+    remap(Begin, End + 1);
+}
 
-    assert(I != Data.end() && "Split point not found in range!");
+void MCDataAtom::truncate(uint64_t TruncPt) {
+  remapForTruncate(TruncPt);
 
-    std::copy(I, Data.end(), RightAtom->Data.end());
-    Data.erase(I, Data.end());
-  } else if (isTextAtom()) {
-    std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+  Data.resize(TruncPt - Begin + 1);
+}
 
-    while (I != Text.end() && I->first < SplitPt) ++I;
+MCDataAtom *MCDataAtom::split(uint64_t SplitPt) {
+  uint64_t LBegin, LEnd, RBegin, REnd;
+  remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd);
 
-    assert(I != Text.end() && "Split point not found in disassembly!");
-    assert(I->first == SplitPt &&
-           "Split point does not fall on instruction boundary!");
+  MCDataAtom *RightAtom = Parent->createDataAtom(RBegin, REnd);
+  RightAtom->setName(getName());
 
-    std::copy(I, Text.end(), RightAtom->Text.end());
-    Text.erase(I, Text.end());
-  } else
-    llvm_unreachable("Unknown atom type!");
+  std::vector<MCData>::iterator I = Data.begin() + (RBegin - LBegin);
+  assert(I != Data.end() && "Split point not found in range!");
 
+  std::copy(I, Data.end(), std::back_inserter(RightAtom->Data));
+  Data.erase(I, Data.end());
   return RightAtom;
 }
 
-void MCAtom::truncate(uint64_t TruncPt) {
-  assert((TruncPt >= Begin && TruncPt < End) &&
-         "Truncation point not contained in atom!");
+// MCTextAtom
 
-  Parent->remap(this, Begin, TruncPt);
+void MCTextAtom::addInst(const MCInst &I, uint64_t Size) {
+  if (NextInstAddress + Size - 1 > End) // Cover the last byte of I.
+    remap(Begin, NextInstAddress + Size - 1);
+  Insts.push_back(MCDecodedInst(I, NextInstAddress, Size));
+  NextInstAddress += Size; // The next instruction starts right after I.
+}
 
-  if (isDataAtom()) {
-    Data.resize(TruncPt - Begin + 1);
-  } else if (isTextAtom()) {
-    std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+void MCTextAtom::truncate(uint64_t TruncPt) {
+  remapForTruncate(TruncPt);
 
-    while (I != Text.end() && I->first <= TruncPt) ++I;
+  InstListTy::iterator I = Insts.begin();
+  while (I != Insts.end() && I->Address <= TruncPt) ++I;
 
-    assert(I != Text.end() && "Truncation point not found in disassembly!");
-    assert(I->first == TruncPt+1 &&
-           "Truncation point does not fall on instruction boundary");
+  assert(I != Insts.end() && "Truncation point not found in disassembly!");
+  assert(I->Address == TruncPt + 1 &&
+         "Truncation point does not fall on instruction boundary");
 
-    Text.erase(I, Text.end());
-  } else
-    llvm_unreachable("Unknown atom type!");
+  Insts.erase(I, Insts.end());
 }
 
+MCTextAtom *MCTextAtom::split(uint64_t SplitPt) {
+  // Shrink this atom to the lower range, then create the upper one.
+  uint64_t LoBegin, LoEnd, HiBegin, HiEnd;
+  remapForSplit(SplitPt, LoBegin, LoEnd, HiBegin, HiEnd);
+  MCTextAtom *Upper = Parent->createTextAtom(HiBegin, HiEnd);
+  Upper->setName(getName());
+  // Find the first instruction at or past SplitPt.
+  InstListTy::iterator Cut = Insts.begin();
+  for (InstListTy::iterator Last = Insts.end(); Cut != Last; ++Cut)
+    if (Cut->Address >= SplitPt) break;
+  assert(Cut != Insts.end() && "Split point not found in disassembly!");
+  assert(Cut->Address == SplitPt &&
+         "Split point does not fall on instruction boundary!");
+  std::copy(Cut, Insts.end(), std::back_inserter(Upper->Insts));
+  Insts.erase(Cut, Insts.end());
+  return Upper;
+}
diff --git a/lib/MC/MCFunction.cpp b/lib/MC/MCFunction.cpp
new file mode 100644
index 0000000000..2665d3e167
--- /dev/null
+++ b/lib/MC/MCFunction.cpp
@@ -0,0 +1,55 @@
+//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+// MCFunction
+
+// Construct a function that records Name.
+MCFunction::MCFunction(StringRef Name)
+  : Name(Name) {}
+
+MCFunction::~MCFunction() {
+  // Delete each pointer in the range [begin(), end()).
+  for (iterator BB = begin(), Last = end(); BB != Last;) delete *BB++;
+}
+
+MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) {
+  Blocks.push_back(new MCBasicBlock(TA, this)); // New block's parent is this.
+  return *Blocks.back(); // The block just appended.
+}
+
+// MCBasicBlock
+
+// Construct a block that refers to text atom Insts and to function Parent.
+MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent)
+  : Insts(&Insts), Parent(Parent) {}
+
+void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) {
+  Successors.push_back(MCBB); // Appended unconditionally; no duplicate check.
+}
+
+bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const {
+  // True when MCBB occurs at least once in the successor list.
+  return std::count(Successors.begin(), Successors.end(), MCBB) != 0;
+}
+
+void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) {
+  Predecessors.push_back(MCBB); // Appended unconditionally; no duplicate check.
+}
+
+bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const {
+  // True when MCBB occurs at least once in the predecessor list.
+  return std::count(Predecessors.begin(), Predecessors.end(), MCBB) != 0;
+}
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
index 7736702f35..2d8336d77a 100644
--- a/lib/MC/MCInstrAnalysis.cpp
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -10,12 +10,13 @@
 #include "llvm/MC/MCInstrAnalysis.h"
 using namespace llvm;
 
-uint64_t MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
-                                         uint64_t Size) const {
+bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
+                                     uint64_t Size, uint64_t &Target) const {
   if (Inst.getNumOperands() == 0 ||
       Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL)
-    return -1ULL;
+    return false;
 
   int64_t Imm = Inst.getOperand(0).getImm();
-  return Addr+Size+Imm;
+  Target = Addr+Size+Imm;
+  return true;
 }
diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp
index f563160833..50bac476fa 100644
--- a/lib/MC/MCModule.cpp
+++ b/lib/MC/MCModule.cpp
@@ -7,39 +7,92 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCAtom.h"
 #include "llvm/MC/MCModule.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCFunction.h"
+#include <algorithm>
 
 using namespace llvm;
 
-MCAtom *MCModule::createAtom(MCAtom::AtomType Type,
-                             uint64_t Begin, uint64_t End) {
+static bool AtomComp(const MCAtom *L, uint64_t Addr) {
+  return L->getEndAddr() < Addr; // True when L ends strictly before Addr.
+}
+
+void MCModule::map(MCAtom *NewAtom) {
+  uint64_t Begin = NewAtom->Begin,
+           End = NewAtom->End;
+
   assert(Begin < End && "Creating MCAtom with endpoints reversed?");
 
   // Check for atoms already covering this range.
-  IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Begin);
-  assert((!I.valid() || I.start() < End) && "Offset range already occupied!");
+  AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
+                                            Begin, AtomComp);
+  assert((I == atom_end() || (*I)->getBeginAddr() > End)
+         && "Offset range already occupied!");
 
-  // Create the new atom and add it to our maps.
-  MCAtom *NewAtom = new MCAtom(Type, this, Begin, End);
-  AtomAllocationTracker.insert(NewAtom);
-  OffsetMap.insert(Begin, End, NewAtom);
+  // Insert the new atom to the list.
+  Atoms.insert(I, NewAtom);
+}
+
+MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) {
+  MCTextAtom *NewAtom = new MCTextAtom(this, Begin, End);
+  map(NewAtom); // Inserts the atom into the module's atom list.
+  return NewAtom;
+}
+
+MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) {
+  MCDataAtom *NewAtom = new MCDataAtom(this, Begin, End);
+  map(NewAtom);
   return NewAtom;
 }
 
 // remap - Update the interval mapping for an atom.
 void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) {
   // Find and erase the old mapping.
-  IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Atom->Begin);
-  assert(I.valid() && "Atom offset not found in module!");
+  AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
+                                            Atom->Begin, AtomComp);
+  assert(I != atom_end() && "Atom offset not found in module!");
   assert(*I == Atom && "Previous atom mapping was invalid!");
-  I.erase();
+  Atoms.erase(I);
 
   // Insert the new mapping.
-  OffsetMap.insert(NewBegin, NewEnd, Atom);
+  AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(),
+                                               NewBegin, AtomComp);
+  Atoms.insert(NewI, Atom);
 
   // Update the atom internal bounds.
   Atom->Begin = NewBegin;
   Atom->End = NewEnd;
 }
 
+const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const {
+  // First atom whose end address is not below Addr (see AtomComp).
+  AtomListTy::const_iterator Pos = std::lower_bound(atom_begin(), atom_end(),
+                                                    Addr, AtomComp);
+  // It contains Addr only if it also begins at or before Addr.
+  return (Pos == atom_end() || (*Pos)->getBeginAddr() > Addr) ? 0 : *Pos;
+}
+
+MCAtom *MCModule::findAtomContaining(uint64_t Addr) {
+  // First atom whose end address is not below Addr (see AtomComp).
+  AtomListTy::iterator Pos = std::lower_bound(atom_begin(), atom_end(),
+                                              Addr, AtomComp);
+  // It contains Addr only if it also begins at or before Addr.
+  return (Pos == atom_end() || (*Pos)->getBeginAddr() > Addr) ? 0 : *Pos;
+}
+
+MCFunction *MCModule::createFunction(const StringRef &Name) {
+  Functions.push_back(new MCFunction(Name)); // Appended to Functions.
+  return Functions.back(); // The function just appended.
+}
+
+MCModule::~MCModule() {
+  // Delete every atom in [atom_begin(), atom_end()).
+  AtomListTy::iterator AI = atom_begin(), AE = atom_end();
+  while (AI != AE)
+    delete *AI++;
+  // Likewise delete every function in [func_begin(), func_end()).
+  FunctionListTy::iterator FI = func_begin(), FE = func_end();
+  while (FI != FE)
+    delete *FI++;
+}
diff --git a/lib/MC/MCObjectDisassembler.cpp b/lib/MC/MCObjectDisassembler.cpp
new file mode 100644
index 0000000000..bb3de1779e
--- /dev/null
+++ b/lib/MC/MCObjectDisassembler.cpp
@@ -0,0 +1,216 @@
+//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectDisassembler.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCModule.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/StringRefMemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <set>
+
+using namespace llvm;
+using namespace object;
+
+MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
+                                           const MCDisassembler &Dis,
+                                           const MCInstrAnalysis &MIA)
+  : Obj(Obj), Dis(Dis), MIA(MIA) {} // Only initializes members; empty body.
+
+// Create a module, fill in section atoms and, if withCFG, the CFG.
+MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
+  MCModule *Result = new MCModule;
+  buildSectionAtoms(Result);
+  if (withCFG) buildCFG(Result);
+  return Result;
+}
+
+// Create one atom per text or data section of the object. A section whose
+// properties or contents cannot be read is skipped.
+void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
+  error_code ec;
+  for (section_iterator SI = Obj.begin_sections(),
+                        SE = Obj.end_sections();
+                        SI != SE;
+                        SI.increment(ec)) {
+    if (ec) break;
+
+    bool isText = false, isData = false;
+    if (SI->isText(isText) || SI->isData(isData) || (!isData && !isText))
+      continue;
+
+    uint64_t StartAddr = UnknownAddressOrSize, SecSize = UnknownAddressOrSize;
+    if (SI->getAddress(StartAddr) || SI->getSize(SecSize) ||
+        StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
+      continue;
+
+    // We don't care about things like non-file-backed sections yet.
+    StringRef Contents;
+    if (SI->getContents(Contents) || Contents.size() != SecSize || !SecSize)
+      continue;
+    StringRefMemoryObject memoryObject(Contents);
+    uint64_t EndAddr = StartAddr + SecSize - 1;
+
+    StringRef SecName; SI->getName(SecName);
+
+    if (isText) {
+      MCTextAtom *Text = Module->createTextAtom(StartAddr, EndAddr);
+      Text->setName(SecName);
+      uint64_t InstSize;
+      for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
+        MCInst Inst;
+        if (Dis.getInstruction(Inst, InstSize, memoryObject, Index,
+                               nulls(), nulls()))
+          Text->addInst(Inst, InstSize);
+        else
+          // We don't care about splitting mixed atoms either.
+          llvm_unreachable("Couldn't disassemble instruction in atom.");
+      }
+
+    } else {
+      MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
+      Data->setName(SecName);
+      for (uint64_t Index = 0; Index < SecSize; ++Index)
+        Data->addData(Contents[Index]);
+    }
+  }
+}
+
+namespace {
+  struct BBInfo;
+  typedef std::set<BBInfo*> BBInfoSetTy;
+
+  // Scratch CFG node used by buildCFG; Atom/BB stay null until assigned.
+  struct BBInfo {
+    MCTextAtom *Atom;
+    MCBasicBlock *BB;
+    BBInfoSetTy Succs, Preds;
+    BBInfo() : Atom(0), BB(0) {}
+    void addSucc(BBInfo &Succ) { // Records the edge on both endpoints.
+      Succs.insert(&Succ);
+      Succ.Preds.insert(this);
+    }
+  };
+}
+
+// Split text atoms into basic blocks and group them into functions: one per
+// text atom start or call target, with predecessor/successor links filled in.
+void MCObjectDisassembler::buildCFG(MCModule *Module) {
+  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
+  BBInfoByAddrTy BBInfos;
+  typedef std::set<uint64_t> AddressSetTy;
+  AddressSetTy Splits;
+  AddressSetTy Calls;
+
+  assert(Module->func_begin() == Module->func_end()
+         && "Module already has a CFG!");
+
+  // First, determine the basic block boundaries and call targets.
+  for (MCModule::atom_iterator AI = Module->atom_begin(),
+                               AE = Module->atom_end();
+       AI != AE; ++AI) {
+    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+    if (!TA) continue;
+    Calls.insert(TA->getBeginAddr());
+    for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
+         II != IE; ++II) {
+      if (MIA.isTerminator(II->Inst))
+        Splits.insert(II->Address + II->Size);
+      uint64_t Target;
+      if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
+        if (MIA.isCall(II->Inst))
+          Calls.insert(Target);
+        Splits.insert(Target);
+      }
+    }
+  }
+
+  // Split text atoms into basic block atoms.
+  for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
+       SI != SE; ++SI) {
+    MCTextAtom *TA =
+      dyn_cast_or_null<MCTextAtom>(Module->findAtomContaining(*SI));
+    if (!TA) continue; // Split point is not inside any text atom.
+    BBInfos[TA->getBeginAddr()].Atom = TA;
+    if (TA->getBeginAddr() == *SI)
+      continue;
+    MCTextAtom *NewAtom = TA->split(*SI);
+    BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
+    StringRef BBName = TA->getName();
+    BBName = BBName.substr(0, BBName.find_last_of(':'));
+    NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
+  }
+
+  // Compute succs/preds.
+  for (MCModule::atom_iterator AI = Module->atom_begin(),
+                               AE = Module->atom_end();
+                               AI != AE; ++AI) {
+    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+    if (!TA) continue;
+    BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
+    CurBB.Atom = TA; // Atoms that were never split are not registered yet.
+    const MCDecodedInst &LI = TA->back();
+    if (MIA.isBranch(LI.Inst)) {
+      uint64_t Target;
+      if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
+        CurBB.addSucc(BBInfos[Target]);
+      if (MIA.isConditionalBranch(LI.Inst))
+        CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+    } else if (!MIA.isTerminator(LI.Inst))
+      CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+  }
+
+  // Create functions and basic blocks.
+  for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
+       CI != CE; ++CI) {
+    BBInfo &BBI = BBInfos[*CI];
+    if (!BBI.Atom) continue;
+
+    MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName());
+
+    // Create MCBBs.
+    SmallSetVector<BBInfo*, 16> Worklist;
+    Worklist.insert(&BBI);
+    for (size_t WI = 0; WI < Worklist.size(); ++WI) {
+      BBInfo *Cur = Worklist[WI];
+      if (!Cur->Atom) continue; // Not the start of any text atom.
+      Cur->BB = &MCFN.createBlock(*Cur->Atom);
+      // Add all predecessors and successors to the worklist.
+      for (BBInfoSetTy::iterator SI = Cur->Succs.begin(), SE = Cur->Succs.end();
+           SI != SE; ++SI)
+        Worklist.insert(*SI);
+      for (BBInfoSetTy::iterator PI = Cur->Preds.begin(), PE = Cur->Preds.end();
+           PI != PE; ++PI)
+        Worklist.insert(*PI);
+    }
+
+    // Set preds/succs, skipping neighbours that never received a block.
+    for (size_t WI = 0; WI < Worklist.size(); ++WI) {
+      BBInfo *Cur = Worklist[WI];
+      MCBasicBlock *MCBB = Cur->BB;
+      if (!MCBB) continue;
+      for (BBInfoSetTy::iterator SI = Cur->Succs.begin(), SE = Cur->Succs.end();
+           SI != SE; ++SI)
+        if ((*SI)->BB) MCBB->addSuccessor((*SI)->BB);
+      for (BBInfoSetTy::iterator PI = Cur->Preds.begin(), PE = Cur->Preds.end();
+           PI != PE; ++PI)
+        if ((*PI)->BB) MCBB->addPredecessor((*PI)->BB);
+    }
+  }
+}