summaryrefslogtreecommitdiff
path: root/lib/MC
diff options
context:
space:
mode:
Diffstat (limited to 'lib/MC')
-rw-r--r--lib/MC/CMakeLists.txt2
-rw-r--r--lib/MC/MCAtom.cpp121
-rw-r--r--lib/MC/MCFunction.cpp55
-rw-r--r--lib/MC/MCInstrAnalysis.cpp9
-rw-r--r--lib/MC/MCModule.cpp79
-rw-r--r--lib/MC/MCObjectDisassembler.cpp216
6 files changed, 411 insertions, 71 deletions
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 5377c5c8d8..89e2aaf48b 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_library(LLVMMC
MCELF.cpp
MCELFObjectTargetWriter.cpp
MCELFStreamer.cpp
+ MCFunction.cpp
MCExpr.cpp
MCExternalSymbolizer.cpp
MCInst.cpp
@@ -26,6 +27,7 @@ add_llvm_library(LLVMMC
MCModule.cpp
MCNullStreamer.cpp
MCObjectFileInfo.cpp
+ MCObjectDisassembler.cpp
MCObjectStreamer.cpp
MCObjectSymbolizer.cpp
MCObjectWriter.cpp
diff --git a/lib/MC/MCAtom.cpp b/lib/MC/MCAtom.cpp
index d71444324f..2626b39db4 100644
--- a/lib/MC/MCAtom.cpp
+++ b/lib/MC/MCAtom.cpp
@@ -10,88 +10,101 @@
#include "llvm/MC/MCAtom.h"
#include "llvm/MC/MCModule.h"
#include "llvm/Support/ErrorHandling.h"
+#include <iterator>
using namespace llvm;
-void MCAtom::addInst(const MCInst &I, uint64_t Address, unsigned Size) {
- assert(Type == TextAtom && "Trying to add MCInst to a non-text atom!");
-
- assert(Address < End+Size &&
- "Instruction not contiguous with end of atom!");
- if (Address > End)
- Parent->remap(this, Begin, End+Size);
-
- Text.push_back(std::make_pair(Address, I));
+void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) {
+ Parent->remap(this, NewBegin, NewEnd);
}
-void MCAtom::addData(const MCData &D) {
- assert(Type == DataAtom && "Trying to add MCData to a non-data atom!");
- Parent->remap(this, Begin, End+1);
-
- Data.push_back(D);
+void MCAtom::remapForTruncate(uint64_t TruncPt) {
+ assert((TruncPt >= Begin && TruncPt < End) &&
+ "Truncation point not contained in atom!");
+ remap(Begin, TruncPt);
}
-MCAtom *MCAtom::split(uint64_t SplitPt) {
+void MCAtom::remapForSplit(uint64_t SplitPt,
+ uint64_t &LBegin, uint64_t &LEnd,
+ uint64_t &RBegin, uint64_t &REnd) {
assert((SplitPt > Begin && SplitPt <= End) &&
"Splitting at point not contained in atom!");
// Compute the new begin/end points.
- uint64_t LeftBegin = Begin;
- uint64_t LeftEnd = SplitPt - 1;
- uint64_t RightBegin = SplitPt;
- uint64_t RightEnd = End;
+ LBegin = Begin;
+ LEnd = SplitPt - 1;
+ RBegin = SplitPt;
+ REnd = End;
// Remap this atom to become the lower of the two new ones.
- Parent->remap(this, LeftBegin, LeftEnd);
+ remap(LBegin, LEnd);
+}
- // Create a new atom for the higher atom.
- MCAtom *RightAtom = Parent->createAtom(Type, RightBegin, RightEnd);
+// MCDataAtom
- // Split the contents of the original atom between it and the new one. The
- // precise method depends on whether this is a data or a text atom.
- if (isDataAtom()) {
- std::vector<MCData>::iterator I = Data.begin() + (RightBegin - LeftBegin);
+void MCDataAtom::addData(const MCData &D) {
+ Data.push_back(D);
+ if (Data.size() > Begin - End)
+ remap(Begin, End + 1);
+}
- assert(I != Data.end() && "Split point not found in range!");
+void MCDataAtom::truncate(uint64_t TruncPt) {
+ remapForTruncate(TruncPt);
- std::copy(I, Data.end(), RightAtom->Data.end());
- Data.erase(I, Data.end());
- } else if (isTextAtom()) {
- std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+ Data.resize(TruncPt - Begin + 1);
+}
- while (I != Text.end() && I->first < SplitPt) ++I;
+MCDataAtom *MCDataAtom::split(uint64_t SplitPt) {
+ uint64_t LBegin, LEnd, RBegin, REnd;
+ remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd);
- assert(I != Text.end() && "Split point not found in disassembly!");
- assert(I->first == SplitPt &&
- "Split point does not fall on instruction boundary!");
+ MCDataAtom *RightAtom = Parent->createDataAtom(RBegin, REnd);
+ RightAtom->setName(getName());
- std::copy(I, Text.end(), RightAtom->Text.end());
- Text.erase(I, Text.end());
- } else
- llvm_unreachable("Unknown atom type!");
+ std::vector<MCData>::iterator I = Data.begin() + (RBegin - LBegin);
+ assert(I != Data.end() && "Split point not found in range!");
+ std::copy(I, Data.end(), std::back_inserter(RightAtom->Data));
+ Data.erase(I, Data.end());
return RightAtom;
}
-void MCAtom::truncate(uint64_t TruncPt) {
- assert((TruncPt >= Begin && TruncPt < End) &&
- "Truncation point not contained in atom!");
+// MCTextAtom
- Parent->remap(this, Begin, TruncPt);
+void MCTextAtom::addInst(const MCInst &I, uint64_t Size) {
+ if (NextInstAddress > End)
+ remap(Begin, NextInstAddress);
+ Insts.push_back(MCDecodedInst(I, NextInstAddress, Size));
+ NextInstAddress += Size;
+}
- if (isDataAtom()) {
- Data.resize(TruncPt - Begin + 1);
- } else if (isTextAtom()) {
- std::vector<std::pair<uint64_t, MCInst> >::iterator I = Text.begin();
+void MCTextAtom::truncate(uint64_t TruncPt) {
+ remapForTruncate(TruncPt);
- while (I != Text.end() && I->first <= TruncPt) ++I;
+ InstListTy::iterator I = Insts.begin();
+ while (I != Insts.end() && I->Address <= TruncPt) ++I;
- assert(I != Text.end() && "Truncation point not found in disassembly!");
- assert(I->first == TruncPt+1 &&
- "Truncation point does not fall on instruction boundary");
+ assert(I != Insts.end() && "Truncation point not found in disassembly!");
+ assert(I->Address == TruncPt + 1 &&
+ "Truncation point does not fall on instruction boundary");
- Text.erase(I, Text.end());
- } else
- llvm_unreachable("Unknown atom type!");
+ Insts.erase(I, Insts.end());
}
+MCTextAtom *MCTextAtom::split(uint64_t SplitPt) {
+ uint64_t LBegin, LEnd, RBegin, REnd;
+ remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd);
+
+ MCTextAtom *RightAtom = Parent->createTextAtom(RBegin, REnd);
+ RightAtom->setName(getName());
+
+ InstListTy::iterator I = Insts.begin();
+ while (I != Insts.end() && I->Address < SplitPt) ++I;
+ assert(I != Insts.end() && "Split point not found in disassembly!");
+ assert(I->Address == SplitPt &&
+ "Split point does not fall on instruction boundary!");
+
+ std::copy(I, Insts.end(), std::back_inserter(RightAtom->Insts));
+ Insts.erase(I, Insts.end());
+ return RightAtom;
+}
diff --git a/lib/MC/MCFunction.cpp b/lib/MC/MCFunction.cpp
new file mode 100644
index 0000000000..2665d3e167
--- /dev/null
+++ b/lib/MC/MCFunction.cpp
@@ -0,0 +1,55 @@
+//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+// MCFunction
+
+MCFunction::MCFunction(StringRef Name)
+ : Name(Name)
+{}
+
+MCFunction::~MCFunction() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+}
+
+MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) {
+ Blocks.push_back(new MCBasicBlock(TA, this));
+ return *Blocks.back();
+}
+
+// MCBasicBlock
+
+MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent)
+ : Insts(&Insts), Parent(Parent)
+{}
+
+void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) {
+ Successors.push_back(MCBB);
+}
+
+bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const {
+ return std::find(Successors.begin(), Successors.end(),
+ MCBB) != Successors.end();
+}
+
+void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) {
+ Predecessors.push_back(MCBB);
+}
+
+bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const {
+ return std::find(Predecessors.begin(), Predecessors.end(),
+ MCBB) != Predecessors.end();
+}
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
index 7736702f35..2d8336d77a 100644
--- a/lib/MC/MCInstrAnalysis.cpp
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -10,12 +10,13 @@
#include "llvm/MC/MCInstrAnalysis.h"
using namespace llvm;
-uint64_t MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
- uint64_t Size) const {
+bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const {
if (Inst.getNumOperands() == 0 ||
Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL)
- return -1ULL;
+ return false;
int64_t Imm = Inst.getOperand(0).getImm();
- return Addr+Size+Imm;
+ Target = Addr+Size+Imm;
+ return true;
}
diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp
index f563160833..50bac476fa 100644
--- a/lib/MC/MCModule.cpp
+++ b/lib/MC/MCModule.cpp
@@ -7,39 +7,92 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAtom.h"
#include "llvm/MC/MCModule.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCFunction.h"
+#include <algorithm>
using namespace llvm;
-MCAtom *MCModule::createAtom(MCAtom::AtomType Type,
- uint64_t Begin, uint64_t End) {
+static bool AtomComp(const MCAtom *L, uint64_t Addr) {
+ return L->getEndAddr() < Addr;
+}
+
+void MCModule::map(MCAtom *NewAtom) {
+ uint64_t Begin = NewAtom->Begin,
+ End = NewAtom->End;
+
assert(Begin < End && "Creating MCAtom with endpoints reversed?");
// Check for atoms already covering this range.
- IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Begin);
- assert((!I.valid() || I.start() < End) && "Offset range already occupied!");
+ AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
+ Begin, AtomComp);
+ assert((I == atom_end() || (*I)->getBeginAddr() > End)
+ && "Offset range already occupied!");
- // Create the new atom and add it to our maps.
- MCAtom *NewAtom = new MCAtom(Type, this, Begin, End);
- AtomAllocationTracker.insert(NewAtom);
- OffsetMap.insert(Begin, End, NewAtom);
+ // Insert the new atom to the list.
+ Atoms.insert(I, NewAtom);
+}
+
+MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) {
+ MCTextAtom *NewAtom = new MCTextAtom(this, Begin, End);
+ map(NewAtom);
+ return NewAtom;
+}
+
+MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) {
+ MCDataAtom *NewAtom = new MCDataAtom(this, Begin, End);
+ map(NewAtom);
return NewAtom;
}
// remap - Update the interval mapping for an atom.
void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) {
// Find and erase the old mapping.
- IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Atom->Begin);
- assert(I.valid() && "Atom offset not found in module!");
+ AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
+ Atom->Begin, AtomComp);
+ assert(I != atom_end() && "Atom offset not found in module!");
assert(*I == Atom && "Previous atom mapping was invalid!");
- I.erase();
+ Atoms.erase(I);
// Insert the new mapping.
- OffsetMap.insert(NewBegin, NewEnd, Atom);
+ AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(),
+ NewBegin, AtomComp);
+ Atoms.insert(NewI, Atom);
// Update the atom internal bounds.
Atom->Begin = NewBegin;
Atom->End = NewEnd;
}
+const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const {
+ AtomListTy::const_iterator I = std::lower_bound(atom_begin(), atom_end(),
+ Addr, AtomComp);
+ if (I != atom_end() && (*I)->getBeginAddr() <= Addr)
+ return *I;
+ return 0;
+}
+
+MCAtom *MCModule::findAtomContaining(uint64_t Addr) {
+ AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
+ Addr, AtomComp);
+ if (I != atom_end() && (*I)->getBeginAddr() <= Addr)
+ return *I;
+ return 0;
+}
+
+MCFunction *MCModule::createFunction(const StringRef &Name) {
+ Functions.push_back(new MCFunction(Name));
+ return Functions.back();
+}
+
+MCModule::~MCModule() {
+ for (AtomListTy::iterator AI = atom_begin(),
+ AE = atom_end();
+ AI != AE; ++AI)
+ delete *AI;
+ for (FunctionListTy::iterator FI = func_begin(),
+ FE = func_end();
+ FI != FE; ++FI)
+ delete *FI;
+}
diff --git a/lib/MC/MCObjectDisassembler.cpp b/lib/MC/MCObjectDisassembler.cpp
new file mode 100644
index 0000000000..bb3de1779e
--- /dev/null
+++ b/lib/MC/MCObjectDisassembler.cpp
@@ -0,0 +1,216 @@
+//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectDisassembler.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCModule.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/StringRefMemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <set>
+
+using namespace llvm;
+using namespace object;
+
+MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
+ const MCDisassembler &Dis,
+ const MCInstrAnalysis &MIA)
+ : Obj(Obj), Dis(Dis), MIA(MIA) {}
+
+MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
+ MCModule *Module = new MCModule;
+ buildSectionAtoms(Module);
+ if (withCFG)
+ buildCFG(Module);
+ return Module;
+}
+
+void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
+ error_code ec;
+ for (section_iterator SI = Obj.begin_sections(),
+ SE = Obj.end_sections();
+ SI != SE;
+ SI.increment(ec)) {
+ if (ec) break;
+
+ bool isText; SI->isText(isText);
+ bool isData; SI->isData(isData);
+ if (!isData && !isText)
+ continue;
+
+ uint64_t StartAddr; SI->getAddress(StartAddr);
+ uint64_t SecSize; SI->getSize(SecSize);
+ if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
+ continue;
+
+ StringRef Contents; SI->getContents(Contents);
+ StringRefMemoryObject memoryObject(Contents);
+
+ // We don't care about things like non-file-backed sections yet.
+ if (Contents.size() != SecSize || !SecSize)
+ continue;
+ uint64_t EndAddr = StartAddr + SecSize - 1;
+
+ StringRef SecName; SI->getName(SecName);
+
+ if (isText) {
+ MCTextAtom *Text = Module->createTextAtom(StartAddr, EndAddr);
+ Text->setName(SecName);
+ uint64_t InstSize;
+ for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
+ MCInst Inst;
+ if (Dis.getInstruction(Inst, InstSize, memoryObject, Index,
+ nulls(), nulls()))
+ Text->addInst(Inst, InstSize);
+ else
+ // We don't care about splitting mixed atoms either.
+ llvm_unreachable("Couldn't disassemble instruction in atom.");
+ }
+
+ } else {
+ MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
+ Data->setName(SecName);
+ for (uint64_t Index = 0; Index < SecSize; ++Index)
+ Data->addData(Contents[Index]);
+ }
+ }
+}
+
+namespace {
+ struct BBInfo;
+ typedef std::set<BBInfo*> BBInfoSetTy;
+
+ struct BBInfo {
+ MCTextAtom *Atom;
+ MCBasicBlock *BB;
+ BBInfoSetTy Succs;
+ BBInfoSetTy Preds;
+
+ void addSucc(BBInfo &Succ) {
+ Succs.insert(&Succ);
+ Succ.Preds.insert(this);
+ }
+ };
+}
+
+void MCObjectDisassembler::buildCFG(MCModule *Module) {
+ typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
+ BBInfoByAddrTy BBInfos;
+ typedef std::set<uint64_t> AddressSetTy;
+ AddressSetTy Splits;
+ AddressSetTy Calls;
+
+ assert(Module->func_begin() == Module->func_end()
+ && "Module already has a CFG!");
+
+ // First, determine the basic block boundaries and call targets.
+ for (MCModule::atom_iterator AI = Module->atom_begin(),
+ AE = Module->atom_end();
+ AI != AE; ++AI) {
+ MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+ if (!TA) continue;
+ Calls.insert(TA->getBeginAddr());
+ for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
+ II != IE; ++II) {
+ if (MIA.isTerminator(II->Inst))
+ Splits.insert(II->Address + II->Size);
+ uint64_t Target;
+ if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
+ if (MIA.isCall(II->Inst))
+ Calls.insert(Target);
+ Splits.insert(Target);
+ }
+ }
+ }
+
+ // Split text atoms into basic block atoms.
+ for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
+ SI != SE; ++SI) {
+ MCAtom *A = Module->findAtomContaining(*SI);
+ if (!A) continue;
+ MCTextAtom *TA = cast<MCTextAtom>(A);
+ BBInfos[TA->getBeginAddr()].Atom = TA;
+ if (TA->getBeginAddr() == *SI)
+ continue;
+ MCTextAtom *NewAtom = TA->split(*SI);
+ BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
+ StringRef BBName = TA->getName();
+ BBName = BBName.substr(0, BBName.find_last_of(':'));
+ NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
+ }
+
+ // Compute succs/preds.
+ for (MCModule::atom_iterator AI = Module->atom_begin(),
+ AE = Module->atom_end();
+ AI != AE; ++AI) {
+ MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+ if (!TA) continue;
+ BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
+ const MCDecodedInst &LI = TA->back();
+ if (MIA.isBranch(LI.Inst)) {
+ uint64_t Target;
+ if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
+ CurBB.addSucc(BBInfos[Target]);
+ if (MIA.isConditionalBranch(LI.Inst))
+ CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+ } else if (!MIA.isTerminator(LI.Inst))
+ CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+ }
+
+
+ // Create functions and basic blocks.
+ for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
+ CI != CE; ++CI) {
+ BBInfo &BBI = BBInfos[*CI];
+ if (!BBI.Atom) continue;
+
+ MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName());
+
+ // Create MCBBs.
+ SmallSetVector<BBInfo*, 16> Worklist;
+ Worklist.insert(&BBI);
+ for (size_t WI = 0; WI < Worklist.size(); ++WI) {
+ BBInfo *BBI = Worklist[WI];
+ if (!BBI->Atom)
+ continue;
+ BBI->BB = &MCFN.createBlock(*BBI->Atom);
+ // Add all predecessors and successors to the worklist.
+ for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
+ SI != SE; ++SI)
+ Worklist.insert(*SI);
+ for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
+ PI != PE; ++PI)
+ Worklist.insert(*PI);
+ }
+
+ // Set preds/succs.
+ for (size_t WI = 0; WI < Worklist.size(); ++WI) {
+ BBInfo *BBI = Worklist[WI];
+ MCBasicBlock *MCBB = BBI->BB;
+ if (!MCBB)
+ continue;
+ for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
+ SI != SE; ++SI)
+ MCBB->addSuccessor((*SI)->BB);
+ for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
+ PI != PE; ++PI)
+ MCBB->addPredecessor((*PI)->BB);
+ }
+ }
+}