From 685a2501b20baf688f6cc087f4b92bbafcd8028e Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 20 Jul 2011 19:37:35 +0000 Subject: Sketch out a CFG reconstruction mode for llvm-objdump. - Not great yet, but it's a start. - Requires an object file with a symbol table. (I really want to fix this, but it'll need a whole new algorithm) - ELF and COFF won't work at the moment due to libObject shortcomings. To try it out run $ llvm-objdump -d --cfg foo.o This will create a graphviz file for every symbol in the object file's text section containing a CFG. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135608 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-objdump/MCFunction.cpp | 113 ++++++++++++++++++++++++++++++++++++ tools/llvm-objdump/MCFunction.h | 88 ++++++++++++++++++++++++++++ tools/llvm-objdump/llvm-objdump.cpp | 44 +++++++++++++- 3 files changed, 242 insertions(+), 3 deletions(-) create mode 100644 tools/llvm-objdump/MCFunction.cpp create mode 100644 tools/llvm-objdump/MCFunction.h (limited to 'tools') diff --git a/tools/llvm-objdump/MCFunction.cpp b/tools/llvm-objdump/MCFunction.cpp new file mode 100644 index 0000000000..dd31402ddc --- /dev/null +++ b/tools/llvm-objdump/MCFunction.cpp @@ -0,0 +1,113 @@ +//===-- MCFunction.cpp ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the algorithm to break down a region of machine code +// into basic blocks and try to reconstruct a CFG from it. 
+// +//===----------------------------------------------------------------------===// + +#include "MCFunction.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include +using namespace llvm; + +MCFunction +MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, + const MemoryObject &Region, uint64_t Start, + uint64_t End, const MCInstrInfo *InstrInfo, + raw_ostream &DebugOut) { + std::set Splits; + Splits.insert(Start); + std::vector Instructions; + uint64_t Size; + + // Disassemble code and gather basic block split points. + for (uint64_t Index = Start; Index < End; Index += Size) { + MCInst Inst; + + if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut)) { + const MCInstrDesc &Desc = InstrInfo->get(Inst.getOpcode()); + if (Desc.isBranch()) { + if (Desc.OpInfo[0].OperandType == MCOI::OPERAND_PCREL) { + int64_t Imm = Inst.getOperand(0).getImm(); + // FIXME: Distinguish relocations from nop jumps. + if (Imm != 0) { + assert(Index+Imm+Size < End && "Branch out of function."); + Splits.insert(Index+Imm+Size); + } + } + Splits.insert(Index+Size); + } + + Instructions.push_back(MCDecodedInst(Index, Size, Inst)); + } else { + errs() << "warning: invalid instruction encoding\n"; + if (Size == 0) + Size = 1; // skip illegible bytes + } + + } + + MCFunction f(Name); + + // Create basic blocks. + unsigned ii = 0, ie = Instructions.size(); + for (std::set::iterator spi = Splits.begin(), + spe = Splits.end(); spi != spe; ++spi) { + MCBasicBlock BB; + uint64_t BlockEnd = llvm::next(spi) == spe ? End : *llvm::next(spi); + // Add instructions to the BB. 
+ for (; ii != ie; ++ii) { + if (Instructions[ii].Address < *spi || + Instructions[ii].Address >= BlockEnd) + break; + BB.addInst(Instructions[ii]); + } + f.addBlock(*spi, BB); + } + + // Calculate successors of each block. + for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { + MCBasicBlock &BB = i->second; + if (BB.getInsts().empty()) continue; + const MCDecodedInst &Inst = BB.getInsts().back(); + const MCInstrDesc &Desc = InstrInfo->get(Inst.Inst.getOpcode()); + + if (Desc.isBranch()) { + // PCRel branch, we know the destination. + if (Desc.OpInfo[0].OperandType == MCOI::OPERAND_PCREL) { + int64_t Imm = Inst.Inst.getOperand(0).getImm(); + if (Imm != 0) + BB.addSucc(&f.getBlockAtAddress(Inst.Address+Inst.Size+Imm)); + // Conditional branches can also fall through to the next block. + if (Desc.isConditionalBranch() && llvm::next(i) != e) + BB.addSucc(&next(i)->second); + } else { + // Indirect branch. Bail and add all blocks of the function as a + // successor. + for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) + BB.addSucc(&i->second); + } + } else { + // No branch. Fall through to the next block. + if (!Desc.isReturn() && next(i) != e) + BB.addSucc(&next(i)->second); + } + } + + return f; +} diff --git a/tools/llvm-objdump/MCFunction.h b/tools/llvm-objdump/MCFunction.h new file mode 100644 index 0000000000..60f6429209 --- /dev/null +++ b/tools/llvm-objdump/MCFunction.h @@ -0,0 +1,88 @@ +//===-- MCFunction.h ------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the data structures to hold a CFG reconstructed from +// machine code. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/MC/MCInst.h" +#include + +namespace llvm { + +class MCDisassembler; +class MCInstrInfo; +class MemoryObject; +class raw_ostream; + +/// MCDecodedInst - Small container to hold an MCInst and associated info like +/// address and size. +struct MCDecodedInst { + uint64_t Address; + uint64_t Size; + MCInst Inst; + + MCDecodedInst(uint64_t Address, uint64_t Size, MCInst Inst) + : Address(Address), Size(Size), Inst(Inst) {} +}; + +/// MCBasicBlock - Consists of multiple MCDecodedInsts and a list of successor +/// MCBasicBlocks. +class MCBasicBlock { + SmallVector Insts; + typedef SmallPtrSet SetTy; + SetTy Succs; +public: + ArrayRef getInsts() const { return Insts; } + + typedef SetTy::const_iterator succ_iterator; + succ_iterator succ_begin() const { return Succs.begin(); } + succ_iterator succ_end() const { return Succs.end(); } + + void addInst(const MCDecodedInst &Inst) { Insts.push_back(Inst); } + void addSucc(MCBasicBlock *BB) { Succs.insert(BB); } +}; + +/// MCFunction - Represents a named function in machine code, containing +/// multiple MCBasicBlocks. +class MCFunction { + const StringRef Name; + // Keep BBs sorted by address. + typedef std::map MapTy; + MapTy Blocks; +public: + MCFunction(StringRef Name) : Name(Name) {} + + // Create an MCFunction from a region of binary machine code. 
+ static MCFunction + createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, + const MemoryObject &Region, uint64_t Start, uint64_t End, + const MCInstrInfo *InstrInfo, raw_ostream &DebugOut); + + typedef MapTy::iterator iterator; + iterator begin() { return Blocks.begin(); } + iterator end() { return Blocks.end(); } + + StringRef getName() const { return Name; } + + MCBasicBlock &addBlock(uint64_t Address, const MCBasicBlock &BB) { + assert(!Blocks.count(Address) && "Already a BB at address."); + return Blocks[Address] = BB; + } + + MCBasicBlock &getBlockAtAddress(uint64_t Address) { + assert(Blocks.count(Address) && "No BB at address."); + return Blocks[Address]; + } +}; + +} diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 8f6e9df275..21df7ccd9d 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "MCFunction.h" #include "llvm/Object/ObjectFile.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/Triple.h" @@ -21,6 +22,8 @@ #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" @@ -52,6 +55,10 @@ namespace { Disassembled("d", cl::desc("Alias for --disassemble"), cl::aliasopt(Disassemble)); + cl::opt + CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and" + "write it to a graphviz file")); + cl::opt TripleName("triple", cl::desc("Target triple to disassemble for, " "see -version for available targets")); @@ -156,6 +163,7 @@ static void DisassembleInput(const StringRef &Filename) { // GetTarget prints out stuff. 
return; } + const MCInstrInfo *InstrInfo = TheTarget->createMCInstrInfo(); outs() << '\n'; outs() << Filename @@ -233,15 +241,14 @@ static void DisassembleInput(const StringRef &Filename) { uint64_t End = si == se-1 ? SectSize : Symbols[si + 1].first - 1; outs() << '\n' << Symbols[si].second << ":\n"; - for (Index = Start; Index < End; Index += Size) { - MCInst Inst; - #ifndef NDEBUG raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls(); #else raw_ostream &DebugOut = nulls(); #endif + for (Index = Start; Index < End; Index += Size) { + MCInst Inst; if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, DebugOut)) { uint64_t addr; if (error(i->getAddress(addr))) break; @@ -255,6 +262,36 @@ static void DisassembleInput(const StringRef &Filename) { Size = 1; // skip illegible bytes } } + + if (CFG) { + MCFunction f = + MCFunction::createFunctionFromMC(Symbols[si].second, DisAsm.get(), + memoryObject, Start, End, InstrInfo, + DebugOut); + + // Start a new dot file. + std::string Error; + raw_fd_ostream Out((f.getName().str() + ".dot").c_str(), Error); + + Out << "digraph " << f.getName() << " {\n"; + Out << "graph [ rankdir = \"LR\" ];\n"; + for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { + Out << '"' << (uintptr_t)&i->second << "\" [ label=\""; + // Print instructions. + for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie; + ++ii) { + IP->printInst(&i->second.getInsts()[ii].Inst, Out); + Out << '|'; + } + Out << "\" shape=\"record\" ];\n"; + + // Add edges. 
+ for (MCBasicBlock::succ_iterator si = i->second.succ_begin(), + se = i->second.succ_end(); si != se; ++si) + Out << (uintptr_t)&i->second << ":o -> " << (uintptr_t)*si <<":a\n"; + } + Out << "}\n"; + } } } } @@ -271,6 +308,7 @@ int main(int argc, char **argv) { llvm::InitializeAllTargets(); llvm::InitializeAllMCAsmInfos(); llvm::InitializeAllMCCodeGenInfos(); + llvm::InitializeAllMCInstrInfos(); llvm::InitializeAllAsmPrinters(); llvm::InitializeAllAsmParsers(); llvm::InitializeAllDisassemblers(); -- cgit v1.2.3