diff options
Diffstat (limited to 'tools/llvm-objdump')
-rw-r--r-- | tools/llvm-objdump/CMakeLists.txt | 1 | ||||
-rw-r--r-- | tools/llvm-objdump/MCFunction.cpp | 138 | ||||
-rw-r--r-- | tools/llvm-objdump/MCFunction.h | 100 | ||||
-rw-r--r-- | tools/llvm-objdump/MachODump.cpp | 334 | ||||
-rw-r--r-- | tools/llvm-objdump/llvm-objdump.cpp | 89 | ||||
-rw-r--r-- | tools/llvm-objdump/llvm-objdump.h | 21 |
6 files changed, 130 insertions, 553 deletions
diff --git a/tools/llvm-objdump/CMakeLists.txt b/tools/llvm-objdump/CMakeLists.txt index 0c49d0b457..e983ec92fb 100644 --- a/tools/llvm-objdump/CMakeLists.txt +++ b/tools/llvm-objdump/CMakeLists.txt @@ -12,5 +12,4 @@ add_llvm_tool(llvm-objdump COFFDump.cpp ELFDump.cpp MachODump.cpp - MCFunction.cpp ) diff --git a/tools/llvm-objdump/MCFunction.cpp b/tools/llvm-objdump/MCFunction.cpp deleted file mode 100644 index 5c67f1b70a..0000000000 --- a/tools/llvm-objdump/MCFunction.cpp +++ /dev/null @@ -1,138 +0,0 @@ -//===-- MCFunction.cpp ----------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the algorithm to break down a region of machine code -// into basic blocks and try to reconstruct a CFG from it. -// -//===----------------------------------------------------------------------===// - -#include "MCFunction.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCInstrAnalysis.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/Support/MemoryObject.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" -#include <set> -using namespace llvm; - -MCFunction -MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, - const MemoryObject &Region, uint64_t Start, - uint64_t End, const MCInstrAnalysis *Ana, - raw_ostream &DebugOut, - SmallVectorImpl<uint64_t> &Calls) { - std::vector<MCDecodedInst> Instructions; - std::set<uint64_t> Splits; - Splits.insert(Start); - uint64_t Size; - - MCFunction f(Name); - - { - DenseSet<uint64_t> VisitedInsts; - SmallVector<uint64_t, 16> WorkList; - WorkList.push_back(Start); - // Disassemble code and gather basic block split points. - while (!WorkList.empty()) { - uint64_t Index = WorkList.pop_back_val(); - if (VisitedInsts.find(Index) != VisitedInsts.end()) - continue; // Already visited this location. - - for (;Index < End; Index += Size) { - VisitedInsts.insert(Index); - - MCInst Inst; - if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){ - Instructions.push_back(MCDecodedInst(Index, Size, Inst)); - if (Ana->isBranch(Inst)) { - uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); - if (targ != -1ULL && targ == Index+Size) - continue; // Skip nop jumps. - - // If we could determine the branch target, make a note to start a - // new basic block there and add the target to the worklist. - if (targ != -1ULL) { - Splits.insert(targ); - WorkList.push_back(targ); - WorkList.push_back(Index+Size); - } - Splits.insert(Index+Size); - break; - } else if (Ana->isReturn(Inst)) { - // Return instruction. This basic block ends here. - Splits.insert(Index+Size); - break; - } else if (Ana->isCall(Inst)) { - uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); - // Add the call to the call list if the destination is known. - if (targ != -1ULL && targ != Index+Size) - Calls.push_back(targ); - } - } else { - errs().write_hex(Index) << ": warning: invalid instruction encoding\n"; - if (Size == 0) - Size = 1; // skip illegible bytes - } - } - } - } - - // Make sure the instruction list is sorted. - std::sort(Instructions.begin(), Instructions.end()); - - // Create basic blocks. - unsigned ii = 0, ie = Instructions.size(); - for (std::set<uint64_t>::iterator spi = Splits.begin(), - spe = llvm::prior(Splits.end()); spi != spe; ++spi) { - MCBasicBlock BB; - uint64_t BlockEnd = *llvm::next(spi); - // Add instructions to the BB. - for (; ii != ie; ++ii) { - if (Instructions[ii].Address < *spi || - Instructions[ii].Address >= BlockEnd) - break; - BB.addInst(Instructions[ii]); - } - f.addBlock(*spi, BB); - } - - std::sort(f.Blocks.begin(), f.Blocks.end()); - - // Calculate successors of each block. - for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { - MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second); - if (BB.getInsts().empty()) continue; - const MCDecodedInst &Inst = BB.getInsts().back(); - - if (Ana->isBranch(Inst.Inst)) { - uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size); - if (targ == -1ULL) { - // Indirect branch. Bail and add all blocks of the function as a - // successor. - for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) - BB.addSucc(i->first); - } else if (targ != Inst.Address+Inst.Size) - BB.addSucc(targ); - // Conditional branches can also fall through to the next block. - if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e) - BB.addSucc(llvm::next(i)->first); - } else { - // No branch. Fall through to the next block. - if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e) - BB.addSucc(llvm::next(i)->first); - } - } - - return f; -} diff --git a/tools/llvm-objdump/MCFunction.h b/tools/llvm-objdump/MCFunction.h deleted file mode 100644 index 6d3a548d48..0000000000 --- a/tools/llvm-objdump/MCFunction.h +++ /dev/null @@ -1,100 +0,0 @@ -//===-- MCFunction.h ------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the data structures to hold a CFG reconstructed from -// machine code. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_OBJECTDUMP_MCFUNCTION_H -#define LLVM_OBJECTDUMP_MCFUNCTION_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/MC/MCInst.h" -#include <map> - -namespace llvm { - -class MCDisassembler; -class MCInstrAnalysis; -class MemoryObject; -class raw_ostream; - -/// MCDecodedInst - Small container to hold an MCInst and associated info like -/// address and size. -struct MCDecodedInst { - uint64_t Address; - uint64_t Size; - MCInst Inst; - - MCDecodedInst() {} - MCDecodedInst(uint64_t Address, uint64_t Size, MCInst Inst) - : Address(Address), Size(Size), Inst(Inst) {} - - bool operator<(const MCDecodedInst &RHS) const { - return Address < RHS.Address; - } -}; - -/// MCBasicBlock - Consists of multiple MCDecodedInsts and a list of successing -/// MCBasicBlocks. -class MCBasicBlock { - std::vector<MCDecodedInst> Insts; - typedef DenseSet<uint64_t> SetTy; - SetTy Succs; -public: - ArrayRef<MCDecodedInst> getInsts() const { return Insts; } - - typedef SetTy::const_iterator succ_iterator; - succ_iterator succ_begin() const { return Succs.begin(); } - succ_iterator succ_end() const { return Succs.end(); } - - bool contains(uint64_t Addr) const { return Succs.count(Addr); } - - void addInst(const MCDecodedInst &Inst) { Insts.push_back(Inst); } - void addSucc(uint64_t Addr) { Succs.insert(Addr); } - - bool operator<(const MCBasicBlock &RHS) const { - return Insts.size() < RHS.Insts.size(); - } -}; - -/// MCFunction - Represents a named function in machine code, containing -/// multiple MCBasicBlocks. -class MCFunction { - const StringRef Name; - // Keep BBs sorted by address. - typedef std::vector<std::pair<uint64_t, MCBasicBlock> > MapTy; - MapTy Blocks; -public: - MCFunction(StringRef Name) : Name(Name) {} - - // Create an MCFunction from a region of binary machine code. - static MCFunction - createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, - const MemoryObject &Region, uint64_t Start, uint64_t End, - const MCInstrAnalysis *Ana, raw_ostream &DebugOut, - SmallVectorImpl<uint64_t> &Calls); - - typedef MapTy::const_iterator iterator; - iterator begin() const { return Blocks.begin(); } - iterator end() const { return Blocks.end(); } - - StringRef getName() const { return Name; } - - MCBasicBlock &addBlock(uint64_t Address, const MCBasicBlock &BB) { - Blocks.push_back(std::make_pair(Address, BB)); - return Blocks.back().second; - } -}; - -} - -#endif diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index c7e5cc1ede..03a383eb12 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm-objdump.h" -#include "MCFunction.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/MC/MCAsmInfo.h" @@ -44,10 +44,6 @@ using namespace llvm; using namespace object; static cl::opt<bool> - CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and" - " write it to a graphviz file (MachO-only)")); - -static cl::opt<bool> UseDbg("g", cl::desc("Print line information from debug info if available")); static cl::opt<std::string> @@ -91,99 +87,6 @@ struct SymbolSorter { } }; -// Print additional information about an address, if available. -static void DumpAddress(uint64_t Address, ArrayRef<SectionRef> Sections, - const MachOObjectFile *MachOObj, raw_ostream &OS) { - for (unsigned i = 0; i != Sections.size(); ++i) { - uint64_t SectAddr = 0, SectSize = 0; - Sections[i].getAddress(SectAddr); - Sections[i].getSize(SectSize); - uint64_t addr = SectAddr; - if (SectAddr <= Address && - SectAddr + SectSize > Address) { - StringRef bytes, name; - Sections[i].getContents(bytes); - Sections[i].getName(name); - // Print constant strings. - if (!name.compare("__cstring")) - OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"'; - // Print constant CFStrings. - if (!name.compare("__cfstring")) - OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"'; - } - } -} - -typedef std::map<uint64_t, MCFunction*> FunctionMapTy; -typedef SmallVector<MCFunction, 16> FunctionListTy; -static void createMCFunctionAndSaveCalls(StringRef Name, - const MCDisassembler *DisAsm, - MemoryObject &Object, uint64_t Start, - uint64_t End, - MCInstrAnalysis *InstrAnalysis, - uint64_t Address, - raw_ostream &DebugOut, - FunctionMapTy &FunctionMap, - FunctionListTy &Functions) { - SmallVector<uint64_t, 16> Calls; - MCFunction f = - MCFunction::createFunctionFromMC(Name, DisAsm, Object, Start, End, - InstrAnalysis, DebugOut, Calls); - Functions.push_back(f); - FunctionMap[Address] = &Functions.back(); - - // Add the gathered callees to the map. - for (unsigned i = 0, e = Calls.size(); i != e; ++i) - FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0)); -} - -// Write a graphviz file for the CFG inside an MCFunction. -static void emitDOTFile(const char *FileName, const MCFunction &f, - MCInstPrinter *IP) { - // Start a new dot file. - std::string Error; - raw_fd_ostream Out(FileName, Error); - if (!Error.empty()) { - errs() << "llvm-objdump: warning: " << Error << '\n'; - return; - } - - Out << "digraph " << f.getName() << " {\n"; - Out << "graph [ rankdir = \"LR\" ];\n"; - for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { - bool hasPreds = false; - // Only print blocks that have predecessors. - // FIXME: Slow. - for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; - ++pi) - if (pi->second.contains(i->first)) { - hasPreds = true; - break; - } - - if (!hasPreds && i != f.begin()) - continue; - - Out << '"' << i->first << "\" [ label=\"<a>"; - // Print instructions. - for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie; - ++ii) { - // Escape special chars and print the instruction in mnemonic form. - std::string Str; - raw_string_ostream OS(Str); - IP->printInst(&i->second.getInsts()[ii].Inst, OS, ""); - Out << DOT::EscapeString(OS.str()) << '|'; - } - Out << "<o>\" shape=\"record\" ];\n"; - - // Add edges. - for (MCBasicBlock::succ_iterator si = i->second.succ_begin(), - se = i->second.succ_end(); si != se; ++si) - Out << i->first << ":o -> " << *si <<":a\n"; - } - Out << "}\n"; -} - static void getSectionsAndSymbols(const macho::Header Header, MachOObjectFile *MachOObj, @@ -272,6 +175,12 @@ static void DisassembleInputMachO2(StringRef Filename, macho::Header Header = MachOOF->getHeader(); + // FIXME: FoundFns isn't used anymore. Using symbols/LC_FUNCTION_STARTS to + // determine function locations will eventually go in MCObjectDisassembler. + // FIXME: Using the -cfg command line option, this code used to be able to + // annotate relocations with the referenced symbol's name, and if this was + // inside a __[cf]string section, the data it points to. This is now replaced + // by the upcoming MCSymbolizer, which needs the appropriate setup done above. std::vector<SectionRef> Sections; std::vector<SymbolRef> Symbols; SmallVector<uint64_t, 8> FoundFns; @@ -308,31 +217,24 @@ static void DisassembleInputMachO2(StringRef Filename, diContext.reset(DIContext::getDWARFContext(DbgObj)); } - FunctionMapTy FunctionMap; - FunctionListTy Functions; - for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { + + bool SectIsText = false; + Sections[SectIdx].isText(SectIsText); + if (SectIsText == false) + continue; + StringRef SectName; if (Sections[SectIdx].getName(SectName) || SectName != "__text") continue; // Skip non-text sections DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl(); + StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR); if (SegmentName != "__TEXT") continue; - // Insert the functions from the function starts segment into our map. - uint64_t VMAddr; - Sections[SectIdx].getAddress(VMAddr); - for (unsigned i = 0, e = FoundFns.size(); i != e; ++i) { - StringRef SectBegin; - Sections[SectIdx].getContents(SectBegin); - uint64_t Offset = (uint64_t)SectBegin.data(); - FunctionMap.insert(std::make_pair(VMAddr + FoundFns[i]-Offset, - (MCFunction*)0)); - } - StringRef Bytes; Sections[SectIdx].getContents(Bytes); StringRefMemoryObject memoryObject(Bytes); @@ -403,52 +305,39 @@ static void DisassembleInputMachO2(StringRef Filename, symbolTableWorked = true; - if (!CFG) { - // Normal disassembly, print addresses, bytes and mnemonic form. - StringRef SymName; - Symbols[SymIdx].getName(SymName); - - outs() << SymName << ":\n"; - DILineInfo lastLine; - for (uint64_t Index = Start; Index < End; Index += Size) { - MCInst Inst; - - if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, - DebugOut, nulls())) { - uint64_t SectAddress = 0; - Sections[SectIdx].getAddress(SectAddress); - outs() << format("%8" PRIx64 ":\t", SectAddress + Index); - - DumpBytes(StringRef(Bytes.data() + Index, Size)); - IP->printInst(&Inst, outs(), ""); - - // Print debug info. - if (diContext) { - DILineInfo dli = - diContext->getLineInfoForAddress(SectAddress + Index); - // Print valid line info if it changed. - if (dli != lastLine && dli.getLine() != 0) - outs() << "\t## " << dli.getFileName() << ':' - << dli.getLine() << ':' << dli.getColumn(); - lastLine = dli; - } - outs() << "\n"; - } else { - errs() << "llvm-objdump: warning: invalid instruction encoding\n"; - if (Size == 0) - Size = 1; // skip illegible bytes + outs() << SymName << ":\n"; + DILineInfo lastLine; + for (uint64_t Index = Start; Index < End; Index += Size) { + MCInst Inst; + + if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, + DebugOut, nulls())) { + uint64_t SectAddress = 0; + Sections[SectIdx].getAddress(SectAddress); + outs() << format("%8" PRIx64 ":\t", SectAddress + Index); + + DumpBytes(StringRef(Bytes.data() + Index, Size)); + IP->printInst(&Inst, outs(), ""); + + // Print debug info. + if (diContext) { + DILineInfo dli = + diContext->getLineInfoForAddress(SectAddress + Index); + // Print valid line info if it changed. + if (dli != lastLine && dli.getLine() != 0) + outs() << "\t## " << dli.getFileName() << ':' + << dli.getLine() << ':' << dli.getColumn(); + lastLine = dli; } + outs() << "\n"; + } else { + errs() << "llvm-objdump: warning: invalid instruction encoding\n"; + if (Size == 0) + Size = 1; // skip illegible bytes } - } else { - // Create CFG and use it for disassembly. - StringRef SymName; - Symbols[SymIdx].getName(SymName); - createMCFunctionAndSaveCalls( - SymName, DisAsm.get(), memoryObject, Start, End, - InstrAnalysis.get(), Start, DebugOut, FunctionMap, Functions); } } - if (!CFG && !symbolTableWorked) { + if (!symbolTableWorked) { // Reading the symbol table didn't work, disassemble the whole section. uint64_t SectAddress; Sections[SectIdx].getAddress(SectAddress); @@ -471,142 +360,5 @@ static void DisassembleInputMachO2(StringRef Filename, } } } - - if (CFG) { - if (!symbolTableWorked) { - // Reading the symbol table didn't work, create a big __TEXT symbol. - uint64_t SectSize = 0, SectAddress = 0; - Sections[SectIdx].getSize(SectSize); - Sections[SectIdx].getAddress(SectAddress); - createMCFunctionAndSaveCalls("__TEXT", DisAsm.get(), memoryObject, - 0, SectSize, - InstrAnalysis.get(), - SectAddress, DebugOut, - FunctionMap, Functions); - } - for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(), - me = FunctionMap.end(); mi != me; ++mi) - if (mi->second == 0) { - // Create functions for the remaining callees we have gathered, - // but we didn't find a name for them. - uint64_t SectSize = 0; - Sections[SectIdx].getSize(SectSize); - - SmallVector<uint64_t, 16> Calls; - MCFunction f = - MCFunction::createFunctionFromMC("unknown", DisAsm.get(), - memoryObject, mi->first, - SectSize, - InstrAnalysis.get(), DebugOut, - Calls); - Functions.push_back(f); - mi->second = &Functions.back(); - for (unsigned i = 0, e = Calls.size(); i != e; ++i) { - std::pair<uint64_t, MCFunction*> p(Calls[i], (MCFunction*)0); - if (FunctionMap.insert(p).second) - mi = FunctionMap.begin(); - } - } - - DenseSet<uint64_t> PrintedBlocks; - for (unsigned ffi = 0, ffe = Functions.size(); ffi != ffe; ++ffi) { - MCFunction &f = Functions[ffi]; - for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){ - if (!PrintedBlocks.insert(fi->first).second) - continue; // We already printed this block. - - // We assume a block has predecessors when it's the first block after - // a symbol. - bool hasPreds = FunctionMap.find(fi->first) != FunctionMap.end(); - - // See if this block has predecessors. - // FIXME: Slow. - for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; - ++pi) - if (pi->second.contains(fi->first)) { - hasPreds = true; - break; - } - - uint64_t SectSize = 0, SectAddress; - Sections[SectIdx].getSize(SectSize); - Sections[SectIdx].getAddress(SectAddress); - - // No predecessors, this is a data block. Print as .byte directives. - if (!hasPreds) { - uint64_t End = llvm::next(fi) == fe ? SectSize : - llvm::next(fi)->first; - outs() << "# " << End-fi->first << " bytes of data:\n"; - for (unsigned pos = fi->first; pos != End; ++pos) { - outs() << format("%8x:\t", SectAddress + pos); - DumpBytes(StringRef(Bytes.data() + pos, 1)); - outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]); - } - continue; - } - - if (fi->second.contains(fi->first)) // Print a header for simple loops - outs() << "# Loop begin:\n"; - - DILineInfo lastLine; - // Walk over the instructions and print them. - for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie; - ++ii) { - const MCDecodedInst &Inst = fi->second.getInsts()[ii]; - - // If there's a symbol at this address, print its name. - if (FunctionMap.find(SectAddress + Inst.Address) != - FunctionMap.end()) - outs() << FunctionMap[SectAddress + Inst.Address]-> getName() - << ":\n"; - - outs() << format("%8" PRIx64 ":\t", SectAddress + Inst.Address); - DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size)); - - if (fi->second.contains(fi->first)) // Indent simple loops. - outs() << '\t'; - - IP->printInst(&Inst.Inst, outs(), ""); - - // Look for relocations inside this instructions, if there is one - // print its target and additional information if available. - for (unsigned j = 0; j != Relocs.size(); ++j) - if (Relocs[j].first >= SectAddress + Inst.Address && - Relocs[j].first < SectAddress + Inst.Address + Inst.Size) { - StringRef SymName; - uint64_t Addr; - Relocs[j].second.getAddress(Addr); - Relocs[j].second.getName(SymName); - - outs() << "\t# " << SymName << ' '; - DumpAddress(Addr, Sections, MachOOF, outs()); - } - - // If this instructions contains an address, see if we can evaluate - // it and print additional information. - uint64_t targ = InstrAnalysis->evaluateBranch(Inst.Inst, - Inst.Address, - Inst.Size); - if (targ != -1ULL) - DumpAddress(targ, Sections, MachOOF, outs()); - - // Print debug info. - if (diContext) { - DILineInfo dli = - diContext->getLineInfoForAddress(SectAddress + Inst.Address); - // Print valid line info if it changed. - if (dli != lastLine && dli.getLine() != 0) - outs() << "\t## " << dli.getFileName() << ':' - << dli.getLine() << ':' << dli.getColumn(); - lastLine = dli; - } - - outs() << '\n'; - } - } - - emitDOTFile((f.getName().str() + ".dot").c_str(), f, IP.get()); - } - } } } diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 570ec7ed6f..d8611d8b3d 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -17,22 +17,26 @@ //===----------------------------------------------------------------------===// #include "llvm-objdump.h" -#include "MCFunction.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAtom.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFunction.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCModule.h" +#include "llvm/MC/MCObjectDisassembler.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectSymbolizer.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCRelocationInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Object/MachO.h" @@ -131,6 +135,10 @@ static cl::opt<bool> Symbolize("symbolize", cl::desc("When disassembling instructions, " "try to symbolize operands.")); +static cl::opt<bool> +CFG("cfg", cl::desc("Create a CFG for every function found in the object" + " and write it to a graphviz file")); + static StringRef ToolName; bool llvm::error(error_code ec) { @@ -169,7 +177,51 @@ static const Target *getTarget(const ObjectFile *Obj = NULL) { return TheTarget; } -void llvm::StringRefMemoryObject::anchor() { } +// Write a graphviz file for the CFG inside an MCFunction. +static void emitDOTFile(const char *FileName, const MCFunction &f, + MCInstPrinter *IP) { + // Start a new dot file. + std::string Error; + raw_fd_ostream Out(FileName, Error); + if (!Error.empty()) { + errs() << "llvm-objdump: warning: " << Error << '\n'; + return; + } + + Out << "digraph \"" << f.getName() << "\" {\n"; + Out << "graph [ rankdir = \"LR\" ];\n"; + for (MCFunction::const_iterator i = f.begin(), e = f.end(); i != e; ++i) { + // Only print blocks that have predecessors. + bool hasPreds = (*i)->pred_begin() != (*i)->pred_end(); + + if (!hasPreds && i != f.begin()) + continue; + + Out << '"' << (*i)->getInsts()->getBeginAddr() << "\" [ label=\"<a>"; + // Print instructions. + for (unsigned ii = 0, ie = (*i)->getInsts()->size(); ii != ie; + ++ii) { + if (ii != 0) // Not the first line, start a new row. + Out << '|'; + if (ii + 1 == ie) // Last line, add an end id. + Out << "<o>"; + + // Escape special chars and print the instruction in mnemonic form. + std::string Str; + raw_string_ostream OS(Str); + IP->printInst(&(*i)->getInsts()->at(ii).Inst, OS, ""); + Out << DOT::EscapeString(OS.str()); + } + Out << "\" shape=\"record\" ];\n"; + + // Add edges. + for (MCBasicBlock::succ_const_iterator si = (*i)->succ_begin(), + se = (*i)->succ_end(); si != se; ++si) + Out << (*i)->getInsts()->getBeginAddr() << ":o -> " + << (*si)->getInsts()->getBeginAddr() << ":a\n"; + } + Out << "}\n"; +} void llvm::DumpBytes(StringRef bytes) { static const char hex_rep[] = "0123456789abcdef"; @@ -269,6 +321,9 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { } } + OwningPtr<const MCInstrAnalysis> + MIA(TheTarget->createMCInstrAnalysis(MII.get())); + int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter( AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI)); @@ -278,6 +333,34 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { return; } + if (CFG) { + OwningPtr<MCObjectDisassembler> OD( + new MCObjectDisassembler(*Obj, *DisAsm, *MIA)); + OwningPtr<MCModule> Mod(OD->buildModule(/* withCFG */ true)); + for (MCModule::const_atom_iterator AI = Mod->atom_begin(), + AE = Mod->atom_end(); + AI != AE; ++AI) { + outs() << "Atom " << (*AI)->getName() << ": \n"; + if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI)) { + for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end(); + II != IE; + ++II) { + IP->printInst(&II->Inst, outs(), ""); + outs() << "\n"; + } + } + } + for (MCModule::const_func_iterator FI = Mod->func_begin(), + FE = Mod->func_end(); + FI != FE; ++FI) { + static int filenum = 0; + emitDOTFile((Twine((*FI)->getName()) + "_" + + utostr(filenum++) + ".dot").str().c_str(), + **FI, IP.get()); + } + } + + error_code ec; for (section_iterator i = Obj->begin_sections(), e = Obj->end_sections(); diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h index 3c62240f8f..87f19ba257 100644 --- a/tools/llvm-objdump/llvm-objdump.h +++ b/tools/llvm-objdump/llvm-objdump.h @@ -13,7 +13,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/StringRefMemoryObject.h" namespace llvm { @@ -35,25 +35,6 @@ void DisassembleInputMachO(StringRef Filename); void printCOFFUnwindInfo(const object::COFFObjectFile* o); void printELFFileHeader(const object::ObjectFile *o); -class StringRefMemoryObject : public MemoryObject { - virtual void anchor(); - StringRef Bytes; - uint64_t Base; -public: - StringRefMemoryObject(StringRef bytes, uint64_t Base = 0) - : Bytes(bytes), Base(Base) {} - - uint64_t getBase() const { return Base; } - uint64_t getExtent() const { return Bytes.size(); } - - int readByte(uint64_t Addr, uint8_t *Byte) const { - if (Addr >= Base + getExtent() || Addr < Base) - return -1; - *Byte = Bytes[Addr - Base]; - return 0; - } -}; - } #endif |