From 54154f3bf1ae3d2dfd68cc9474cad061b3338a40 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Thu, 6 Jun 2013 17:20:50 +0000 Subject: Teach llvm-objdump with the -macho parser how to use the data in code table from the LC_DATA_IN_CODE load command. And when disassembling, print the data in code formatted for the kind of data it is and do not disassemble those bytes. I added the format-specific functionality to the derived class MachOObjectFile since these tables only appear in Mach-O object files. This is my first attempt to modify the libObject stuff so if folks have better suggestions how to fit this in or suggestions on the implementation please let me know. rdar://11791371 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183424 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-objdump/MachODump.cpp | 118 +++++++++++++++++++++++++++++++++++++-- tools/macho-dump/macho-dump.cpp | 2 +- 2 files changed, 113 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 1ee3e42dab..27e1623d45 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -87,12 +87,73 @@ struct SymbolSorter { } }; +// Types for the sorted data in code table that is built before disassembly +// and the equality predicate used to search it by address.
+// A table entry pairs an address (first) with the data-in-code entry that
+// was recorded for that address (second).
+typedef std::pair<uint64_t, DiceRef> DiceTableEntry;
+typedef std::vector<DiceTableEntry> DiceTable;
+typedef DiceTable::iterator dice_table_iterator;
+
+// Equality predicate for searching a DiceTable: two entries match when their
+// addresses are equal. The DiceRef half of the pair is deliberately ignored,
+// so a probe entry only needs a valid address.
+static bool
+compareDiceTableEntries(const DiceTableEntry &i,
+                        const DiceTableEntry &j) {
+  return i.first == j.first;
+}
+
+// Assemble the first NumBytes bytes at bytes into an unsigned little-endian
+// value. Every byte goes through unsigned char first so that bytes >= 0x80
+// are not sign-extended on targets where plain char is signed.
+static uint64_t readDataInCodeValue(const char *bytes, unsigned NumBytes) {
+  uint64_t Value = 0;
+  for (unsigned i = 0; i != NumBytes; ++i)
+    Value |= static_cast<uint64_t>(static_cast<unsigned char>(bytes[i]))
+             << (8 * i);
+  return Value;
+}
+
+// Print one data-in-code region to outs() as an assembler directive followed
+// by a comment naming its kind. Every path ends the output line with '\n'.
+//
+//   bytes - start of the region's bytes.
+//   Size  - length of the region in bytes; only consulted for macho::Data.
+//   Kind  - data-in-code kind (macho::Data, macho::JumpTable8, ...).
+//
+// For macho::Data a .long/.short/.byte directive is printed when Size is
+// 4/2/1; any other Size prints only the kind comment. For the jump table
+// kinds only the first table entry is printed, read at the width the kind
+// implies regardless of Size, so the caller must supply at least that many
+// bytes. Unknown kinds print just the numeric kind.
+static void DumpDataInCode(const char *bytes, uint64_t Size,
+                           unsigned short Kind) {
+  switch (Kind) {
+  case macho::Data:
+    switch (Size) {
+    case 4:
+      outs() << "\t.long " << readDataInCodeValue(bytes, 4);
+      break;
+    case 2:
+      outs() << "\t.short " << readDataInCodeValue(bytes, 2);
+      break;
+    case 1:
+      outs() << "\t.byte " << readDataInCodeValue(bytes, 1);
+      break;
+    }
+    outs() << "\t@ KIND_DATA\n";
+    break;
+  case macho::JumpTable8:
+    outs() << "\t.byte " << readDataInCodeValue(bytes, 1)
+           << "\t@ KIND_JUMP_TABLE8\n";
+    break;
+  case macho::JumpTable16:
+    outs() << "\t.short " << readDataInCodeValue(bytes, 2)
+           << "\t@ KIND_JUMP_TABLE16\n";
+    break;
+  case macho::JumpTable32:
+    outs() << "\t.long " << readDataInCodeValue(bytes, 4)
+           << "\t@ KIND_JUMP_TABLE32\n";
+    break;
+  default:
+    outs() << "\t@ data in code kind = " << Kind << "\n";
+    break;
+  }
+}
+
 static void
 getSectionsAndSymbols(const macho::Header Header,
                       MachOObjectFile *MachOObj,
                       std::vector<SectionRef> &Sections,
                       std::vector<SymbolRef> &Symbols,
-                      SmallVectorImpl<uint64_t> &FoundFns) {
+                      SmallVectorImpl<uint64_t> &FoundFns,
+                      uint64_t &BaseSegmentAddress) {
   error_code ec;
   for (symbol_iterator SI = MachOObj->begin_symbols(),
        SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
@@ -108,6 +169,7 @@ getSectionsAndSymbols(const macho::Header Header,
 
   MachOObjectFile::LoadCommandInfo Command =
     MachOObj->getFirstLoadCommandInfo();
+  bool BaseSegmentAddressSet = false;
   for (unsigned i = 0; ; ++i) {
     if (Command.C.Type == macho::LCT_FunctionStarts) {
       // We found a function starts segment, parse the addresses for later
@@ -117,6 +179,15 @@ getSectionsAndSymbols(const
macho::Header Header, MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns); } + else if (Command.C.Type == macho::LCT_Segment) { + macho::SegmentLoadCommand SLC = + MachOObj->getSegmentLoadCommand(Command); + StringRef SegName = SLC.Name; + if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") { + BaseSegmentAddressSet = true; + BaseSegmentAddress = SLC.VMAddress; + } + } if (i == Header.NumLoadCommands - 1) break; @@ -184,14 +255,32 @@ static void DisassembleInputMachO2(StringRef Filename, std::vector Sections; std::vector Symbols; SmallVector FoundFns; + uint64_t BaseSegmentAddress; - getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns); + getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns, + BaseSegmentAddress); // Make a copy of the unsorted symbol list. FIXME: duplication std::vector UnsortedSymbols(Symbols); // Sort the symbols by address, just in case they didn't come in that way. std::sort(Symbols.begin(), Symbols.end(), SymbolSorter()); + // Build a data in code table that is sorted on by the address of each entry. + uint64_t BaseAddress = 0; + if (Header.FileType == macho::HFT_Object) + Sections[0].getAddress(BaseAddress); + else + BaseAddress = BaseSegmentAddress; + DiceTable Dices; + error_code ec; + for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices(); + DI != DE; DI.increment(ec)){ + uint32_t Offset; + DI->getOffset(Offset); + Dices.push_back(std::make_pair(BaseAddress + Offset, *DI)); + } + array_pod_sort(Dices.begin(), Dices.end()); + #ifndef NDEBUG raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls(); #else @@ -309,12 +398,29 @@ static void DisassembleInputMachO2(StringRef Filename, for (uint64_t Index = Start; Index < End; Index += Size) { MCInst Inst; + uint64_t SectAddress = 0; + Sections[SectIdx].getAddress(SectAddress); + outs() << format("%8" PRIx64 ":\t", SectAddress + Index); + + // Check the data in code table here to see if this is data not an + // instruction to be disassembled. 
+ DiceTable Dice; + Dice.push_back(std::make_pair(SectAddress + Index, DiceRef())); + dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(), + Dice.begin(), Dice.end(), + compareDiceTableEntries); + if (DTI != Dices.end()){ + uint16_t Length; + DTI->second.getLength(Length); + DumpBytes(StringRef(Bytes.data() + Index, Length)); + uint16_t Kind; + DTI->second.getKind(Kind); + DumpDataInCode(Bytes.data() + Index, Length, Kind); + continue; + } + if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, DebugOut, nulls())) { - uint64_t SectAddress = 0; - Sections[SectIdx].getAddress(SectAddress); - outs() << format("%8" PRIx64 ":\t", SectAddress + Index); - DumpBytes(StringRef(Bytes.data() + Index, Size)); IP->printInst(&Inst, outs(), ""); diff --git a/tools/macho-dump/macho-dump.cpp b/tools/macho-dump/macho-dump.cpp index 88fd4529ab..897a785f41 100644 --- a/tools/macho-dump/macho-dump.cpp +++ b/tools/macho-dump/macho-dump.cpp @@ -292,7 +292,7 @@ DumpDataInCodeDataCommand(const MachOObjectFile &Obj, << " ('datasize', " << LLC.DataSize << ")\n" << " ('_data_regions', [\n"; - unsigned NumRegions = LLC.DataSize / 8; + unsigned NumRegions = LLC.DataSize / sizeof(macho::DataInCodeTableEntry); for (unsigned i = 0; i < NumRegions; ++i) { macho::DataInCodeTableEntry DICE = Obj.getDataInCodeTableEntry(LLC.DataOffset, i); -- cgit v1.2.3