summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Object/MachO.h88
-rw-r--r--lib/Object/MachOObjectFile.cpp46
-rw-r--r--test/Object/Inputs/macho-data-in-code.macho-thumbv7bin0 -> 680 bytes
-rw-r--r--test/Object/X86/macho-data-in-code.test7
-rw-r--r--tools/llvm-objdump/MachODump.cpp118
-rw-r--r--tools/macho-dump/macho-dump.cpp2
6 files changed, 253 insertions, 8 deletions
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index f3ba8ef757..1b9faaa9fd 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -25,6 +25,31 @@
namespace llvm {
namespace object {
+/// DiceRef - A lightweight, copyable handle onto one entry of the data in
+/// code table of a Mach-O object file. It pairs the raw entry reference with
+/// the ObjectFile the entry was read from.
+class DiceRef {
+public:
+  /// Creates a reference that is not attached to any object file.
+  DiceRef() : OwningObject(NULL) { }
+
+  /// Creates a reference to the entry DiceP belonging to Owner.
+  DiceRef(DataRefImpl DiceP, const ObjectFile *Owner);
+
+  /// Comparisons look only at the raw entry reference.
+  bool operator==(const DiceRef &Other) const;
+  bool operator<(const DiceRef &Other) const;
+
+  /// Sets Result to the reference one table entry past this one.
+  error_code getNext(DiceRef &Result) const;
+
+  /// Field accessors: each stores the named field of the entry in Result.
+  error_code getOffset(uint32_t &Result) const;
+  error_code getLength(uint16_t &Result) const;
+  error_code getKind(uint16_t &Result) const;
+
+  /// Access to the underlying raw reference and the owning object file.
+  DataRefImpl getRawDataRefImpl() const;
+  const ObjectFile *getObjectFile() const;
+
+private:
+  DataRefImpl DicePimpl;
+  const ObjectFile *OwningObject;
+};
+typedef content_iterator<DiceRef> dice_iterator;
+
class MachOObjectFile : public ObjectFile {
public:
struct LoadCommandInfo {
@@ -108,6 +133,9 @@ public:
relocation_iterator getSectionRelBegin(unsigned Index) const;
relocation_iterator getSectionRelEnd(unsigned Index) const;
+ dice_iterator begin_dices() const;
+ dice_iterator end_dices() const;
+
// In a MachO file, sections have a segment name. This is used in the .o
// files. They have a single segment, but this field specifies which segment
// a section should be put in in the final object.
@@ -152,6 +180,7 @@ public:
getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const;
macho::RelocationEntry getRelocation(DataRefImpl Rel) const;
+ macho::DataInCodeTableEntry getDice(DataRefImpl Rel) const;
macho::Header getHeader() const;
macho::Header64Ext getHeader64Ext() const;
macho::IndirectSymbolTableEntry
@@ -161,6 +190,7 @@ public:
unsigned Index) const;
macho::SymtabLoadCommand getSymtabLoadCommand() const;
macho::DysymtabLoadCommand getDysymtabLoadCommand() const;
+ macho::LinkeditDataLoadCommand getDataInCodeLoadCommand() const;
StringRef getStringTableData() const;
bool is64Bit() const;
@@ -175,8 +205,66 @@ private:
SectionList Sections;
const char *SymtabLoadCmd;
const char *DysymtabLoadCmd;
+ const char *DataInCodeLoadCmd;
};
+/// DiceRef - inline member definitions.
+///
+/// Binds the raw entry reference DiceP to the object file Owner.
+inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner)
+    : DicePimpl(DiceP),
+      OwningObject(Owner) {
+}
+
+/// Equality is decided by the raw entry references alone; the owning object
+/// file does not take part in the comparison.
+inline bool DiceRef::operator==(const DiceRef &Other) const {
+  const DataRefImpl &Mine = DicePimpl;
+  const DataRefImpl &Theirs = Other.DicePimpl;
+  return Mine == Theirs;
+}
+
+/// Ordering is delegated to the raw entry references; the owning object file
+/// does not take part in the comparison.
+inline bool DiceRef::operator<(const DiceRef &Other) const {
+  const DataRefImpl &Mine = DicePimpl;
+  const DataRefImpl &Theirs = Other.DicePimpl;
+  return Mine < Theirs;
+}
+
+/// Stores in Result a reference to the entry located one
+/// macho::DataInCodeTableEntry past this one, keeping the same owning object.
+/// No bounds check is made here; always returns object_error::success.
+inline error_code DiceRef::getNext(DiceRef &Result) const {
+  typedef macho::DataInCodeTableEntry Entry;
+
+  // Treat the raw reference as a pointer to an entry and step it forward by
+  // exactly one entry.
+  const Entry *Current = reinterpret_cast<const Entry *>(DicePimpl.p);
+  DataRefImpl Next = DicePimpl;
+  Next.p = reinterpret_cast<uintptr_t>(Current + 1);
+
+  Result = DiceRef(Next, OwningObject);
+  return object_error::success;
+}
+
+// A DiceRef (a Mach-O data in code reference) can only be created when its
+// OwningObject is a MachOObjectFile, so the methods that read the fields of
+// the referenced entry use static_cast<> to recover that type.
+
+/// Stores the Offset field of the referenced entry in Result and returns
+/// object_error::success. The owning object is assumed to be a
+/// MachOObjectFile.
+inline error_code DiceRef::getOffset(uint32_t &Result) const {
+  const MachOObjectFile *Owner =
+      static_cast<const MachOObjectFile *>(OwningObject);
+  Result = Owner->getDice(DicePimpl).Offset;
+  return object_error::success;
+}
+
+/// Stores the Length field of the referenced entry in Result and returns
+/// object_error::success. The owning object is assumed to be a
+/// MachOObjectFile.
+inline error_code DiceRef::getLength(uint16_t &Result) const {
+  const MachOObjectFile *Owner =
+      static_cast<const MachOObjectFile *>(OwningObject);
+  Result = Owner->getDice(DicePimpl).Length;
+  return object_error::success;
+}
+
+/// Stores the Kind field of the referenced entry in Result and returns
+/// object_error::success. The owning object is assumed to be a
+/// MachOObjectFile.
+inline error_code DiceRef::getKind(uint16_t &Result) const {
+  const MachOObjectFile *Owner =
+      static_cast<const MachOObjectFile *>(OwningObject);
+  Result = Owner->getDice(DicePimpl).Kind;
+  return object_error::success;
+}
+
+/// Returns a copy of the raw entry reference held by this DiceRef.
+inline DataRefImpl DiceRef::getRawDataRefImpl() const {
+  return this->DicePimpl;
+}
+
+/// Returns the object file this reference was created for; this is NULL for
+/// a default-constructed DiceRef.
+inline const ObjectFile *DiceRef::getObjectFile() const {
+  return this->OwningObject;
+}
+
}
}
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index bd5ea57c1f..e62b5a4819 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -414,7 +414,7 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
bool IsLittleEndian, bool Is64bits,
error_code &ec)
: ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
- SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) {
+ SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) {
uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
macho::LoadCommandType SegmentLoadType = is64Bit() ?
macho::LCT_Segment64 : macho::LCT_Segment;
@@ -427,6 +427,9 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
} else if (Load.C.Type == macho::LCT_Dysymtab) {
assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
DysymtabLoadCmd = Load.Ptr;
+ } else if (Load.C.Type == macho::LCT_DataInCode) {
+ assert(!DataInCodeLoadCmd && "Multiple data in code tables");
+ DataInCodeLoadCmd = Load.Ptr;
} else if (Load.C.Type == SegmentLoadType) {
uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
for (unsigned J = 0; J < NumSections; ++J) {
@@ -1328,6 +1331,27 @@ relocation_iterator MachOObjectFile::getSectionRelEnd(unsigned Index) const {
return getSectionRelEnd(DRI);
}
+/// Returns an iterator on the first entry of the data in code table.
+/// When the file has no data in code load command, the iterator wraps a
+/// default-constructed DataRefImpl, the same value end_dices() produces in
+/// that case.
+dice_iterator MachOObjectFile::begin_dices() const {
+  DataRefImpl First;
+  if (DataInCodeLoadCmd) {
+    // The table starts at the file offset recorded in the load command.
+    const macho::LinkeditDataLoadCommand LC = getDataInCodeLoadCommand();
+    First.p = reinterpret_cast<uintptr_t>(getPtr(this, LC.DataOffset));
+  }
+  return dice_iterator(DiceRef(First, this));
+}
+
+/// Returns the past-the-end iterator of the data in code table.
+/// When the file has no data in code load command, the iterator wraps a
+/// default-constructed DataRefImpl, the same value begin_dices() produces in
+/// that case.
+dice_iterator MachOObjectFile::end_dices() const {
+  DataRefImpl Last;
+  if (DataInCodeLoadCmd) {
+    // The table ends DataSize bytes after the file offset where it starts.
+    const macho::LinkeditDataLoadCommand LC = getDataInCodeLoadCommand();
+    const unsigned TableEnd = LC.DataOffset + LC.DataSize;
+    Last.p = reinterpret_cast<uintptr_t>(getPtr(this, TableEnd));
+  }
+  return dice_iterator(DiceRef(Last, this));
+}
+
StringRef
MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
@@ -1492,6 +1516,12 @@ MachOObjectFile::getRelocation(DataRefImpl Rel) const {
return getStruct<macho::RelocationEntry>(this, P);
}
+/// Reads the data in code table entry that the raw reference Rel points at,
+/// using getStruct<> on the address stored in Rel.p.
+macho::DataInCodeTableEntry
+MachOObjectFile::getDice(DataRefImpl Rel) const {
+  return getStruct<macho::DataInCodeTableEntry>(
+      this, reinterpret_cast<const char *>(Rel.p));
+}
+
macho::Header MachOObjectFile::getHeader() const {
return getStruct<macho::Header>(this, getPtr(this, 0));
}
@@ -1524,6 +1554,20 @@ macho::DysymtabLoadCommand MachOObjectFile::getDysymtabLoadCommand() const {
return getStruct<macho::DysymtabLoadCommand>(this, DysymtabLoadCmd);
}
+/// Returns the data in code load command of this file. When the file has
+/// none, a placeholder command is returned whose DataOffset and DataSize are
+/// both zero.
+macho::LinkeditDataLoadCommand
+MachOObjectFile::getDataInCodeLoadCommand() const {
+  if (!DataInCodeLoadCmd) {
+    // No such load command: synthesize one describing an empty table.
+    macho::LinkeditDataLoadCommand Empty;
+    Empty.Type = macho::LCT_DataInCode;
+    Empty.Size = macho::LinkeditLoadCommandSize;
+    Empty.DataOffset = 0;
+    Empty.DataSize = 0;
+    return Empty;
+  }
+
+  return getStruct<macho::LinkeditDataLoadCommand>(this, DataInCodeLoadCmd);
+}
+
StringRef MachOObjectFile::getStringTableData() const {
macho::SymtabLoadCommand S = getSymtabLoadCommand();
return getData().substr(S.StringTableOffset, S.StringTableSize);
diff --git a/test/Object/Inputs/macho-data-in-code.macho-thumbv7 b/test/Object/Inputs/macho-data-in-code.macho-thumbv7
new file mode 100644
index 0000000000..57649302dd
--- /dev/null
+++ b/test/Object/Inputs/macho-data-in-code.macho-thumbv7
Binary files differ
diff --git a/test/Object/X86/macho-data-in-code.test b/test/Object/X86/macho-data-in-code.test
new file mode 100644
index 0000000000..dca084c2ca
--- /dev/null
+++ b/test/Object/X86/macho-data-in-code.test
@@ -0,0 +1,7 @@
+RUN: llvm-objdump -triple thumbv7-apple-iOS -disassemble %p/../Inputs/macho-data-in-code.macho-thumbv7 -macho | FileCheck %s
+
+CHECK: 12: 80 bd pop {r7, pc}
+
+CHECK: 14: 38 00 00 00 .long 56 @ KIND_DATA
+CHECK: 16: 00 00 movs r0, r0
+
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 1ee3e42dab..27e1623d45 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -87,12 +87,73 @@ struct SymbolSorter {
}
};
+// Types for the sorted data in code table that is built before disassembly
+// and the predicate function used to look up entries in it.
+typedef std::pair<uint64_t, DiceRef> DiceTableEntry;
+typedef std::vector<DiceTableEntry> DiceTable;
+typedef DiceTable::iterator dice_table_iterator;
+
+/// Equality predicate on data in code table entries: two entries match when
+/// their addresses (the first member of the pair) are equal. Despite its name
+/// this is not an ordering; it is passed to std::search to find the entry for
+/// a given address. The entries are taken by const reference so that no pair
+/// is copied on each comparison.
+static bool
+compareDiceTableEntries(const DiceTableEntry &i,
+                        const DiceTableEntry &j) {
+  return i.first == j.first;
+}
+
+/// Assembles the first NumBytes bytes at Bytes into an unsigned little-endian
+/// value. Each byte is widened through unsigned char so that bytes >= 0x80 are
+/// never sign-extended, whatever the signedness of plain char.
+static uint64_t readDiceValueLE(const char *Bytes, unsigned NumBytes) {
+  uint64_t Value = 0;
+  for (unsigned I = 0; I != NumBytes; ++I)
+    Value |= static_cast<uint64_t>(static_cast<unsigned char>(Bytes[I]))
+             << (8 * I);
+  return Value;
+}
+
+/// Prints one data in code entry to outs() as an assembler data directive
+/// followed by a comment naming its kind, and ends the line.
+///
+/// \param bytes points at the first byte of the entry's data.
+/// \param Size  length of the entry in bytes; only consulted for macho::Data,
+///              where sizes other than 1, 2 and 4 print no directive.
+/// \param Kind  data in code kind. Jump table kinds print a single element of
+///              the width implied by the kind; unknown kinds print only the
+///              numeric kind.
+static void DumpDataInCode(const char *bytes, uint64_t Size,
+                           unsigned short Kind) {
+  switch (Kind) {
+  case macho::Data:
+    switch (Size) {
+    case 4:
+      outs() << "\t.long " << readDiceValueLE(bytes, 4);
+      break;
+    case 2:
+      outs() << "\t.short " << readDiceValueLE(bytes, 2);
+      break;
+    case 1:
+      outs() << "\t.byte " << readDiceValueLE(bytes, 1);
+      break;
+    default:
+      // Other sizes have no matching directive; only the kind is printed.
+      break;
+    }
+    outs() << "\t@ KIND_DATA\n";
+    break;
+  // Every case terminates its line: the caller prints the next address
+  // immediately after this function returns.
+  case macho::JumpTable8:
+    outs() << "\t.byte " << readDiceValueLE(bytes, 1)
+           << "\t@ KIND_JUMP_TABLE8\n";
+    break;
+  case macho::JumpTable16:
+    outs() << "\t.short " << readDiceValueLE(bytes, 2)
+           << "\t@ KIND_JUMP_TABLE16\n";
+    break;
+  case macho::JumpTable32:
+    outs() << "\t.long " << readDiceValueLE(bytes, 4)
+           << "\t@ KIND_JUMP_TABLE32\n";
+    break;
+  default:
+    outs() << "\t@ data in code kind = " << Kind << "\n";
+    break;
+  }
+}
+
static void
getSectionsAndSymbols(const macho::Header Header,
MachOObjectFile *MachOObj,
std::vector<SectionRef> &Sections,
std::vector<SymbolRef> &Symbols,
- SmallVectorImpl<uint64_t> &FoundFns) {
+ SmallVectorImpl<uint64_t> &FoundFns,
+ uint64_t &BaseSegmentAddress) {
error_code ec;
for (symbol_iterator SI = MachOObj->begin_symbols(),
SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
@@ -108,6 +169,7 @@ getSectionsAndSymbols(const macho::Header Header,
MachOObjectFile::LoadCommandInfo Command =
MachOObj->getFirstLoadCommandInfo();
+ bool BaseSegmentAddressSet = false;
for (unsigned i = 0; ; ++i) {
if (Command.C.Type == macho::LCT_FunctionStarts) {
// We found a function starts segment, parse the addresses for later
@@ -117,6 +179,15 @@ getSectionsAndSymbols(const macho::Header Header,
MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
}
+ else if (Command.C.Type == macho::LCT_Segment) {
+ macho::SegmentLoadCommand SLC =
+ MachOObj->getSegmentLoadCommand(Command);
+ StringRef SegName = SLC.Name;
+ if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
+ BaseSegmentAddressSet = true;
+ BaseSegmentAddress = SLC.VMAddress;
+ }
+ }
if (i == Header.NumLoadCommands - 1)
break;
@@ -184,14 +255,32 @@ static void DisassembleInputMachO2(StringRef Filename,
std::vector<SectionRef> Sections;
std::vector<SymbolRef> Symbols;
SmallVector<uint64_t, 8> FoundFns;
+ uint64_t BaseSegmentAddress;
- getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns);
+ getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns,
+ BaseSegmentAddress);
// Make a copy of the unsorted symbol list. FIXME: duplication
std::vector<SymbolRef> UnsortedSymbols(Symbols);
// Sort the symbols by address, just in case they didn't come in that way.
std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
+ // Build a data in code table that is sorted by the address of each entry.
+ uint64_t BaseAddress = 0;
+ if (Header.FileType == macho::HFT_Object)
+ Sections[0].getAddress(BaseAddress);
+ else
+ BaseAddress = BaseSegmentAddress;
+ DiceTable Dices;
+ error_code ec;
+ for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
+ DI != DE; DI.increment(ec)){
+ uint32_t Offset;
+ DI->getOffset(Offset);
+ Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
+ }
+ array_pod_sort(Dices.begin(), Dices.end());
+
#ifndef NDEBUG
raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
#else
@@ -309,12 +398,29 @@ static void DisassembleInputMachO2(StringRef Filename,
for (uint64_t Index = Start; Index < End; Index += Size) {
MCInst Inst;
+ uint64_t SectAddress = 0;
+ Sections[SectIdx].getAddress(SectAddress);
+ outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
+
+ // Check the data in code table here to see if this is data not an
+ // instruction to be disassembled.
+ DiceTable Dice;
+ Dice.push_back(std::make_pair(SectAddress + Index, DiceRef()));
+ dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
+ Dice.begin(), Dice.end(),
+ compareDiceTableEntries);
+ if (DTI != Dices.end()){
+ uint16_t Length;
+ DTI->second.getLength(Length);
+ DumpBytes(StringRef(Bytes.data() + Index, Length));
+ uint16_t Kind;
+ DTI->second.getKind(Kind);
+ DumpDataInCode(Bytes.data() + Index, Length, Kind);
+ continue;
+ }
+
if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
DebugOut, nulls())) {
- uint64_t SectAddress = 0;
- Sections[SectIdx].getAddress(SectAddress);
- outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
-
DumpBytes(StringRef(Bytes.data() + Index, Size));
IP->printInst(&Inst, outs(), "");
diff --git a/tools/macho-dump/macho-dump.cpp b/tools/macho-dump/macho-dump.cpp
index 88fd4529ab..897a785f41 100644
--- a/tools/macho-dump/macho-dump.cpp
+++ b/tools/macho-dump/macho-dump.cpp
@@ -292,7 +292,7 @@ DumpDataInCodeDataCommand(const MachOObjectFile &Obj,
<< " ('datasize', " << LLC.DataSize << ")\n"
<< " ('_data_regions', [\n";
- unsigned NumRegions = LLC.DataSize / 8;
+ unsigned NumRegions = LLC.DataSize / sizeof(macho::DataInCodeTableEntry);
for (unsigned i = 0; i < NumRegions; ++i) {
macho::DataInCodeTableEntry DICE =
Obj.getDataInCodeTableEntry(LLC.DataOffset, i);