#include <assert.h>

// _GNU_SOURCE must be defined for unwind.h to expose some of the functions
// that we want.  If it isn't, then we define it and undefine it to make sure
// that it doesn't impact the rest of the program.
#ifndef _GNU_SOURCE
#	define _GNU_SOURCE 1
#	include <unwind.h>
#	undef _GNU_SOURCE
#else
#	include <unwind.h>
#endif

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/// Pointer into a stream of DWARF exception-handling data.
typedef unsigned char *dw_eh_ptr_t;

// Flag indicating a signed quantity
#define DW_EH_PE_signed 0x08

/// DWARF data encoding types (low four bits of an encoding byte).
enum dwarf_data_encoding
{
	// Unsigned, little-endian, base 128-encoded (variable length)
	DW_EH_PE_uleb128 = 0x01,
	// uint16
	DW_EH_PE_udata2  = 0x02,
	// uint32
	DW_EH_PE_udata4  = 0x03,
	// uint64
	DW_EH_PE_udata8  = 0x04,
	// Signed versions of the above:
	DW_EH_PE_sleb128 = DW_EH_PE_uleb128 | DW_EH_PE_signed,
	DW_EH_PE_sdata2  = DW_EH_PE_udata2 | DW_EH_PE_signed,
	DW_EH_PE_sdata4  = DW_EH_PE_udata4 | DW_EH_PE_signed,
	DW_EH_PE_sdata8  = DW_EH_PE_udata8 | DW_EH_PE_signed
};

/**
 * Extracts the value-format part (low four bits) of an encoding byte.
 */
static inline enum dwarf_data_encoding get_encoding(unsigned char x)
{
	return (enum dwarf_data_encoding)(x & 0xf);
}

/// DWARF "relative to" modifiers (high bits of an encoding byte).
enum dwarf_data_relative
{
	// Value is omitted
	DW_EH_PE_omit     = 0xff,
	// Absolute pointer value
	DW_EH_PE_absptr   = 0x00,
	// Value relative to program counter
	DW_EH_PE_pcrel    = 0x10,
	// Value relative to the text segment
	DW_EH_PE_textrel  = 0x20,
	// Value relative to the data segment
	DW_EH_PE_datarel  = 0x30,
	// Value relative to the start of the function
	DW_EH_PE_funcrel  = 0x40,
	// Aligned pointer (Not supported yet - are they actually used?)
	DW_EH_PE_aligned  = 0x50,
	// Pointer points to address of real value
	DW_EH_PE_indirect = 0x80
};

/**
 * Extracts the base ("relative to") part of an encoding byte, i.e. the bits
 * selected by the mask 0x70.
 */
static inline enum dwarf_data_relative get_base(unsigned char x)
{
	return (enum dwarf_data_relative)(x & 0x70);
}

/**
 * Returns non-zero if the encoding byte has the DW_EH_PE_indirect flag set.
 */
static inline int is_indirect(unsigned char x)
{
	return (x & DW_EH_PE_indirect);
}

/**
 * Returns the size in bytes of a fixed-size field with the given encoding.
 * Aborts if the encoding does not describe one of the fixed-size formats
 * handled here.
 */
static inline int dwarf_size_of_fixed_size_field(unsigned char type)
{
	// Low three bits indicate size...
	switch (type & 7)
	{
		case DW_EH_PE_udata2: return 2;
		case DW_EH_PE_udata4: return 4;
		case DW_EH_PE_udata8: return 8;
		case DW_EH_PE_absptr: return sizeof(void*);
		default: break;
	}
	abort();
}

/**
 * Read an unsigned, little-endian, base-128, DWARF value.  Updates *data to
 * point to the end of the value and stores in *b the number of payload bits
 * consumed (seven per octet read).
 */
static uint64_t read_leb128(unsigned char** data, int *b)
{
	uint64_t uleb = 0;
	unsigned int bit = 0;
	unsigned char octet;
	// We have to read at least one octet, and keep reading until we get to one
	// with the high (continuation) bit unset.
	do
	{
		// The shift below must stay inside a 64-bit value.  Payload bits of
		// the final octet that land above bit 63 are discarded.
		assert(bit < sizeof(uint64_t) * 8);
		octet = *(*data)++;
		// Widen before shifting: shifting the promoted int would be undefined
		// (and wrong) as soon as the shift count reaches the width of int.
		uleb |= (uint64_t)(octet & 0x7f) << bit;
		bit += 7;
	} while (octet & 0x80);
	*b = (int)bit;
	return uleb;
}

/**
 * Reads an unsigned LEB128 value, advancing *data past it.
 */
static int64_t read_uleb128(unsigned char** data)
{
	int b;
	return read_leb128(data, &b);
}

/**
 * Reads a signed LEB128 value, advancing *data past it.
 */
static int64_t read_sleb128(unsigned char** data)
{
	int bits;
	// Read as if it's unsigned
	uint64_t uleb = read_leb128(data, &bits);
	// If the most significant bit read is 1, then we need to sign extend it.
	// When 64 or more bits were read the value already fills the whole word,
	// so there is nothing to extend (and the shifts would be undefined).
	if ((bits < 64) && ((uleb >> (bits - 1)) & 1))
	{
		// Sign extend by setting all bits in front of it to 1
		uleb |= ~(uint64_t)0 << bits;
	}
	return (int64_t)uleb;
}

/**
 * Reads one value in the format selected by the low four bits of `encoding`
 * and advances *data past it.  Signed formats are sign-extended to 64 bits.
 * Aborts on an unknown format.
 *
 * Fixed-width values are copied out with memcpy because the data stream gives
 * no alignment guarantee for them.
 */
static uint64_t read_value(char encoding, unsigned char **data)
{
	uint64_t v = 0;
	switch ((int)get_encoding((unsigned char)encoding))
	{
		// Read fixed-length types
#define READ(dwarf, type) \
		case dwarf:\
		{\
			type tmp;\
			memcpy(&tmp, *data, sizeof(tmp));\
			v = (uint64_t)tmp;\
			*data += sizeof(tmp);\
			break;\
		}
		READ(DW_EH_PE_udata2, uint16_t)
		READ(DW_EH_PE_udata4, uint32_t)
		READ(DW_EH_PE_udata8, uint64_t)
		READ(DW_EH_PE_sdata2, int16_t)
		READ(DW_EH_PE_sdata4, int32_t)
		READ(DW_EH_PE_sdata8, int64_t)
		READ(DW_EH_PE_absptr, intptr_t)
#undef READ
		case DW_EH_PE_sleb128:
			v = (uint64_t)read_sleb128(data);
			break;
		case DW_EH_PE_uleb128:
			v = (uint64_t)read_uleb128(data);
			break;
		default:
			abort();
	}
	return v;
}

/**
 * Converts a raw value read with read_value() into its final form by adding
 * the base selected by `encoding` and, if the indirect flag is set, loading
 * the pointer stored at the resulting address.
 *
 * `start` is the address used as the base for pc-relative values.
 */
static uint64_t resolve_indirect_value(struct _Unwind_Context *c,
                                       unsigned char encoding,
                                       int64_t v,
                                       dw_eh_ptr_t start)
{
	// Do the arithmetic unsigned so that wrap-around is well defined.
	uint64_t result = (uint64_t)v;
	switch (get_base(encoding))
	{
		case DW_EH_PE_pcrel:
			result += (uint64_t)(uintptr_t)start;
			break;
		case DW_EH_PE_textrel:
			result += (uint64_t)(uintptr_t)_Unwind_GetTextRelBase(c);
			break;
		case DW_EH_PE_datarel:
			result += (uint64_t)(uintptr_t)_Unwind_GetDataRelBase(c);
			break;
		case DW_EH_PE_funcrel:
			result += (uint64_t)(uintptr_t)_Unwind_GetRegionStart(c);
			break;
		default:
			break;
	}
	// If this is an indirect value, then it is really the address of the real
	// value
	// TODO: Check whether this should really always be a pointer - it seems to
	// be a GCC extension, so not properly documented...
	if (is_indirect(encoding))
	{
		void *target;
		memcpy(&target, (const void*)(uintptr_t)result, sizeof(target));
		result = (uint64_t)(uintptr_t)target;
	}
	return result;
}

/**
 * Reads an encoding byte followed by a value in that encoding, resolves the
 * value, and stores it in *out.  If the encoding byte is DW_EH_PE_omit then
 * only that byte is consumed and *out is left untouched.
 */
static inline void read_value_with_encoding(struct _Unwind_Context *context,
                                            dw_eh_ptr_t *data,
                                            uint64_t *out)
{
	unsigned char encoding = *((*data)++);
	// If this value is omitted, skip it and don't touch the output value
	if (encoding == DW_EH_PE_omit) { return; }

	// pc-relative values are relative to the address of the encoded value
	// itself, i.e. the byte after the encoding byte.
	dw_eh_ptr_t start = *data;
	*out = read_value(encoding, data);
	*out = resolve_indirect_value(context, encoding, (int64_t)*out, start);
}

/**
 * Parsed header of a language-specific data area (LSDA).
 */
struct dwarf_eh_lsda
{
	/// Start of the region (function) this LSDA describes.
	dw_eh_ptr_t region_start;
	/// Base address that landing pad offsets are added to.
	dw_eh_ptr_t landing_pads;
	/// Address of the type table, or 0 if there is none.
	dw_eh_ptr_t type_table;
	/// Encoding byte for the type table entries.
	unsigned char type_table_encoding;
	/// First byte of the call site table.
	dw_eh_ptr_t call_site_table;
	/// First byte of the action table (one past the call site table).
	dw_eh_ptr_t action_table;
	/// Encoding used for the fields of call site records.
	unsigned char callsite_encoding;
};

/**
 * Parses the LSDA header that starts at `data` and returns the locations of
 * the tables that follow it.
 */
static inline struct dwarf_eh_lsda parse_lsda(struct _Unwind_Context *context,
                                              unsigned char *data)
{
	struct dwarf_eh_lsda lsda;

	lsda.region_start = (dw_eh_ptr_t)(uintptr_t)_Unwind_GetRegionStart(context);

	// If the landing pads are relative to anything other than the start of
	// this region, find out where.  This is @LPStart in the spec, although the
	// encoding that GCC uses does not quite match the spec.
	uint64_t v = (uint64_t)(uintptr_t)lsda.region_start;
	read_value_with_encoding(context, &data, &v);
	lsda.landing_pads = (dw_eh_ptr_t)(uintptr_t)v;

	// If there is a type table, find out where it is.  This is @TTBase in the
	// spec.  Note: we find whether there is a type table pointer by checking
	// whether the leading byte is DW_EH_PE_omit (0xff), which is not what the
	// spec says, but does seem to be how G++ indicates this.
	lsda.type_table = 0;
	lsda.type_table_encoding = *data++;
	if (lsda.type_table_encoding != DW_EH_PE_omit)
	{
		// The offset is measured from the byte following the offset field.
		v = (uint64_t)read_uleb128(&data);
		lsda.type_table = data + v;
	}

	lsda.callsite_encoding = *data++;

	// Action table is immediately after the call site table
	uintptr_t callsite_size = (uintptr_t)read_uleb128(&data);
	lsda.action_table = data + callsite_size;
	// Call site table is immediately after the header
	lsda.call_site_table = (dw_eh_ptr_t)data;

	return lsda;
}

/**
 * Result of a call site lookup.  Either field is 0 when the matching call
 * site record has no landing pad / no action record.
 */
struct dwarf_eh_action
{
	dw_eh_ptr_t landing_pad;
	dw_eh_ptr_t action_record;
};

/**
 * Look up the landing pad that corresponds to the current invoke.
 *
 * Both fields of *result are cleared first and then filled in from the call
 * site record that covers the current instruction pointer, if any.
 *
 * Returns true if record exists.
 */
static bool dwarf_eh_find_callsite(struct _Unwind_Context *context,
                                   struct dwarf_eh_lsda *lsda,
                                   struct dwarf_eh_action *result)
{
	result->action_record = 0;
	result->landing_pad = 0;
	// Offset of the instruction pointer from the start of the region.
	uint64_t ip = _Unwind_GetIP(context) - _Unwind_GetRegionStart(context);
	unsigned char *callsite_table = (unsigned char*)lsda->call_site_table;

	// The call site table ends where the action table begins, so stop as soon
	// as the cursor reaches it rather than decoding action table bytes as a
	// call site record.
	while (callsite_table < lsda->action_table)
	{
		// Once again, the layout deviates from the spec.
		uint64_t call_site_start, call_site_size, landing_pad, action;
		call_site_start = read_value(lsda->callsite_encoding, &callsite_table);
		call_site_size = read_value(lsda->callsite_encoding, &callsite_table);

		// Stop at the first record that starts after the instruction pointer.
		// NOTE(review): this relies on the records being ordered by start
		// address - confirm against the producer.
		if (call_site_start > ip) { break; }

		landing_pad = read_value(lsda->callsite_encoding, &callsite_table);
		action = (uint64_t)read_uleb128(&callsite_table);

		// We should not include call_site_start (beginning of the region)
		// address in the ip range.  For each call site:
		//
		// address1: call proc
		// address2: next instruction
		//
		// call stack contains address2 and not address1.
		// address1 can be at the end of another EH region.
		if (call_site_start < ip && ip <= call_site_start + call_site_size)
		{
			if (action)
			{
				// Action records are 1-biased so both no-record and zeroth
				// record can be stored.
				result->action_record = lsda->action_table + action - 1;
			}
			// No landing pad means keep unwinding.
			if (landing_pad)
			{
				// Landing pad is the offset from the value in the header
				result->landing_pad = lsda->landing_pads + landing_pad;
			}
			return true;
		}
	}
	return false;
}