summaryrefslogtreecommitdiff
path: root/src/dwarf_eh.h
blob: 02b9db9820ee04468349e9ef94e912bfcb9c564e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
#include <assert.h>

// _GNU_SOURCE must be defined for unwind.h to expose some of the functions
// that we want.  If it isn't, then we define it and undefine it to make sure
// that it doesn't impact the rest of the program.
#ifndef _GNU_SOURCE
#	define _GNU_SOURCE 1
#	include <unwind.h>
#	undef _GNU_SOURCE
#else
#	include <unwind.h>
#endif

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/// Byte-granular pointer used to walk DWARF exception-handling data.
typedef unsigned char *dw_eh_ptr_t;

/// Bit that turns an unsigned value format into its signed counterpart.
#define DW_EH_PE_signed 0x08

/**
 * DWARF value formats.  The unsigned formats are listed first; each signed
 * format is the matching unsigned one with DW_EH_PE_signed set.
 */
enum dwarf_data_encoding
{
	/// Variable-length, little-endian, base-128 unsigned integer
	DW_EH_PE_uleb128 = 0x01,
	/// Fixed-size 16-bit unsigned integer
	DW_EH_PE_udata2  = 0x02,
	/// Fixed-size 32-bit unsigned integer
	DW_EH_PE_udata4  = 0x03,
	/// Fixed-size 64-bit unsigned integer
	DW_EH_PE_udata8  = 0x04,

	/// Variable-length, little-endian, base-128 signed integer
	DW_EH_PE_sleb128 = DW_EH_PE_signed | DW_EH_PE_uleb128,
	/// Fixed-size 16-bit signed integer
	DW_EH_PE_sdata2  = DW_EH_PE_signed | DW_EH_PE_udata2,
	/// Fixed-size 32-bit signed integer
	DW_EH_PE_sdata4  = DW_EH_PE_signed | DW_EH_PE_udata4,
	/// Fixed-size 64-bit signed integer
	DW_EH_PE_sdata8  = DW_EH_PE_signed | DW_EH_PE_udata8
};

/**
 * Return the value-format part of an encoding byte, i.e. its low four bits.
 */
static inline enum dwarf_data_encoding get_encoding(unsigned char x)
{
	const int format_bits = x & 0x0f;
	return (enum dwarf_data_encoding)format_bits;
}

/**
 * What an encoded value is relative to, plus the special "omitted" and
 * "indirect" markers.  Members are listed in ascending numeric order.
 */
enum dwarf_data_relative
{
	/// Absolute pointer value
	DW_EH_PE_absptr   = 0x00,
	/// Value relative to program counter
	DW_EH_PE_pcrel    = 0x10,
	/// Value relative to the text segment
	DW_EH_PE_textrel  = 0x20,
	/// Value relative to the data segment
	DW_EH_PE_datarel  = 0x30,
	/// Value relative to the start of the function
	DW_EH_PE_funcrel  = 0x40,
	/// Aligned pointer (Not supported yet - are they actually used?)
	DW_EH_PE_aligned  = 0x50,
	/// Pointer points to address of real value
	DW_EH_PE_indirect = 0x80,
	/// Value is omitted
	DW_EH_PE_omit     = 0xff
};
/**
 * Return the "relative to" part of an encoding byte (bits 4-6).  The indirect
 * flag in bit 7 is masked off.
 */
static inline enum dwarf_data_relative get_base(unsigned char x)
{
	const int base_bits = x & 0x70;
	return (enum dwarf_data_relative)base_bits;
}
/**
 * Return non-zero (the DW_EH_PE_indirect bit itself) when the encoding byte
 * has the indirect flag set, and zero otherwise.
 */
static int is_indirect(unsigned char x)
{
	const int indirect_bit = x & DW_EH_PE_indirect;
	return indirect_bit;
}

/**
 * Return the size in bytes of a fixed-size field with the given encoding.
 *
 * Only the low three bits of `type` are examined.  Formats that do not have
 * a fixed size (anything other than absptr, udata2, udata4 or udata8 after
 * masking) terminate the program via abort().
 */
static inline int dwarf_size_of_fixed_size_field(unsigned char type)
{
	const int format = type & 7;

	if (format == DW_EH_PE_udata2) { return 2; }
	if (format == DW_EH_PE_udata4) { return 4; }
	if (format == DW_EH_PE_udata8) { return 8; }
	// An absolute pointer is as wide as a native pointer
	if (format == DW_EH_PE_absptr) { return sizeof(void*); }

	abort();
}

/**
 * Read an unsigned, little-endian, base-128, DWARF value.  Updates *data to
 * point to the end of the value.
 *
 * On return *b holds the number of payload bits consumed (7 per octet read).
 * Asserts if the encoding would need a shift of 64 bits or more.
 */
static uint64_t read_leb128(unsigned char** data, int *b)
{
	uint64_t uleb = 0;
	unsigned int bit = 0;
	unsigned char octet;
	// We have to read at least one octet, and keep reading until we get to one
	// with the high bit unset
	do
	{
		// This check is a bit too strict - we should also check the highest
		// bit of the digit.
		assert(bit < sizeof(uint64_t) * 8);
		// Fetch the next octet and step past it
		octet = *(*data)++;
		// Widen the 7-bit digit *before* shifting: shifting the promoted int
		// would be undefined (and lose bits) once bit exceeds 31.
		uleb += (uint64_t)(octet & 0x7f) << bit;
		// Increase the shift value
		bit += 7;
		// The high bit of each octet is the continuation flag
	} while (octet & 0x80);
	*b = (int)bit;

	return uleb;
}

/**
 * Read an unsigned LEB128 value and advance *data past it.  The value is
 * returned through a signed 64-bit type, so values above INT64_MAX come back
 * negative.
 */
static int64_t read_uleb128(unsigned char** data)
{
	int b;
	return read_leb128(data, &b);
}


/**
 * Read a signed LEB128 value and advance *data past it.
 */
static int64_t read_sleb128(unsigned char** data)
{
	int bits;
	// Read as if it's unsigned
	uint64_t uleb = read_leb128(data, &bits);
	// If the most significant bit read is 1, then we need to sign extend it.
	// When 64 or more bits were read the value already fills the result and
	// shifting by bits would be undefined, so leave it alone.
	if (bits < 64 && ((uleb >> (bits - 1)) & 1))
	{
		// Sign extend by setting all bits in front of it to 1.  Done on an
		// unsigned value because left-shifting a negative one is undefined.
		uleb |= ~(uint64_t)0 << bits;
	}
	return (int64_t)uleb;
}

static uint64_t read_value(char encoding, unsigned char **data)
{
	enum dwarf_data_encoding type = get_encoding(encoding);
	uint64_t v;
	switch (type)
	{
		// Read fixed-length types
#define READ(dwarf, type) \
		case dwarf:\
			v = (uint64_t)(*(type*)(*data));\
			*data += sizeof(type);\
			break;
		READ(DW_EH_PE_udata2, uint16_t)
		READ(DW_EH_PE_udata4, uint32_t)
		READ(DW_EH_PE_udata8, uint64_t)
		READ(DW_EH_PE_sdata2, int16_t)
		READ(DW_EH_PE_sdata4, int32_t)
		READ(DW_EH_PE_sdata8, int64_t)
		READ(DW_EH_PE_absptr, intptr_t)
#undef READ
		case DW_EH_PE_sleb128:
			v = read_sleb128(data);
			break;
		case DW_EH_PE_uleb128:
			v = read_uleb128(data);
			break;
		default: abort();
	}

	return v;
}

/**
 * Turn a raw value read from the table into a final address.
 *
 * The base selected by bits 4-6 of `encoding` is added to `v`: `start` for
 * pc-relative values, or the text-relative base, data-relative base or region
 * start obtained from the unwind context `c`.  Other bases leave `v`
 * unchanged.  If the indirect flag is set, the result is treated as the
 * address of a pointer and that pointer is loaded and returned instead.
 */
static uint64_t resolve_indirect_value(_Unwind_Context *c, unsigned char encoding, int64_t v, dw_eh_ptr_t start)
{
	// Do the arithmetic on an unsigned value so that wrap-around is well
	// defined.
	uint64_t addr = (uint64_t)v;
	switch (get_base(encoding))
	{
		case DW_EH_PE_pcrel:
			// Go through uintptr_t: converting a pointer straight to a wider
			// integer may sign-extend it on 32-bit targets.
			addr += (uint64_t)(uintptr_t)start;
			break;
		case DW_EH_PE_textrel:
			addr += (uint64_t)(uintptr_t)_Unwind_GetTextRelBase(c);
			break;
		case DW_EH_PE_datarel:
			addr += (uint64_t)(uintptr_t)_Unwind_GetDataRelBase(c);
			break;
		case DW_EH_PE_funcrel:
			addr += (uint64_t)(uintptr_t)_Unwind_GetRegionStart(c);
			break;
		default:
			break;
	}
	// If this is an indirect value, then it is really the address of the real
	// value
	// TODO: Check whether this should really always be a pointer - it seems to
	// be a GCC extension, so not properly documented...
	if (is_indirect(encoding))
	{
		addr = (uint64_t)(uintptr_t)*(void**)(uintptr_t)addr;
	}
	return addr;
}


/**
 * Read an encoding byte followed by a value in that encoding, and store the
 * resolved result in *out.  *data is advanced past everything consumed.
 *
 * If the encoding byte is DW_EH_PE_omit only that byte is consumed and *out
 * is left untouched, so the caller can pre-load it with a default.
 */
static inline void read_value_with_encoding(_Unwind_Context *context,
                                            dw_eh_ptr_t *data,
                                            uint64_t *out)
{
	// NOTE(review): the base handed on for pc-relative values is the address
	// of the encoding byte, not of the value after it - confirm this is
	// intended.
	dw_eh_ptr_t base = *data;
	unsigned char encoding = *base;
	*data = base + 1;

	if (encoding != DW_EH_PE_omit)
	{
		uint64_t raw = read_value(encoding, data);
		*out = resolve_indirect_value(context, encoding, raw, base);
	}
}


/**
 * Decoded header of a language-specific data area, as filled in by
 * parse_lsda().
 */
struct dwarf_eh_lsda
{
	/// Region start address reported by _Unwind_GetRegionStart()
	dw_eh_ptr_t region_start;
	/// Base that landing pad offsets are added to; parse_lsda() leaves this
	/// equal to region_start when the header omits an explicit value
	dw_eh_ptr_t landing_pads;
	/// Address derived from the type table offset in the header, or 0 when
	/// the type table encoding is DW_EH_PE_omit
	dw_eh_ptr_t type_table;
	/// Encoding byte read from the header for the type table
	unsigned char type_table_encoding;
	/// First byte of the call site table
	dw_eh_ptr_t call_site_table;
	/// First byte after the call site table
	dw_eh_ptr_t action_table;
	/// Encoding used for the start, size and landing pad fields of each call
	/// site record
	unsigned char callsite_encoding;
};

/**
 * Decode the header of the language-specific data area that starts at `data`
 * and return the locations of the tables that follow it.
 *
 * The header is read in order: landing pad base (@LPStart), type table
 * encoding and offset (@TTBase), call site encoding, call site table size.
 */
static inline struct dwarf_eh_lsda parse_lsda(_Unwind_Context *context, unsigned char *data)
{
	struct dwarf_eh_lsda lsda;
	dw_eh_ptr_t cursor = data;

	lsda.region_start = (dw_eh_ptr_t)(uintptr_t)_Unwind_GetRegionStart(context);

	// Landing pads default to being relative to the start of this region.
	// The default survives when the header omits @LPStart.  The encoding
	// that GCC uses here does not quite match the spec.
	uint64_t landing_pad_base = (uint64_t)(uintptr_t)lsda.region_start;
	read_value_with_encoding(context, &cursor, &landing_pad_base);
	lsda.landing_pads = (dw_eh_ptr_t)(uintptr_t)landing_pad_base;

	// A leading DW_EH_PE_omit (0xff) byte means there is no type table.  This
	// is not what the spec says, but does seem to be how G++ indicates it.
	lsda.type_table_encoding = *cursor++;
	if (lsda.type_table_encoding == DW_EH_PE_omit)
	{
		lsda.type_table = 0;
	}
	else
	{
		// The offset is measured from the byte that follows the offset field
		uint64_t type_table_offset = read_uleb128(&cursor);
		lsda.type_table = cursor + type_table_offset;
	}

	lsda.callsite_encoding = *cursor++;

	// The call site table is immediately after the header, and the action
	// table is immediately after the call site table.
	uintptr_t callsite_size = (uintptr_t)read_uleb128(&cursor);
	lsda.call_site_table = cursor;
	lsda.action_table = cursor + callsite_size;

	return lsda;
}

/**
 * Result of a call site lookup, as returned by dwarf_eh_find_callsite().
 * Both fields are 0 when nothing was found.
 */
struct dwarf_eh_action
{
	/// Address of the landing pad, or 0 when the call site has none
	dw_eh_ptr_t landing_pad;
	/// Pointer to the action record within the action table, or 0 when the
	/// call site has no action
	dw_eh_ptr_t action_record;
};

/**
 * Look up the landing pad that corresponds to the current invoke.
 *
 * Walks the call site table described by `lsda` looking for the record whose
 * range covers the instruction pointer of `context`, expressed as an offset
 * from the region start.  Returns the landing pad and action record for that
 * call site; either field is 0 when the record has none, and both are 0 when
 * no record matches.
 */
static struct dwarf_eh_action 
	dwarf_eh_find_callsite(struct _Unwind_Context *context, struct dwarf_eh_lsda *lsda)
{
	struct dwarf_eh_action result = { 0, 0};
	uint64_t ip = _Unwind_GetIP(context) - _Unwind_GetRegionStart(context);
	unsigned char *callsite_table = (unsigned char*)lsda->call_site_table;
	// The action table starts on the first byte after the last call site
	// record, so stop as soon as we reach it rather than decoding its
	// contents as one more record.
	while (callsite_table < lsda->action_table)
	{
		// Once again, the layout deviates from the spec.
		uint64_t call_site_start, call_site_size, landing_pad, action;
		call_site_start = read_value(lsda->callsite_encoding, &callsite_table);
		call_site_size = read_value(lsda->callsite_encoding, &callsite_table);

		// Call site entries are sorted, so once we see one that begins after
		// the instruction pointer there is nothing left that can match.
		if (call_site_start > ip) { break; }

		landing_pad = read_value(lsda->callsite_encoding, &callsite_table);
		action = read_uleb128(&callsite_table);

		// The instruction pointer is the address of the instruction *after*
		// the call, so it may equal the end of the range but must not equal
		// its start: a return address that sits exactly on the start of a
		// range belongs to a call that precedes the range.
		if (call_site_start < ip && ip <= call_site_start + call_site_size)
		{
			if (action)
			{
				// Action records are 1-biased so both no-record and zeroth
				// record can be stored.
				result.action_record = lsda->action_table + action - 1;
			}
			// No landing pad means keep unwinding.
			if (landing_pad)
			{
				// Landing pad is the offset from the value in the header
				result.landing_pad = lsda->landing_pads + landing_pad;
			}
			break;
		}
	}
	return result;
}