forked from vnmakarov/mir
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mir-aarch64.c
375 lines (341 loc) · 15.5 KB
/
mir-aarch64.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
/* This file is a part of MIR project.
Copyright (C) 2018-2020 Vladimir Makarov <[email protected]>.
*/
#define VA_LIST_IS_ARRAY_P 0
// _MIR_get_thunk, _MIR_redirect_thunk, _MIR_get_interp_shim, _MIR_get_ff_call, _MIR_get_wrapper
void *_MIR_get_bstart_builtin (MIR_context_t ctx) {
static const uint32_t bstart_code[] = {
0x910003e0, /* r0 = rsp */
0xd65f03c0, /* ret r30 */
};
return _MIR_publish_code (ctx, (uint8_t *) bstart_code, sizeof (bstart_code));
}
void *_MIR_get_bend_builtin (MIR_context_t ctx) {
static const uint32_t bend_code[] = {
0x9100001f, /* rsp = r0 */
0xd65f03c0, /* ret r30 */
};
return _MIR_publish_code (ctx, (uint8_t *) bend_code, sizeof (bend_code));
}
struct aarch64_va_list {
/* address following the last (highest addressed) named incoming
argument on the stack, rounded upwards to a multiple of 8 bytes,
or if there are no named arguments on the stack, then the value
of the stack pointer when the function was entered. */
void *__stack;
/* the address of the byte immediately following the general
register argument save area, the end of the save area being
aligned to a 16 byte boundary. */
void *__gr_top;
/* the address of the byte immediately following the FP/SIMD
register argument save area, the end of the save area being
aligned to a 16 byte boundary. */
void *__vr_top;
int __gr_offs; /* set to 0 – ((8 – named_gr) * 8) */
int __vr_offs; /* set to 0 – ((8 – named_vr) * 16) */
};
void *va_arg_builtin (void *p, uint64_t t) {
struct aarch64_va_list *va = p;
MIR_type_t type = t;
int fp_p = type == MIR_T_F || type == MIR_T_D || type == MIR_T_LD;
void *a;
if (fp_p && va->__vr_offs < 0) {
a = (char *) va->__vr_top + va->__vr_offs;
va->__vr_offs += 16;
} else if (!fp_p && va->__gr_offs < 0) {
a = (char *) va->__gr_top + va->__gr_offs;
va->__gr_offs += 8;
} else {
if (type == MIR_T_LD) va->__stack = (void *) (((uint64_t) va->__stack + 15) % 16);
a = va->__stack;
va->__stack = (char *) va->__stack + (type == MIR_T_LD ? 16 : 8);
}
return a;
}
void va_start_interp_builtin (MIR_context_t ctx, void *p, void *a) {
struct aarch64_va_list *va = p;
va_list *vap = a;
assert (sizeof (struct aarch64_va_list) == sizeof (va_list));
*va = *(struct aarch64_va_list *) vap;
}
void va_end_interp_builtin (MIR_context_t ctx, void *p) {}
static int setup_imm64_insns (MIR_context_t ctx, uint32_t *to, int reg, uint64_t imm64) {
/* xd=imm64 */
static const uint32_t imm64_pat[] = {
0xd2800000, /* 0: mov xd, xxxx(0-15) */
0xf2a00000, /* 4: movk xd, xxxx(16-31) */
0xf2c00000, /* 8: movk xd, xxxx(32-47) */
0xf2e00000, /* 12: movk xd, xxxx(48-63) */
};
uint32_t mask = ~(0xffff << 5);
mir_assert (0 <= reg && reg <= 31);
to[0] = (imm64_pat[0] & mask) | ((uint32_t) (imm64 & 0xffff) << 5) | reg;
to[1] = (imm64_pat[1] & mask) | (((uint32_t) (imm64 >> 16) & 0xffff) << 5) | reg;
to[2] = (imm64_pat[2] & mask) | (((uint32_t) (imm64 >> 32) & 0xffff) << 5) | reg;
to[3] = (imm64_pat[3] & mask) | (((uint32_t) (imm64 >> 48) & 0xffff) << 5) | reg;
return sizeof (imm64_pat) / sizeof (uint32_t);
}
static void push_insns (MIR_context_t ctx, const uint32_t *pat, size_t pat_len) {
uint8_t *p = (uint8_t *) pat;
for (size_t i = 0; i < pat_len; i++) VARR_PUSH (uint8_t, machine_insns, p[i]);
}
static size_t gen_mov_addr (MIR_context_t ctx, int reg, void *addr) {
uint32_t insns[4];
int insns_num = setup_imm64_insns (ctx, insns, reg, (uint64_t) addr);
mir_assert (insns_num == 4 && sizeof (insns) == insns_num * sizeof (uint32_t));
push_insns (ctx, insns, insns_num * sizeof (uint32_t));
return insns_num * sizeof (uint32_t);
}
#define BR_OFFSET_BITS 26
#define MAX_BR_OFFSET (1 << (BR_OFFSET_BITS - 1)) /* 1 for sign */
#define BR_OFFSET_MASK (~(-1 << BR_OFFSET_BITS))
static void gen_call_addr (MIR_context_t ctx, void *base_addr, int temp_reg, void *call_addr) {
static const uint32_t call_pat1 = 0x94000000; /* bl x */
static const uint32_t call_pat2 = 0xd63f0000; /* blr x */
uint32_t insn;
int64_t offset = (uint32_t *) call_addr - (uint32_t *) base_addr;
mir_assert (0 <= temp_reg && temp_reg <= 31);
if (base_addr != NULL && -(int64_t) MAX_BR_OFFSET <= offset && offset < (int64_t) MAX_BR_OFFSET) {
insn = call_pat1 | ((uint32_t) offset & BR_OFFSET_MASK);
} else {
gen_mov_addr (ctx, temp_reg, call_addr);
insn = call_pat2 | (temp_reg << 5);
}
push_insns (ctx, &insn, sizeof (insn));
}
#define NOP 0xd503201f
void *_MIR_get_thunk (MIR_context_t ctx) {
int pat[5] = {NOP, NOP, NOP, NOP, NOP}; /* maximal size thunk -- see _MIR_redirect_thunk */
return _MIR_publish_code (ctx, (uint8_t *) pat, sizeof (pat));
}
void _MIR_redirect_thunk (MIR_context_t ctx, void *thunk, void *to) {
static const uint32_t branch_pat1 = 0xd61f0120; /* br x9 */
static const uint32_t branch_pat2 = 0x14000000; /* b x */
int64_t offset = (uint32_t *) to - (uint32_t *) thunk;
uint32_t code[5];
mir_assert (((uint64_t) thunk & 0x3) == 0 && ((uint64_t) to & 0x3) == 0); /* alignment */
if (-(int64_t) MAX_BR_OFFSET <= offset && offset < (int64_t) MAX_BR_OFFSET) {
code[0] = branch_pat2 | ((uint32_t) offset & BR_OFFSET_MASK);
_MIR_change_code (ctx, thunk, (uint8_t *) &code[0], sizeof (code[0]));
} else {
int n = setup_imm64_insns (ctx, code, 9, (uint64_t) to);
mir_assert (n == 4);
code[4] = branch_pat1;
_MIR_change_code (ctx, thunk, (uint8_t *) code, sizeof (code));
}
}
/* save r0-r7, v0-v7 */
static const uint32_t save_insns[] = {
0xa9bf1fe6, /* stp R6, R7, [SP, #-16]! */
0xa9bf17e4, /* stp R4, R5, [SP, #-16]! */
0xa9bf0fe2, /* stp R2, R3, [SP, #-16]! */
0xa9bf07e0, /* stp R0, R1, [SP, #-16]! */
0xadbf1fe6, /* stp Q6, Q7, [SP, #-32]! */
0xadbf17e4, /* stp Q4, Q5, [SP, #-32]! */
0xadbf0fe2, /* stp Q2, Q3, [SP, #-32]! */
0xadbf07e0, /* stp Q0, Q1, [SP, #-32]! */
};
static const uint32_t restore_insns[] = {
0xacc107e0, /* ldp Q0, Q1, SP, #32 */
0xacc10fe2, /* ldp Q2, Q3, SP, #32 */
0xacc117e4, /* ldp Q4, Q5, SP, #32 */
0xacc11fe6, /* ldp Q6, Q7, SP, #32 */
0xa8c107e0, /* ldp R0, R1, SP, #16 */
0xa8c10fe2, /* ldp R2, R3, SP, #16 */
0xa8c117e4, /* ldp R4, R5, SP, #16 */
0xa8c11fe6, /* ldp R6, R7, SP, #16 */
};
static const uint32_t ld_pat = 0xf9400260; /* ldr x, [x19], offset */
static const uint32_t lds_pat = 0xbd400260; /* ldr s, [x19], offset */
static const uint32_t ldd_pat = 0xfd400260; /* ldr d, [x19], offset */
static const uint32_t ldld_pat = 0x3dc00260; /* ldr q, [x19], offset */
/* Generation: fun (fun_addr, res_arg_addresses):
push x19, x30; sp-=sp_offset; x9=fun_addr; x19=res/arg_addrs
x8=mem[x19,<offset>]; (arg_reg=mem[x8] or x8=mem[x8];mem[sp,sp_offset]=x8) ...
call fun_addr; sp+=offset
x8=mem[x19,<offset>]; res_reg=mem[x8]; ...
pop x19, x30; ret x30. */
void *_MIR_get_ff_call (MIR_context_t ctx, size_t nres, MIR_type_t *res_types, size_t nargs,
MIR_type_t *arg_types, int vararg_p) {
static const uint32_t prolog[] = {
0xa9bf7bf3, /* stp x19,x30,[sp, -16]! */
0xd10003ff, /* sub sp,sp,<sp_offset> */
0xaa0003e9, /* mov x9,x0 # fun addr */
0xaa0103f3, /* mov x19, x1 # result/arg addresses */
};
static const uint32_t call_end[] = {
0xd63f0120, /* blr x9 */
0x910003ff, /* add sp,sp,<sp_offset> */
};
static const uint32_t epilog[] = {
0xa8c17bf3, /* ldp x19,x30,[sp],16 */
0xd65f03c0, /* ret x30 */
};
static const uint32_t st_pat = 0xf9000000; /* str x, [xn|sp], offset */
static const uint32_t sts_pat = 0xbd000000; /* str s, [xn|sp], offset */
static const uint32_t std_pat = 0xfd000000; /* str d, [xn|sp], offset */
static const uint32_t stld_pat = 0x3d800000; /* str q, [xn|sp], offset */
uint32_t n_xregs = 0, n_vregs = 0, sp_offset = 0, pat, offset_imm, scale, sp = 31;
uint32_t *addr;
const uint32_t temp_reg = 8; /* x8 or v9 */
VARR_TRUNC (uint8_t, machine_insns, 0);
push_insns (ctx, prolog, sizeof (prolog));
mir_assert (sizeof (long double) == 16);
for (size_t i = 0; i < nargs; i++) { /* args */
scale = arg_types[i] == MIR_T_F ? 2 : arg_types[i] == MIR_T_LD ? 4 : 3;
offset_imm = (((i + nres) * sizeof (long double) << 10)) >> scale;
if ((MIR_T_I8 <= arg_types[i] && arg_types[i] <= MIR_T_U64) || arg_types[i] == MIR_T_P) {
if (n_xregs < 8) {
pat = ld_pat | offset_imm | n_xregs++;
} else {
pat = ld_pat | offset_imm | temp_reg;
push_insns (ctx, &pat, sizeof (pat));
pat = st_pat | ((sp_offset >> scale) << 10) | temp_reg | (sp << 5);
sp_offset += 8;
}
push_insns (ctx, &pat, sizeof (pat));
} else if (arg_types[i] == MIR_T_F || arg_types[i] == MIR_T_D || arg_types[i] == MIR_T_LD) {
pat = arg_types[i] == MIR_T_F ? lds_pat : arg_types[i] == MIR_T_D ? ldd_pat : ldld_pat;
if (n_vregs < 8) {
pat |= offset_imm | n_vregs++;
} else {
if (arg_types[i] == MIR_T_LD) sp_offset = (sp_offset + 15) % 16;
pat |= offset_imm | temp_reg;
push_insns (ctx, &pat, sizeof (pat));
pat = arg_types[i] == MIR_T_F ? sts_pat : arg_types[i] == MIR_T_D ? std_pat : stld_pat;
pat |= ((sp_offset >> scale) << 10) | temp_reg | (sp << 5);
sp_offset += arg_types[i] == MIR_T_LD ? 16 : 8;
}
push_insns (ctx, &pat, sizeof (pat));
} else {
(*error_func) (MIR_call_op_error, "wrong type of arg value");
}
}
sp_offset = (sp_offset + 15) / 16 * 16;
mir_assert (sp_offset < (1 << 12));
((uint32_t *) VARR_ADDR (uint8_t, machine_insns))[1] |= sp_offset << 10; /* sub sp,sp,<offset> */
push_insns (ctx, call_end, sizeof (call_end));
((uint32_t *) (VARR_ADDR (uint8_t, machine_insns) + VARR_LENGTH (uint8_t, machine_insns)))[-1]
|= sp_offset << 10;
n_xregs = n_vregs = 0;
for (size_t i = 0; i < nres; i++) { /* results */
offset_imm = i * sizeof (long double) << 10;
offset_imm >>= res_types[i] == MIR_T_F ? 2 : res_types[i] == MIR_T_D ? 3 : 4;
if (((MIR_T_I8 <= res_types[i] && res_types[i] <= MIR_T_U64) || res_types[i] == MIR_T_P)
&& n_xregs < 8) {
pat = st_pat | offset_imm | n_xregs++ | (19 << 5);
push_insns (ctx, &pat, sizeof (pat));
} else if ((res_types[i] == MIR_T_F || res_types[i] == MIR_T_D || res_types[i] == MIR_T_LD)
&& n_vregs < 8) {
pat = res_types[i] == MIR_T_F ? sts_pat : res_types[i] == MIR_T_D ? std_pat : stld_pat;
pat |= offset_imm | n_vregs++ | (19 << 5);
push_insns (ctx, &pat, sizeof (pat));
} else {
(*error_func) (MIR_ret_error, "x86-64 can not handle this combination of return values");
}
}
push_insns (ctx, epilog, sizeof (epilog));
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
}
/* Transform C call to call of void handler (MIR_context_t ctx, MIR_item_t func_item,
va_list va, MIR_val_t *results) */
void *_MIR_get_interp_shim (MIR_context_t ctx, MIR_item_t func_item, void *handler) {
static const uint32_t save_x19_pat = 0xf81f0ff3; /* str x19, [sp,-16]! */
static const uint32_t prepare_pat[] = {
0xd10083ff, /* sub sp, sp, 32 # allocate va_list */
0x910003e8, /* mov x8, sp # va_list addr */
0x128007e9, /* mov w9, #-64 # gr_offs */
0xb9001909, /* str w9,[x8, 24] # va_list.gr_offs */
0x12800fe9, /* mov w9, #-128 # vr_offs */
0xb9001d09, /* str w9,[x8, 28] #va_list.vr_offs */
0x910383e9, /* add x9, sp, #224 # gr_top */
0xf9000509, /* str x9,[x8, 8] # va_list.gr_top */
0x91004129, /* add x9, x9, #16 # stack */
0xf9000109, /* str x9,[x8] # valist.stack */
0x910283e9, /* add x9, sp, #160 # vr_top*/
0xf9000909, /* str x9,[x8, 16] # va_list.vr_top */
0xaa0803e2, /* mov x2, x8 # va arg */
0xd2800009, /* mov x9, <(nres+1)*16> */
0xcb2963ff, /* sub sp, sp, x9 */
0x910043e3, /* add x3, sp, 16 # results arg */
0xaa0303f3, /* mov x19, x3 # results */
0xf90003fe, /* str x30, [sp] # save lr */
};
static const uint32_t shim_end[] = {
0xf94003fe, /* ldr x30, [sp] */
0xd2800009, /* mov x9, 224+(nres+1)*16 */
0x8b2963ff, /* add sp, sp, x9 */
0xf84107f3, /* ldr x19, sp, 16 */
0xd65f03c0, /* ret x30 */
};
uint32_t pat, imm, n_xregs, n_vregs, offset, offset_imm;
uint32_t nres = func_item->u.func->nres;
MIR_type_t *results = func_item->u.func->res_types;
VARR_TRUNC (uint8_t, machine_insns, 0);
push_insns (ctx, &save_x19_pat, sizeof (save_x19_pat));
push_insns (ctx, save_insns, sizeof (save_insns));
push_insns (ctx, prepare_pat, sizeof (prepare_pat));
imm = (nres + 1) * 16;
mir_assert (imm < (1 << 16));
((uint32_t *) (VARR_ADDR (uint8_t, machine_insns) + VARR_LENGTH (uint8_t, machine_insns)))[-5]
|= imm << 5;
gen_mov_addr (ctx, 0, ctx); /* mov x0, ctx */
gen_mov_addr (ctx, 1, func_item); /* mov x1, func_item */
gen_call_addr (ctx, NULL, 9, handler);
/* move results: */
n_xregs = n_vregs = offset = 0;
mir_assert (sizeof (long double) == 16);
for (uint32_t i = 0; i < nres; i++) {
if ((results[i] == MIR_T_F || results[i] == MIR_T_D || results[i] == MIR_T_LD) && n_vregs < 8) {
pat = results[i] == MIR_T_F ? lds_pat : results[i] == MIR_T_D ? ldd_pat : ldld_pat;
pat |= n_vregs;
n_vregs++;
} else if (n_xregs < 8) { // ??? ltp use
pat = ld_pat | n_xregs;
n_xregs++;
} else {
(*error_func) (MIR_ret_error, "aarch64 can not handle this combination of return values");
}
offset_imm = offset >> (results[i] == MIR_T_F ? 2 : results[i] == MIR_T_LD ? 4 : 3);
mir_assert (offset_imm < (1 << 12));
pat |= offset_imm << 10;
push_insns (ctx, &pat, sizeof (pat));
offset += 16;
}
push_insns (ctx, shim_end, sizeof (shim_end));
imm = 224 + (nres + 1) * 16;
mir_assert (imm < (1 << 16));
((uint32_t *) (VARR_ADDR (uint8_t, machine_insns) + VARR_LENGTH (uint8_t, machine_insns)))[-4]
|= imm << 5;
return _MIR_publish_code (ctx, VARR_ADDR (uint8_t, machine_insns),
VARR_LENGTH (uint8_t, machine_insns));
}
/* Save regs x0-x7, q0-q7; x9 = call hook_address (ctx, called_func); restore regs; br x9 */
void *_MIR_get_wrapper (MIR_context_t ctx, MIR_item_t called_func, void *hook_address) {
static const uint32_t jmp_insn = 0xd61f0120; /* br x9 */
static const uint32_t move_insn = 0xaa0003e9; /* mov x9, x0 */
static const uint32_t save_fplr = 0xa9bf7bfd; /* stp R29, R30, [SP, #-16]! */
static const uint32_t restore_fplr = 0xa8c17bfd; /* ldp R29, R30, SP, #16 */
uint8_t *base_addr, *curr_addr, *code;
size_t len = sizeof (save_insns) + sizeof (restore_insns); /* initial code length */
for (;;) {
curr_addr = base_addr = _MIR_get_new_code_addr (ctx, len);
if (curr_addr == NULL) return NULL;
VARR_TRUNC (uint8_t, machine_insns, 0);
push_insns (ctx, &save_fplr, sizeof (save_fplr));
curr_addr += 4;
push_insns (ctx, save_insns, sizeof (save_insns));
curr_addr += sizeof (save_insns);
curr_addr += gen_mov_addr (ctx, 0, ctx); /*mov x0,ctx */
curr_addr += gen_mov_addr (ctx, 1, called_func); /*mov x1,called_func */
gen_call_addr (ctx, curr_addr, 10, hook_address); /*call <hook_address>, use x10 as temp */
push_insns (ctx, &move_insn, sizeof (move_insn));
push_insns (ctx, restore_insns, sizeof (restore_insns));
push_insns (ctx, &restore_fplr, sizeof (restore_fplr));
push_insns (ctx, &jmp_insn, sizeof (jmp_insn));
len = VARR_LENGTH (uint8_t, machine_insns);
code = _MIR_publish_code_by_addr (ctx, base_addr, VARR_ADDR (uint8_t, machine_insns), len);
if (code != NULL) return code;
}
}