From 9c46483c518e26f56567860eb48851d60f2badef Mon Sep 17 00:00:00 2001 From: Andrei Lascu Date: Thu, 29 Feb 2024 13:03:10 +0000 Subject: [PATCH] Eager object symbol relocation Looking around in `printf`, we noticed it didn't work as objects were not being relocated across libraries. This PR enables this relocation; as well as adding a few extra tests. TODOs: * we do not properly handle GLOBAL vs LOCAL relocations; it might be a small optimization to ensure we hold the two symbol types in different containers, so that when we do relocate, we don't needlessly look around at LOCAL bound symbols; * WEAK bound symbols aren't relocated; * some of the added tests are currently not fully workingm mainly due to (we believe) WEAK bound symbols, which are planned to be fixed shortly. --- include/compartment.h | 2 + src/compartment.c | 141 +++++++++++++++++++++------- tests/CMakeLists.txt | 20 +++- tests/simple_call_internal_static.c | 16 ++++ tests/simple_call_internal_weak.c | 12 +++ tests/simple_fopen.c | 38 ++++++++ tests/simple_fputs.c | 12 +++ tests/simple_printf.c | 6 +- tests/simple_static_var-external.c | 1 + tests/simple_static_var.c | 10 ++ tests/simple_va_args.c | 26 +++++ 11 files changed, 246 insertions(+), 38 deletions(-) create mode 100644 tests/simple_call_internal_static.c create mode 100644 tests/simple_call_internal_weak.c create mode 100644 tests/simple_fopen.c create mode 100644 tests/simple_fputs.c create mode 100644 tests/simple_static_var-external.c create mode 100644 tests/simple_static_var.c create mode 100644 tests/simple_va_args.c diff --git a/include/compartment.h b/include/compartment.h index a1b82c2..5146432 100644 --- a/include/compartment.h +++ b/include/compartment.h @@ -114,6 +114,7 @@ struct LibRelaMapping char *rela_name; void *rela_address; // address of relocation in compartment void *target_func_address; // address of actual function + unsigned short rela_type; }; /* Struct representing a symbol entry of a dependency library @@ -122,6 +123,7 @@ struct LibDependencySymbol { char *sym_name; void *sym_offset; + unsigned short sym_type; }; /* Struct representing the result of searching for a library symbol in a diff --git a/src/compartment.c b/src/compartment.c index 0485b63..9019c9a 100644 --- a/src/compartment.c +++ b/src/compartment.c @@ -15,7 +15,7 @@ parse_lib_segs(Elf64_Ehdr *, int, struct LibDependency *, struct Compartment *); static void parse_lib_symtb(Elf64_Shdr *, Elf64_Ehdr *, int, struct LibDependency *); static void -parse_lib_relaplt(Elf64_Shdr *, Elf64_Ehdr *, int, struct LibDependency *); +parse_lib_rela(Elf64_Shdr *, Elf64_Ehdr *, int, struct LibDependency *); static void parse_lib_dynamic_deps(Elf64_Shdr *, Elf64_Ehdr *, int, struct LibDependency *); static void @@ -25,7 +25,7 @@ find_comp_intercepts(char **, void **, size_t, struct Compartment *); static void resolve_rela_syms(struct Compartment *); static struct LibSymSearchResult -find_lib_dep_sym_in_comp(const char *, struct Compartment *); +find_lib_dep_sym_in_comp(const char *, struct Compartment *, unsigned short); static void * extract_sym_offset(struct Compartment *, struct LibSymSearchResult); @@ -126,6 +126,7 @@ comp_from_elf(char *filename, char **entry_points, size_t entry_point_count, { struct LibDependency *parsed_lib = parse_lib_file(libs_to_parse[libs_parsed_count], new_comp); + print_lib_dep(parsed_lib); const unsigned short libs_to_search_count = libs_to_parse_count; for (size_t i = 0; i < parsed_lib->lib_dep_count; ++i) @@ -567,7 +568,7 @@ parse_lib_file(char *lib_name, struct Compartment *new_comp) // that this can be changed in future specifications. // // Source: https://refspecs.linuxfoundation.org/elf/elf.pdf - const size_t headers_of_interest_count = 3; + const size_t headers_of_interest_count = 4; size_t found_headers = 0; Elf64_Shdr curr_shdr; for (size_t i = 0; i < lib_ehdr.e_shnum; ++i) @@ -584,7 +585,13 @@ parse_lib_file(char *lib_name, struct Compartment *new_comp) else if (curr_shdr.sh_type == SHT_RELA && !strcmp(&shstrtab[curr_shdr.sh_name], ".rela.plt")) { - parse_lib_relaplt(&curr_shdr, &lib_ehdr, lib_fd, new_lib); + parse_lib_rela(&curr_shdr, &lib_ehdr, lib_fd, new_lib); + found_headers += 1; + } + else if (curr_shdr.sh_type == SHT_RELA + && !strcmp(&shstrtab[curr_shdr.sh_name], ".rela.dyn")) + { + parse_lib_rela(&curr_shdr, &lib_ehdr, lib_fd, new_lib); found_headers += 1; } // Lookup `.dynamic` to find library dependencies @@ -679,11 +686,6 @@ parse_lib_symtb(Elf64_Shdr *symtb_shdr, Elf64_Ehdr *lib_ehdr, int lib_fd, for (size_t j = 0; j < lib_dep->lib_syms_count; ++j) { curr_sym = sym_tb[j]; - // TODO only handling FUNC symbols for now - if (ELF64_ST_TYPE(curr_sym.st_info) != STT_FUNC) - { - continue; - } if (curr_sym.st_value == 0) { continue; @@ -692,6 +694,7 @@ parse_lib_symtb(Elf64_Shdr *symtb_shdr, Elf64_Ehdr *lib_ehdr, int lib_fd, char *sym_name = &str_tb[curr_sym.st_name]; ld_syms[actual_syms].sym_name = malloc(strlen(sym_name) + 1); strcpy(ld_syms[actual_syms].sym_name, sym_name); + ld_syms[actual_syms].sym_type = ELF64_ST_TYPE(curr_sym.st_info); actual_syms += 1; } ld_syms @@ -704,19 +707,18 @@ parse_lib_symtb(Elf64_Shdr *symtb_shdr, Elf64_Ehdr *lib_ehdr, int lib_fd, } static void -parse_lib_relaplt(Elf64_Shdr *rela_plt_shdr, Elf64_Ehdr *lib_ehdr, int lib_fd, +parse_lib_rela(Elf64_Shdr *rela_shdr, Elf64_Ehdr *lib_ehdr, int lib_fd, struct LibDependency *lib_dep) { // Traverse `.rela.plt`, so we can see which function addresses we need // to eagerly load - Elf64_Rela *rela_plt = malloc(rela_plt_shdr->sh_size); - do_pread( - lib_fd, rela_plt, rela_plt_shdr->sh_size, rela_plt_shdr->sh_offset); - size_t rela_count = rela_plt_shdr->sh_size / sizeof(Elf64_Rela); + Elf64_Rela *rela_sec = malloc(rela_shdr->sh_size); + do_pread(lib_fd, rela_sec, rela_shdr->sh_size, rela_shdr->sh_offset); + size_t rela_count = rela_shdr->sh_size / sizeof(Elf64_Rela); Elf64_Shdr dyn_sym_hdr; do_pread(lib_fd, &dyn_sym_hdr, sizeof(Elf64_Shdr), - lib_ehdr->e_shoff + rela_plt_shdr->sh_link * sizeof(Elf64_Shdr)); + lib_ehdr->e_shoff + rela_shdr->sh_link * sizeof(Elf64_Shdr)); Elf64_Sym *dyn_sym_tbl = malloc(dyn_sym_hdr.sh_size); do_pread(lib_fd, dyn_sym_tbl, dyn_sym_hdr.sh_size, dyn_sym_hdr.sh_offset); @@ -726,16 +728,36 @@ parse_lib_relaplt(Elf64_Shdr *rela_plt_shdr, Elf64_Ehdr *lib_ehdr, int lib_fd, char *dyn_str_tbl = malloc(dyn_str_hdr.sh_size); do_pread(lib_fd, dyn_str_tbl, dyn_str_hdr.sh_size, dyn_str_hdr.sh_offset); - lib_dep->rela_maps = malloc(rela_count * sizeof(struct LibRelaMapping)); - lib_dep->rela_maps_count = rela_count; + struct LibRelaMapping *new_relas + = malloc(rela_count * sizeof(struct LibRelaMapping)); // Log symbols that will need to be relocated eagerly at maptime Elf64_Rela curr_rela; - for (size_t j = 0; j < lib_dep->rela_maps_count; ++j) + size_t actual_relas = 0; + for (size_t j = 0; j < rela_count; ++j) { - curr_rela = rela_plt[j]; + curr_rela = rela_sec[j]; size_t curr_rela_sym_idx = ELF64_R_SYM(curr_rela.r_info); Elf64_Sym curr_rela_sym = dyn_sym_tbl[curr_rela_sym_idx]; + + // Filter out some `libc` symbols we don't want to handle + // TODO at least right now + if (!strcmp(&dyn_str_tbl[curr_rela_sym.st_name], "environ")) + { + warnx("Currently not relocating symbol `environ` from library %s - " + "using within a container might cause a crash.", + lib_dep->lib_name); + continue; + } + else if (!strcmp(&dyn_str_tbl[curr_rela_sym.st_name], "__progname")) + { + warnx( + "Currently not relocating symbol `__progname` from library %s " + "- using within a container might cause a crash.", + lib_dep->lib_name); + continue; + } + char *curr_rela_name = malloc(strlen(&dyn_str_tbl[curr_rela_sym.st_name]) + 1); strcpy(curr_rela_name, &dyn_str_tbl[curr_rela_sym.st_name]); @@ -744,16 +766,38 @@ parse_lib_relaplt(Elf64_Shdr *rela_plt_shdr, Elf64_Ehdr *lib_ehdr, int lib_fd, { // Do not handle weak-bind symbols // TODO should we? - lrm = (struct LibRelaMapping) { curr_rela_name, 0, 0 }; + continue; + } + if (curr_rela_sym_idx == 0) + { + // TODO In some test programs, there are some relocations with no + // name and some weird value; we currently filter them out, but + // they might mean something in the future + continue; } - else + + // TODO haven't handled addends on relocations yet + assert(curr_rela.r_addend == 0); + + new_relas[actual_relas] = (struct LibRelaMapping) { curr_rela_name, + curr_rela.r_offset + (char *) lib_dep->lib_mem_base, NULL, + ELF64_R_TYPE(curr_rela.r_info) }; + if (curr_rela_sym.st_value != 0) { - lrm = (struct LibRelaMapping) { curr_rela_name, - curr_rela.r_offset + (char *) lib_dep->lib_mem_base, NULL }; + new_relas[actual_relas].target_func_address + = curr_rela_sym.st_value + (char *) lib_dep->lib_mem_base; } - lib_dep->rela_maps[j] = lrm; + actual_relas += 1; } - free(rela_plt); + lib_dep->rela_maps = realloc(lib_dep->rela_maps, + (lib_dep->rela_maps_count + actual_relas) + * sizeof(struct LibRelaMapping)); + memcpy(&lib_dep->rela_maps[lib_dep->rela_maps_count], new_relas, + actual_relas * sizeof(struct LibRelaMapping)); + lib_dep->rela_maps_count += actual_relas; + + free(new_relas); + free(rela_sec); free(dyn_sym_tbl); free(dyn_str_tbl); } @@ -799,7 +843,7 @@ find_comp_entry_points( for (size_t i = 0; i < entry_point_count; ++i) { struct LibSymSearchResult found_sym - = find_lib_dep_sym_in_comp(entry_points[i], new_comp); + = find_lib_dep_sym_in_comp(entry_points[i], new_comp, STT_FUNC); if (found_sym.lib_idx == USHRT_MAX) { errx(1, "Did not find entry point %s!\n", entry_points[i]); @@ -829,7 +873,7 @@ find_comp_intercepts(char **intercepts, void **intercept_addrs, for (size_t i = 0; i < intercept_count; ++i) { struct LibSymSearchResult found_sym - = find_lib_dep_sym_in_comp(intercept_names[i], new_comp); + = find_lib_dep_sym_in_comp(intercept_names[i], new_comp, STT_FUNC); if (found_sym.lib_idx == USHRT_MAX) { continue; @@ -852,19 +896,43 @@ resolve_rela_syms(struct Compartment *new_comp) { for (size_t j = 0; j < new_comp->libs[i]->rela_maps_count; ++j) { - // Ignore relocations we don't want to load, as earlier set on - // lookup (e.g., weak-bound symbols) - if (new_comp->libs[i]->rela_maps[j].rela_address == 0) + if (new_comp->libs[i]->rela_maps[j].target_func_address != 0) + { + continue; + } + + unsigned short sym_to_find_type; + if (new_comp->libs[i]->rela_maps[j].rela_type + == R_AARCH64_JUMP_SLOT) + { + sym_to_find_type = STT_FUNC; + } + else if (new_comp->libs[i]->rela_maps[j].rela_type + == R_AARCH64_GLOB_DAT) + { + sym_to_find_type = STT_OBJECT; + } + else { continue; + // TODO do we want to handle other / all relocation types? + /*errx(1, "Unhandled relocation symbol of type `%hu` for symbol + * `%s` of library `%s`!\n", + * new_comp->libs[i]->rela_maps[j].rela_type, + * new_comp->libs[i]->rela_maps[j].rela_name, + * new_comp->libs[i]->lib_name);*/ } struct LibSymSearchResult found_sym = find_lib_dep_sym_in_comp( - new_comp->libs[i]->rela_maps[j].rela_name, new_comp); + new_comp->libs[i]->rela_maps[j].rela_name, new_comp, + sym_to_find_type); if (found_sym.lib_idx == USHRT_MAX) { - errx(1, "Did not find symbol %s!\n", - new_comp->libs[i]->rela_maps[j].rela_name); + errx(1, + "Did not find symbol %s of type %hu (idx %zu in library %s " + "(idx %zu))!\n", + new_comp->libs[i]->rela_maps[j].rela_name, sym_to_find_type, + j, new_comp->libs[i]->lib_name, i); } new_comp->libs[i]->rela_maps[j].target_func_address = extract_sym_offset(new_comp, found_sym); @@ -903,14 +971,15 @@ extract_sym_offset(struct Compartment *comp, struct LibSymSearchResult res) } static struct LibSymSearchResult -find_lib_dep_sym_in_comp( - const char *to_find, struct Compartment *comp_to_search) +find_lib_dep_sym_in_comp(const char *to_find, + struct Compartment *comp_to_search, const unsigned short sym_type) { for (size_t i = 0; i < comp_to_search->libs_count; ++i) { for (size_t j = 0; j < comp_to_search->libs[i]->lib_syms_count; ++j) { - if (!strcmp(to_find, comp_to_search->libs[i]->lib_syms[j].sym_name)) + if (!strcmp(to_find, comp_to_search->libs[i]->lib_syms[j].sym_name) + && comp_to_search->libs[i]->lib_syms[j].sym_type == sym_type) { struct LibSymSearchResult res = { i, j }; return res; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 532ec1d..5269592 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -122,10 +122,18 @@ set(comp_binaries "simple" "simple_libc" "simple_call_internal" + "simple_call_internal_static" + "simple_call_internal_weak" "simple_call_external" + "simple_static_var" + "simple_static_var-external" "simple_external" "simple_syscall_getpid" "simple_syscall_write" + "simple_va_args" + "simple_fputs" + "simple_printf" + "simple_fopen" #"time" #"lua_simple" #"lua_script" @@ -138,9 +146,16 @@ set(tests "simple" "simple_libc" "simple_call_internal" + "simple_call_internal_static" + #"simple_call_internal_weak" "simple_call_external" + "simple_static_var" "simple_syscall_getpid" "simple_syscall_write" + "simple_va_args" + #"simple_fputs" + #"simple_printf" + "simple_fopen" #"time" #"lua_simple" #"lua_script" @@ -176,9 +191,12 @@ endforeach() # Additional dependencies target_link_libraries(simple_call_external PRIVATE simple_external) +new_dependency(simple_call_external $) + +target_link_libraries(simple_static_var PRIVATE simple_static_var-external) +new_dependency(simple_static_var $) #new_dependency(test_map $) -new_dependency(simple_call_external $) #new_dependency(lua_script ${CMAKE_CURRENT_SOURCE_DIR}/hello_world.lua) #new_dependency(test_args_near_unmapped $) diff --git a/tests/simple_call_internal_static.c b/tests/simple_call_internal_static.c new file mode 100644 index 0000000..d403384 --- /dev/null +++ b/tests/simple_call_internal_static.c @@ -0,0 +1,16 @@ +#include +#include + +static int +call_internal(int x) +{ + return pow(x, 2); +} + +int +main(void) +{ + int val = 4; + assert(val * val == call_internal(val)); + return 0; +} diff --git a/tests/simple_call_internal_weak.c b/tests/simple_call_internal_weak.c new file mode 100644 index 0000000..25b039f --- /dev/null +++ b/tests/simple_call_internal_weak.c @@ -0,0 +1,12 @@ +#include +#include + +int __attribute__((weak)) call_internal(int x) { return pow(x, 2); } + +int +main(void) +{ + int val = 4; + assert(val * val == call_internal(val)); + return 0; +} diff --git a/tests/simple_fopen.c b/tests/simple_fopen.c new file mode 100644 index 0000000..7fd58d0 --- /dev/null +++ b/tests/simple_fopen.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include + +void +by_fopen() +{ + FILE *fd = fopen("tmp", "w"); + fprintf(fd, "Hi\n"); + fclose(fd); +} + +void +by_syscall() +{ +} + +void +by_open() +{ + int fd = open("tmp", O_WRONLY | O_CREAT); + if (fd == -1) + { + err(1, "Error in open: "); + } + char *buf = "Hi\n"; + write(fd, buf, strlen(buf)); + close(fd); +} + +int +main() +{ + by_open(); + return 0; +} diff --git a/tests/simple_fputs.c b/tests/simple_fputs.c new file mode 100644 index 0000000..64719a5 --- /dev/null +++ b/tests/simple_fputs.c @@ -0,0 +1,12 @@ +#include +#include + +int +main() +{ + /*FILE* strem = __stdoutp;*/ + FILE *strem = fdopen(STDOUT_FILENO, "w"); + fputs("Hello\n", strem); + fclose(strem); + return 0; +} diff --git a/tests/simple_printf.c b/tests/simple_printf.c index acf4544..a3159df 100644 --- a/tests/simple_printf.c +++ b/tests/simple_printf.c @@ -1,9 +1,13 @@ #include +#include int main(void) { + FILE *my_stdout = fdopen(STDOUT_FILENO, "w"); const char *hw = "Hello World!"; - printf("Inside - %s\n", hw); + fprintf(my_stdout, "Inside - %s\n", hw); + /*printf("Inside - %s\n", hw);*/ + fclose(my_stdout); return 0; } diff --git a/tests/simple_static_var-external.c b/tests/simple_static_var-external.c new file mode 100644 index 0000000..063a2bb --- /dev/null +++ b/tests/simple_static_var-external.c @@ -0,0 +1 @@ +unsigned short fortytwo = 42; diff --git a/tests/simple_static_var.c b/tests/simple_static_var.c new file mode 100644 index 0000000..49548fc --- /dev/null +++ b/tests/simple_static_var.c @@ -0,0 +1,10 @@ +#include + +extern unsigned short fortytwo; + +int +main() +{ + assert(fortytwo == 42); + return 0; +} diff --git a/tests/simple_va_args.c b/tests/simple_va_args.c new file mode 100644 index 0000000..5045300 --- /dev/null +++ b/tests/simple_va_args.c @@ -0,0 +1,26 @@ +#include +#include + +int +sum(int count, ...) +{ + va_list vals; + va_start(vals, count); + int acc = 0; + int val; + for (int i = 0; i < count; ++i) + { + val = va_arg(vals, int); + acc += val; + } + va_end(vals); + return acc; +} + +int +main() +{ + int suman = sum(3, 15, 30, -3); + assert(suman == 42); + return 0; +}