From 1c2d2c92a70477600e1e8b34aca642ecb70c2262 Mon Sep 17 00:00:00 2001 From: Andrei Lascu Date: Thu, 8 Feb 2024 14:38:58 +0000 Subject: [PATCH] Implement transitive eager relocation We now gather all symbols to be relocated from all `so` libraries within a compartment, and perform eager relocation at map time. This means, primarily, that we handle each `so` file within a compartment the same, then perform a sweep to relocate required symbols after parsing and mapping. Other small changes: * Improve and add more `simple` tests, to test more specific functionality; * Some overall code refactoring; * Add `-Wextra`, and fix more warnings --- CMakeLists.txt | 2 +- include/compartment.h | 73 ++- include/intercept.h | 8 +- src/compartment.c | 1146 ++++++++++++++++++---------------- src/intercept.c | 3 +- tests/CMakeLists.txt | 15 +- tests/args_simple.c | 2 +- tests/simple.c | 5 +- tests/simple_call_external.c | 13 + tests/simple_call_internal.c | 16 + tests/simple_external.c | 5 + tests/simple_libc.c | 11 + tests/simple_malloc.c | 12 + tests/simple_printf.c | 9 + tests/simple_various.c | 20 + 15 files changed, 772 insertions(+), 568 deletions(-) create mode 100644 tests/simple_call_external.c create mode 100644 tests/simple_call_internal.c create mode 100644 tests/simple_external.c create mode 100644 tests/simple_libc.c create mode 100644 tests/simple_malloc.c create mode 100644 tests/simple_printf.c create mode 100644 tests/simple_various.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 60239ce..71f6278 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.16) project(CHERI_ELF_Compartments LANGUAGES C ASM) # Set global compilation options -add_compile_options(-pedantic -Wno-gnu-binary-literal -Wno-language-extension-token -Werror) +add_compile_options(-pedantic -Wextra -Wno-gnu-binary-literal -Wno-language-extension-token -Werror) # Set useful directory variables set(TEST_DIR ${CMAKE_SOURCE_DIR}/tests) diff --git 
a/include/compartment.h b/include/compartment.h index 97c934b..ed4c35e 100644 --- a/include/compartment.h +++ b/include/compartment.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -108,7 +109,7 @@ struct SegmentMap * via PLT/GOT, we update the expected addresses eagerly once the code is * mapped into memory, via `comp_map` */ -struct CompRelaMapping +struct LibRelaMapping { char *rela_name; void *rela_address; // address of relocation in compartment @@ -120,47 +121,61 @@ struct CompRelaMapping struct LibDependencySymbol { char *sym_name; - intptr_t sym_offset; + void *sym_offset; }; -/* Struct representing a library dependency for one of our given compartments +/* Struct representing the result of searching for a library symbol in a + * compartment + */ +struct LibSymSearchResult +{ + unsigned short lib_idx; + unsigned short sym_idx; +}; + +/** + * Struct representing a library dependency for one of our given compartments */ struct LibDependency { char *lib_name; char *lib_path; + void *lib_mem_base; + + // Segments of interest (usually, of type `PT_LOAD`) within this library size_t lib_segs_count; size_t lib_segs_size; - void *lib_mem_base; - struct SegmentMap **lib_segs; + struct SegmentMap *lib_segs; + + // Symbols within this library size_t lib_syms_count; struct LibDependencySymbol *lib_syms; + + // Library dependencies for this library + unsigned short lib_dep_count; + char **lib_dep_names; + + // Symbols within this library that need eager relocation + size_t rela_maps_count; + struct LibRelaMapping *rela_maps; }; -/* Struct representing ELF data necessary to load and eventually execute a +/** + * Struct representing ELF data necessary to load and eventually execute a * compartment */ struct Compartment { // Identifiers size_t id; - int fd; - Elf64_Half elf_type; // Execution info - Elf64_Half phdr; void *__capability ddc; // ELF data size_t size; // size of compartment in memory void *base; // address where to load 
compartment - size_t entry_point_count; - struct CompEntryPoint **comp_eps; void *mem_top; bool mapped; - bool mapped_full; - // Segments data - struct SegmentMap **segs; - size_t seg_count; - size_t segs_size; + // Scratch memory void *scratch_mem_base; size_t scratch_mem_size; @@ -172,24 +187,27 @@ struct Compartment void *stack_pointer; struct MemAlloc *alloc_head; + // TODO double check / rework this process void *manager_caps; size_t max_manager_caps_count; size_t active_manager_caps_count; + // Transition function (duplicated across compartments, but must be within + // to be within DDC bounds) void *mng_trans_fn; size_t mng_trans_fn_sz; - // Only for shared object compartments - size_t rela_maps_count; - struct CompRelaMapping *rela_maps; - size_t lib_deps_count; - struct LibDependency **lib_deps; + // Internal libraries and relocations + size_t libs_count; + struct LibDependency **libs; + size_t entry_point_count; + struct CompEntryPoint *entry_points; // Hardware info - maybe move size_t page_size; // Misc - short curr_intercept_count; + unsigned short curr_intercept_count; struct InterceptPatch *intercept_patches; }; @@ -215,15 +233,4 @@ comp_clean(struct Compartment *); struct Compartment * find_comp(struct Compartment *); -static ssize_t -do_pread(int, void *, size_t, off_t); -static Elf64_Sym * -find_symbols(const char **, size_t, bool, Elf64_Sym *, char *, size_t); -static char * -find_in_dir(const char *, char *); -static void -init_comp_scratch_mem(struct Compartment *); -static void -init_lib_dep_info(struct LibDependency *, struct Compartment *); - #endif // _COMPARTMENT_H diff --git a/include/intercept.h b/include/intercept.h index 5b1bc02..70ce1db 100644 --- a/include/intercept.h +++ b/include/intercept.h @@ -81,12 +81,12 @@ int my_fprintf(FILE *, const char *, ...); size_t -my_call_comp(size_t, char *, void *, size_t); +my_call_comp(size_t, char *, void *); static const struct FuncIntercept to_intercept_funcs[] = { /* Mem funcs */ - { "malloc", 
(void *) my_malloc }, - { "realloc", (void *) my_realloc }, - { "free", (void *) my_free }, + { "malloc", (void *) my_malloc, NULL }, + { "realloc", (void *) my_realloc, NULL }, + { "free", (void *) my_free, NULL }, }; // // Functions to be intercepted and associated data diff --git a/src/compartment.c b/src/compartment.c index 95916b2..fbda2f1 100644 --- a/src/compartment.c +++ b/src/compartment.c @@ -1,5 +1,50 @@ #include "compartment.h" +const char *libs_path_env_var = "COMP_LIBRARY_PATH"; + +/******************************************************************************* + * Forward declarations + ******************************************************************************/ + +static void +get_lib_name(struct LibDependency *, const char *); +static struct LibDependency * +parse_lib_file(char *, struct Compartment *); +static void +parse_lib_segs(Elf64_Ehdr *, int, struct LibDependency *, struct Compartment *); +static void +parse_lib_symtb(Elf64_Shdr *, Elf64_Ehdr *, int, struct LibDependency *); +static void +parse_lib_relaplt(Elf64_Shdr *, Elf64_Ehdr *, int, struct LibDependency *); +static void +parse_lib_dynamic_deps(Elf64_Shdr *, Elf64_Ehdr *, int, struct LibDependency *); +static void +find_comp_entry_points(char **, size_t, struct Compartment *); +static void +find_comp_intercepts(char **, void **, size_t, struct Compartment *); +static void +resolve_rela_syms(struct Compartment *); +static struct LibSymSearchResult +find_lib_dep_sym_in_comp(const char *, struct Compartment *); +static void * +extract_sym_offset(struct Compartment *, struct LibSymSearchResult); + +static ssize_t +do_pread(int, void *, size_t, off_t); +static char * +find_in_dir(const char *, char *); +static void +init_comp_scratch_mem(struct Compartment *); + +static void +print_lib_dep_seg(struct SegmentMap *); +static void +print_lib_dep(struct LibDependency *); + +/******************************************************************************* + * Main compartment functions + 
******************************************************************************/ + /* Initialize some values of the Compartment struct. The rest are expected to * be set in `comp_from_elf`. */ @@ -10,34 +55,44 @@ comp_init() struct Compartment *new_comp = (struct Compartment *) malloc(sizeof(struct Compartment)); - new_comp->phdr = 0; new_comp->ddc = NULL; new_comp->size = 0; - new_comp->base = 0; - new_comp->entry_point_count = 0; + new_comp->base = NULL; + new_comp->mem_top = NULL; new_comp->mapped = false; - new_comp->mapped_full = false; - new_comp->seg_count = 0; - new_comp->segs_size = 0; + new_comp->scratch_mem_base = NULL; + new_comp->scratch_mem_size = 0; + new_comp->scratch_mem_alloc = 0; + + new_comp->scratch_mem_heap_size = 0; + new_comp->scratch_mem_stack_top = NULL; + new_comp->scratch_mem_stack_size = 0; + new_comp->stack_pointer = NULL; + new_comp->alloc_head = NULL; + + new_comp->manager_caps = NULL; + new_comp->max_manager_caps_count = 0; + new_comp->active_manager_caps_count = 0; + + new_comp->mng_trans_fn = NULL; new_comp->mng_trans_fn_sz = sizeof(uint32_t) * COMP_TRANS_FN_INSTR_CNT; // TODO ptr arithmetic - new_comp->phdr = 0; - new_comp->alloc_head = NULL; - new_comp->rela_maps_count = 0; + new_comp->libs_count = 0; + new_comp->libs = NULL; + new_comp->entry_point_count = 0; + new_comp->entry_points = NULL; new_comp->page_size = sysconf(_SC_PAGESIZE); + new_comp->curr_intercept_count = 0; + new_comp->intercept_patches = NULL; return new_comp; } -/******************************************************************************* - * Main compartment functions - ******************************************************************************/ - /* Comparison function for `struct CompEntryPoint` */ int @@ -57,312 +112,53 @@ comp_from_elf(char *filename, char **entry_points, size_t entry_point_count, void *new_comp_base) { struct Compartment *new_comp = comp_init(); + new_comp->base = new_comp_base; + new_comp->mem_top = new_comp_base; - new_comp->fd = 
open(filename, O_RDONLY); - if (new_comp->fd == -1) - { - free(new_comp); - errx(1, "Error opening compartment file %s!\n", filename); - } + unsigned short libs_to_parse_count = 1; + unsigned short libs_parsed_count = 0; + char **libs_to_parse = malloc(sizeof(char *)); + libs_to_parse[0] = filename; - assert(entry_points); - assert(entry_point_count > 0); - new_comp->comp_eps - = malloc(entry_point_count * sizeof(struct CompEntryPoint)); + char *libs_folder = getenv(libs_path_env_var); - // Read elf headers - Elf64_Ehdr comp_ehdr; - assert(new_comp->fd != -1); - do_pread(new_comp->fd, &comp_ehdr, sizeof(Elf64_Ehdr), 0); - new_comp->elf_type = comp_ehdr.e_type; - if (new_comp->elf_type != ET_DYN) + while (libs_parsed_count != libs_to_parse_count) { - errx(1, "Only supporting ELFs of type DYN (shared object files)!"); - } - - struct stat elf_fd_stat; - fstat(new_comp->fd, &elf_fd_stat); - // TODO re-check if we're actually using file size anywhere; I think we're - // only using loaded-segment size - new_comp->size = elf_fd_stat.st_size; + struct LibDependency *parsed_lib + = parse_lib_file(libs_to_parse[libs_parsed_count], new_comp); - // Read program headers - Elf64_Phdr comp_phdr; - ptrdiff_t align_size_correction; - for (size_t i = 0; i < comp_ehdr.e_phnum; ++i) - { - do_pread((int) new_comp->fd, &comp_phdr, sizeof(comp_phdr), - comp_ehdr.e_phoff + i * sizeof(comp_phdr)); - - // We only need to keep `PT_LOAD` segments, so we can map them later - if (comp_phdr.p_type != PT_LOAD) + const unsigned short libs_to_search_count = libs_to_parse_count; + for (size_t i = 0; i < parsed_lib->lib_dep_count; ++i) { - continue; - } - - new_comp->base = new_comp_base; - - // Setup mapping info for the current segment - struct SegmentMap *this_seg - = (struct SegmentMap *) malloc(sizeof(struct SegmentMap)); - assert(this_seg != NULL); - void *curr_seg_base = (char *) new_comp->base + comp_phdr.p_vaddr; - this_seg->mem_bot = align_down(curr_seg_base, new_comp->page_size); - 
align_size_correction - = (char *) curr_seg_base - (char *) this_seg->mem_bot; - this_seg->mem_top = (char *) curr_seg_base + comp_phdr.p_memsz; - - this_seg->offset = align_down(comp_phdr.p_offset, new_comp->page_size); - this_seg->mem_sz = comp_phdr.p_memsz + align_size_correction; - this_seg->file_sz = comp_phdr.p_filesz + align_size_correction; - this_seg->correction = align_size_correction; - this_seg->prot_flags = (comp_phdr.p_flags & PF_R ? PROT_READ : 0) - | (comp_phdr.p_flags & PF_W ? PROT_WRITE : 0) - | (comp_phdr.p_flags & PF_X ? PROT_EXEC : 0); - - new_comp->segs = realloc(new_comp->segs, - (new_comp->seg_count + 1) * sizeof(struct SegmentMap *)); - new_comp->segs[new_comp->seg_count] = this_seg; - new_comp->seg_count += 1; - new_comp->segs_size += align_up(this_seg->mem_sz, comp_phdr.p_align); - } - - // Load `.shstr` section, so we can check section names - Elf64_Shdr comp_sh_strtb_hdr; - do_pread((int) new_comp->fd, &comp_sh_strtb_hdr, sizeof(Elf64_Shdr), - comp_ehdr.e_shoff + comp_ehdr.e_shstrndx * sizeof(Elf64_Shdr)); - char *comp_sh_strtb = malloc(comp_sh_strtb_hdr.sh_size); - do_pread((int) new_comp->fd, comp_sh_strtb, comp_sh_strtb_hdr.sh_size, - comp_sh_strtb_hdr.sh_offset); - - init_comp_scratch_mem(new_comp); - new_comp->mem_top = new_comp->scratch_mem_stack_top; - - // Find indices of interest that we'll use later - const size_t headers_of_interest_count = 3; - size_t found_headers = 0; - Elf64_Shdr comp_symtb_shdr; - Elf64_Shdr comp_rela_plt_shdr; - Elf64_Shdr comp_dynamic_shdr; - Elf64_Shdr curr_shdr; - for (size_t i = 0; i < comp_ehdr.e_shnum; ++i) - { - do_pread((int) new_comp->fd, &curr_shdr, sizeof(Elf64_Shdr), - comp_ehdr.e_shoff + i * sizeof(Elf64_Shdr)); - - if (curr_shdr.sh_type == SHT_SYMTAB) - { - comp_symtb_shdr = curr_shdr; - found_headers += 1; - } - // Lookup `.rela.plt` to eagerly load relocatable function addresses - else if (curr_shdr.sh_type == SHT_RELA - && !strcmp(&comp_sh_strtb[curr_shdr.sh_name], ".rela.plt")) - { - 
comp_rela_plt_shdr = curr_shdr; - found_headers += 1; - } - // Lookup `.dynamic` to find library dependencies - else if (curr_shdr.sh_type == SHT_DYNAMIC) - { - comp_dynamic_shdr = curr_shdr; - found_headers += 1; - } - - if (headers_of_interest_count == found_headers) - { - break; - } - } - assert(headers_of_interest_count == found_headers); - - // Traverse `.rela.plt`, so we can see which function addresses we need - // to eagerly load - Elf64_Rela *comp_rela_plt = malloc(comp_rela_plt_shdr.sh_size); - do_pread((int) new_comp->fd, comp_rela_plt, comp_rela_plt_shdr.sh_size, - comp_rela_plt_shdr.sh_offset); - size_t rela_count = comp_rela_plt_shdr.sh_size / sizeof(Elf64_Rela); - - Elf64_Shdr dyn_sym_hdr; - do_pread((int) new_comp->fd, &dyn_sym_hdr, sizeof(Elf64_Shdr), - comp_ehdr.e_shoff + comp_rela_plt_shdr.sh_link * sizeof(Elf64_Shdr)); - Elf64_Sym *dyn_sym_tbl = malloc(dyn_sym_hdr.sh_size); - do_pread((int) new_comp->fd, dyn_sym_tbl, dyn_sym_hdr.sh_size, - dyn_sym_hdr.sh_offset); - - Elf64_Shdr dyn_str_hdr; - do_pread((int) new_comp->fd, &dyn_str_hdr, sizeof(Elf64_Shdr), - comp_ehdr.e_shoff + dyn_sym_hdr.sh_link * sizeof(Elf64_Shdr)); - char *dyn_str_tbl = malloc(dyn_str_hdr.sh_size); - do_pread((int) new_comp->fd, dyn_str_tbl, dyn_str_hdr.sh_size, - dyn_str_hdr.sh_offset); - - new_comp->rela_maps = calloc(rela_count, sizeof(struct CompRelaMapping)); - new_comp->rela_maps_count = rela_count; - - // Log symbols that will need to be relocated eagerly at maptime - Elf64_Rela curr_rela; - for (size_t j = 0; j < new_comp->rela_maps_count; ++j) - { - curr_rela = comp_rela_plt[j]; - size_t curr_rela_sym_idx = ELF64_R_SYM(curr_rela.r_info); - Elf64_Sym curr_rela_sym = dyn_sym_tbl[curr_rela_sym_idx]; - char *curr_rela_name - = malloc(strlen(&dyn_str_tbl[curr_rela_sym.st_name]) + 1); - strcpy(curr_rela_name, &dyn_str_tbl[curr_rela_sym.st_name]); - if (ELF64_ST_BIND(curr_rela_sym.st_info) == STB_WEAK) - { - // Do not handle weak-bind symbols - // TODO should we? 
- struct CompRelaMapping crm = { curr_rela_name, 0, 0 }; - new_comp->rela_maps[j] = crm; - continue; - } // TODO collapse - - struct CompRelaMapping crm = { curr_rela_name, - curr_rela.r_offset + (char *) new_comp->base, NULL }; - new_comp->rela_maps[j] = crm; - } - free(comp_rela_plt); - free(dyn_sym_tbl); - - // Find additional library dependencies - Elf64_Dyn *comp_dyn_entries = malloc(comp_dynamic_shdr.sh_size); - do_pread((int) new_comp->fd, comp_dyn_entries, comp_dynamic_shdr.sh_size, - comp_dynamic_shdr.sh_offset); - - for (size_t i = 0; i < comp_dynamic_shdr.sh_size / sizeof(Elf64_Dyn); ++i) - { - if (comp_dyn_entries[i].d_tag == DT_NEEDED) - { - struct LibDependency *new_lib_dep - = malloc(sizeof(struct LibDependency)); - new_lib_dep->lib_name = malloc( - strlen(&dyn_str_tbl[comp_dyn_entries[i].d_un.d_val]) + 1); - strcpy(new_lib_dep->lib_name, - &dyn_str_tbl[comp_dyn_entries[i].d_un.d_val]); - new_comp->lib_deps_count += 1; - new_comp->lib_deps = realloc(new_comp->lib_deps, - new_comp->lib_deps_count * sizeof(struct LibDependency)); - new_comp->lib_deps[new_comp->lib_deps_count - 1] = new_lib_dep; - } - } - - free(dyn_str_tbl); - free(comp_dyn_entries); - - // Find library files in `COMP_LIBRARY_PATH` to fulfill dependencies - for (size_t i = 0; i < new_comp->lib_deps_count; ++i) - { - struct LibDependency *curr_dep = new_comp->lib_deps[i]; - // TODO move env var name to constant - assert(getenv("COMP_LIBRARY_PATH")); - char *lib_path - = find_in_dir(curr_dep->lib_name, getenv("COMP_LIBRARY_PATH")); - if (!lib_path) - { - errx(1, "Could not find file for dependency %s!\n", - curr_dep->lib_name); - } - curr_dep->lib_path = malloc(strlen(lib_path)); - strcpy(curr_dep->lib_path, lib_path); - init_lib_dep_info(curr_dep, new_comp); - new_comp->mem_top = (char *) curr_dep->lib_mem_base - + (uintptr_t) curr_dep->lib_segs[curr_dep->lib_segs_count - 1] - ->mem_top; - } - - // Find functions of interest, particularly entry points, and functions to - // intercept - 
Elf64_Shdr comp_strtb_hdr; - do_pread((int) new_comp->fd, &comp_strtb_hdr, sizeof(Elf64_Shdr), - comp_ehdr.e_shoff + comp_symtb_shdr.sh_link * sizeof(Elf64_Shdr)); - - // XXX The string table is read in `comp_strtb` as a sequence of - // variable-length strings. Then, symbol names are obtained by indexing at - // the offset where the name for that symbol begins. Therefore, the type - // `char*` for the string table makes sense. - char *comp_strtb = malloc(comp_strtb_hdr.sh_size); - do_pread((int) new_comp->fd, comp_strtb, comp_strtb_hdr.sh_size, - comp_strtb_hdr.sh_offset); - - Elf64_Sym *comp_symtb = malloc(comp_symtb_shdr.sh_size); - do_pread((int) new_comp->fd, comp_symtb, comp_symtb_shdr.sh_size, - comp_symtb_shdr.sh_offset); - - // Find symbols for entry_points - Elf64_Sym *ep_syms - = find_symbols((const char **) entry_points, entry_point_count, true, - comp_symtb, comp_strtb, comp_symtb_shdr.sh_size); - for (size_t i = 0; i < entry_point_count; ++i) - { - struct CompEntryPoint *new_entry_point - = malloc(sizeof(struct CompEntryPoint)); - new_entry_point->fn_name = entry_points[i]; - new_entry_point->fn_addr - = (char *) new_comp->base + ep_syms[i].st_value; - new_comp->comp_eps[new_comp->entry_point_count] = new_entry_point; - new_comp->entry_point_count += 1; - } - free(ep_syms); - - // Find symbols for intercepts - char **intercept_names = calloc(intercept_count, sizeof(char *)); - const char *so_plt_suffix = "@plt"; - for (size_t i = 0; i < intercept_count; ++i) - { - size_t to_intercept_name_len - = strlen(intercepts[i]) + strlen(so_plt_suffix) + 1; - intercept_names[i] = malloc(to_intercept_name_len); - strcpy(intercept_names[i], intercepts[i]); - strcat(intercept_names[i], so_plt_suffix); - } - Elf64_Sym *intercept_syms - = find_symbols((const char **) intercept_names, intercept_count, false, - comp_symtb, comp_strtb, comp_symtb_shdr.sh_size); - for (size_t i = 0; i < intercept_count; ++i) - { - // TODO better way to check if we didn't find an 
intercept? - if (intercept_syms[i].st_value != 0) - { - comp_add_intercept(new_comp, intercept_syms[i].st_value, - (uintptr_t) intercept_addrs[i]); - } - free(intercept_names[i]); - } - free(intercept_names); - free(intercept_syms); - - // Find all symbols for eager relocation mapping - for (size_t i = 0; i < new_comp->rela_maps_count; ++i) - { - // Ignore relocations we don't want to load, as earlier set on lookup - // (e.g., weak-bound symbols) - if (new_comp->rela_maps[i].rela_address == 0) - { - continue; - } - for (size_t j = 0; j < new_comp->lib_deps_count; ++j) - { - for (size_t k = 0; k < new_comp->lib_deps[j]->lib_syms_count; ++k) + for (size_t j = 0; j < libs_to_search_count; ++j) { - if (!strcmp(new_comp->rela_maps[i].rela_name, - new_comp->lib_deps[j]->lib_syms[k].sym_name)) + if (!strcmp(libs_to_parse[j], parsed_lib->lib_dep_names[i])) { - new_comp->rela_maps[i].target_func_address - = (char *) new_comp->lib_deps[j]->lib_mem_base - + new_comp->lib_deps[j]->lib_syms[k].sym_offset; - goto found; + goto next_dep; } } + libs_to_parse = realloc( + libs_to_parse, (libs_to_parse_count + 1) * sizeof(char *)); + libs_to_parse[libs_to_parse_count] = parsed_lib->lib_dep_names[i]; + libs_to_parse_count += 1; + // TODO check performance with goto versus without + next_dep: + (void) 0; } - errx(1, "Did not find symbol %s!\n", new_comp->rela_maps[i].rela_name); - found: - (void) 0; + libs_parsed_count += 1; } + free(libs_to_parse); - free(comp_symtb); - free(comp_strtb); + assert(entry_points); + assert(entry_point_count > 0); + + init_comp_scratch_mem(new_comp); + new_comp->mem_top = new_comp->scratch_mem_stack_top; + + find_comp_entry_points(entry_points, entry_point_count, new_comp); + find_comp_intercepts( + intercepts, intercept_addrs, intercept_count, new_comp); + resolve_rela_syms(new_comp); return new_comp; } @@ -483,53 +279,35 @@ comp_stack_push( void comp_map(struct Compartment *to_map) { - assert(!(to_map->mapped || to_map->mapped_full)); + 
assert(!(to_map->mapped)); struct SegmentMap *curr_seg; void *map_result; - // Map compartment segments - for (size_t i = 0; i < to_map->seg_count; ++i) - { - curr_seg = to_map->segs[i]; - map_result = mmap((void *) curr_seg->mem_bot, curr_seg->mem_sz, - /*curr_seg->prot_flags,*/ // TODO currently need read/write to - // inject the intercepts, consider better - // option - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0); - if (map_result == MAP_FAILED) - { - errx(1, "Error mapping comp segment idx %zu", i); - } - do_pread(to_map->fd, (void *) curr_seg->mem_bot, curr_seg->file_sz, - curr_seg->offset); - } - // Map compartment library dependencies segments struct LibDependency *lib_dep; - struct SegmentMap *lib_dep_seg; + struct SegmentMap lib_dep_seg; int lib_dep_fd; - for (size_t i = 0; i < to_map->lib_deps_count; ++i) + for (size_t i = 0; i < to_map->libs_count; ++i) { - lib_dep = to_map->lib_deps[i]; + lib_dep = to_map->libs[i]; lib_dep_fd = open(lib_dep->lib_path, O_RDONLY); for (size_t j = 0; j < lib_dep->lib_segs_count; ++j) { lib_dep_seg = lib_dep->lib_segs[j]; map_result = mmap((char *) lib_dep->lib_mem_base - + (uintptr_t) lib_dep_seg->mem_bot, - lib_dep_seg->mem_sz, + + (uintptr_t) lib_dep_seg.mem_bot, + lib_dep_seg.mem_sz, PROT_READ | PROT_WRITE | PROT_EXEC, // TODO fix MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0); if (map_result == MAP_FAILED) { - errx(1, "Error mapping library %s dependency segment idx %zu", + err(1, "Error mapping library %s dependency segment idx %zu!\n", lib_dep->lib_name, j); } do_pread(lib_dep_fd, (char *) lib_dep->lib_mem_base - + (uintptr_t) lib_dep_seg->mem_bot, - lib_dep_seg->file_sz, lib_dep_seg->offset); + + (uintptr_t) lib_dep_seg.mem_bot, + lib_dep_seg.file_sz, lib_dep_seg.offset); } close(lib_dep_fd); } @@ -537,9 +315,12 @@ comp_map(struct Compartment *to_map) // Map compartment scratch memory map_result = mmap((void *) to_map->scratch_mem_base, to_map->scratch_mem_size, - PROT_READ 
| PROT_WRITE | PROT_EXEC, // TODO Fix this + PROT_READ | PROT_WRITE, // | PROT_EXEC, // TODO Fix this MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0); - assert(map_result != MAP_FAILED); + if (map_result == MAP_FAILED) + { + err(1, "Error mapping compartment %zu scratch memory!\n", to_map->id); + } // Map compartment stack map_result = mmap( @@ -548,10 +329,13 @@ comp_map(struct Compartment *to_map) PROT_READ | PROT_WRITE | PROT_EXEC, // TODO fix this MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS | MAP_STACK, -1, 0); to_map->stack_pointer = to_map->scratch_mem_stack_top; - assert(map_result != MAP_FAILED); + if (map_result == MAP_FAILED) + { + err(1, "Error mapping compartment %zu stack!\n", to_map->id); + } // Inject intercept instructions within identified intercepted functions - for (size_t i = 0; i < to_map->curr_intercept_count; ++i) + for (unsigned short i = 0; i < to_map->curr_intercept_count; ++i) { struct InterceptPatch to_patch = to_map->intercept_patches[i]; // TODO change to memcpy? 
@@ -569,14 +353,18 @@ comp_map(struct Compartment *to_map) to_map->mng_trans_fn_sz); // Bind `.got.plt` entries - for (size_t i = 0; i < to_map->rela_maps_count; ++i) + for (size_t i = 0; i < to_map->libs_count; ++i) { - if (to_map->rela_maps[i].rela_address == 0) + for (size_t j = 0; j < to_map->libs[i]->rela_maps_count; ++j) { - continue; + if (to_map->libs[i]->rela_maps[j].rela_address == 0) + { + continue; + } + memcpy(to_map->libs[i]->rela_maps[j].rela_address, + &to_map->libs[i]->rela_maps[j].target_func_address, + sizeof(void *)); } - memcpy((void *) to_map->rela_maps[i].rela_address, - &to_map->rela_maps[i].target_func_address, sizeof(void *)); } to_map->mapped = true; @@ -610,9 +398,9 @@ comp_exec( void *fn = NULL; for (size_t i = 0; i < to_exec->entry_point_count; ++i) { - if (!strcmp(fn_name, to_exec->comp_eps[i]->fn_name)) + if (!strcmp(fn_name, to_exec->entry_points[i].fn_name)) { - fn = (void *) to_exec->comp_eps[i]->fn_addr; + fn = (void *) to_exec->entry_points[i].fn_addr; break; } } @@ -639,147 +427,517 @@ comp_exec( /*arg = cheri_perms_and(arg, !(CHERI_PERM_STORE | CHERI_PERM_EXECUTE));*/ /*args_caps[i] = arg;*/ /*}*/ - result = comp_exec_in((void *) to_exec->stack_pointer, to_exec->ddc, fn, - args, args_count, sealed_redirect_cap); + result = comp_exec_in(to_exec->stack_pointer, to_exec->ddc, fn, args, + args_count, sealed_redirect_cap); return result; } void comp_clean(struct Compartment *to_clean) { - close(to_clean->fd); if (to_clean->mapped) { // TODO unmap } - else if (to_clean->mapped_full) + + struct LibDependency *curr_lib_dep; + for (size_t i = 0; i < to_clean->libs_count; ++i) { - // TODO unmap + size_t j; + curr_lib_dep = to_clean->libs[i]; + + // Clean library segments + free(curr_lib_dep->lib_segs); + + // Clean library symbol data + for (j = 0; j < curr_lib_dep->lib_syms_count; ++j) + { + free(curr_lib_dep->lib_syms[j].sym_name); + } + free(curr_lib_dep->lib_syms); + + // Clear library dependency names + for (j = 0; j < 
curr_lib_dep->lib_dep_count; ++j) + { + free(curr_lib_dep->lib_dep_names[j]); + } + free(curr_lib_dep->lib_dep_names); + + // Clean library relocation mappings + for (j = 0; j < curr_lib_dep->rela_maps_count; ++j) + { + free(curr_lib_dep->rela_maps[j].rela_name); + } + free(curr_lib_dep->rela_maps); + + free(curr_lib_dep->lib_name); + free(curr_lib_dep->lib_path); + free(curr_lib_dep); } + free(to_clean->libs); + free(to_clean->entry_points); + free(to_clean->intercept_patches); + free(to_clean); +} + +/******************************************************************************* + * Compartment library functions + * + * Functions dealing with parsing individual library files and correctly + * placing them within a Compartment + ******************************************************************************/ - for (size_t i = 0; i < to_clean->seg_count; ++i) +static struct LibDependency * +parse_lib_file(char *lib_name, struct Compartment *new_comp) +{ + int lib_fd = open(lib_name, O_RDONLY); + char *lib_path = NULL; + if (lib_fd == -1) { - free(to_clean->segs[i]); + // Try to find the library in dependent paths + // TODO currently only $COMP_LIBRARY_PATH + lib_path = find_in_dir(lib_name, getenv(libs_path_env_var)); + lib_fd = open(lib_path, O_RDONLY); + if (lib_fd == -1) + { + errx(1, "Error opening compartment file %s!\n", lib_path); + } } - free(to_clean->segs); - for (size_t i = 0; i < to_clean->entry_point_count; ++i) + // Read ELF headers + Elf64_Ehdr lib_ehdr; + do_pread(lib_fd, &lib_ehdr, sizeof(Elf64_Ehdr), 0); + if (lib_ehdr.e_type != ET_DYN) { - free((char *) to_clean->comp_eps[i]->fn_name); - free(to_clean->comp_eps[i]); + errx(1, + "Error parsing `%s` - only supporting ELFs of type DYN (shared " + "object files)!\n", + lib_path); } - for (size_t i = 0; i < to_clean->rela_maps_count; ++i) + struct LibDependency *new_lib = malloc(sizeof(struct LibDependency)); + new_lib->lib_name = malloc(strlen(lib_name)); + strcpy(new_lib->lib_name, lib_name); + if 
(lib_path) + { + new_lib->lib_path = malloc(strlen(lib_path)); + strcpy(new_lib->lib_path, lib_path); + } + else { - free(to_clean->rela_maps[i].rela_name); + new_lib->lib_path = malloc(strlen(lib_name)); + strcpy(new_lib->lib_path, lib_name); } - free(to_clean->rela_maps); - struct LibDependency *ld; - for (size_t i = 0; i < to_clean->lib_deps_count; ++i) + // Initialization + new_lib->lib_mem_base = NULL; + + new_lib->lib_segs_count = 0; + new_lib->lib_segs_size = 0; + new_lib->lib_segs = NULL; + + new_lib->lib_syms_count = 0; + new_lib->lib_syms = NULL; + + new_lib->lib_dep_count = 0; + new_lib->lib_dep_names = NULL; + + new_lib->rela_maps_count = 0; + new_lib->rela_maps = NULL; + + parse_lib_segs(&lib_ehdr, lib_fd, new_lib, new_comp); + + // Load `.shstrtab` section, so we can check section names + Elf64_Shdr shstrtab_hdr; + do_pread(lib_fd, &shstrtab_hdr, sizeof(Elf64_Shdr), + lib_ehdr.e_shoff + lib_ehdr.e_shstrndx * sizeof(Elf64_Shdr)); + char *shstrtab = malloc(shstrtab_hdr.sh_size); + do_pread(lib_fd, shstrtab, shstrtab_hdr.sh_size, shstrtab_hdr.sh_offset); + + // XXX The string table is read in `strtab` as a sequence of + // variable-length strings. Then, symbol names are obtained by indexing at + // the offset where the name for that symbol begins. Therefore, the type + // `char*` for the string table makes sense. + // + // Example: + // ------------------------------- + // | "foo\0" | "bar\0" | "baz\0" | + // ------------------------------- + // 0123 4567 89ab + // + // Symbol table entries will have the "name" value of the three + // corresponding symbols as 0, 4, and 8. + + // Traverse sections once to get headers for sections of interest + // + // XXX According to the ELF specification version 1.2, for UNIX, there is + // only one of each `SHT_SYMTAB`, `SHT_DYNSYM`, and `SHT_DYNAMIC`. Further, + // we assume there can only be one section with the name `.rela.plt`. + // Therefore, we expect each `if` to be only entered once.
However, we note + // that this can be changed in future specifications. + // + // Source: https://refspecs.linuxfoundation.org/elf/elf.pdf + const size_t headers_of_interest_count = 3; + size_t found_headers = 0; + Elf64_Shdr curr_shdr; + for (size_t i = 0; i < lib_ehdr.e_shnum; ++i) { - ld = to_clean->lib_deps[i]; - free(ld->lib_name); - free(ld->lib_path); - for (size_t j = 0; j < ld->lib_segs_count; ++j) + do_pread(lib_fd, &curr_shdr, sizeof(Elf64_Shdr), + lib_ehdr.e_shoff + i * sizeof(Elf64_Shdr)); + + if (curr_shdr.sh_type == SHT_SYMTAB) + { + parse_lib_symtb(&curr_shdr, &lib_ehdr, lib_fd, new_lib); + found_headers += 1; + } + // Lookup `.rela.plt` to eagerly load relocatable function addresses + else if (curr_shdr.sh_type == SHT_RELA + && !strcmp(&shstrtab[curr_shdr.sh_name], ".rela.plt")) { - free(ld->lib_segs[j]); + parse_lib_relaplt(&curr_shdr, &lib_ehdr, lib_fd, new_lib); + found_headers += 1; } - free(ld->lib_segs); - for (size_t j = 0; j < ld->lib_syms_count; ++j) + // Lookup `.dynamic` to find library dependencies + else if (curr_shdr.sh_type == SHT_DYNAMIC) { - free(ld->lib_syms[j].sym_name); + parse_lib_dynamic_deps(&curr_shdr, &lib_ehdr, lib_fd, new_lib); + found_headers += 1; + } + + if (headers_of_interest_count == found_headers) + { + break; } - free(ld->lib_syms); - free(ld); } - free(to_clean->lib_deps); - free(to_clean->intercept_patches); + assert(headers_of_interest_count == found_headers); - free(to_clean); - // TODO -} + close(lib_fd); + new_comp->libs_count += 1; + new_comp->libs = realloc( + new_comp->libs, new_comp->libs_count * sizeof(struct LibDependency *)); + new_comp->libs[new_comp->libs_count - 1] = new_lib; -/******************************************************************************* - * Helper functions - ******************************************************************************/ + free(shstrtab); -static ssize_t -do_pread(int fd, void *buf, size_t count, off_t offset) + return new_lib; +} + +static void
+parse_lib_segs(Elf64_Ehdr *lib_ehdr, int lib_fd, struct LibDependency *lib_dep, + struct Compartment *new_comp) { - size_t res = pread(fd, buf, count, offset); - if (res == -1) + // Get segment data + Elf64_Phdr lib_phdr; + for (size_t i = 0; i < lib_ehdr->e_phnum; ++i) { - err(1, "Error in pread"); + do_pread(lib_fd, &lib_phdr, sizeof(Elf64_Phdr), + lib_ehdr->e_phoff + i * sizeof(lib_phdr)); + if (lib_phdr.p_type != PT_LOAD) + { + continue; + } + + struct SegmentMap *this_seg = malloc(sizeof(struct SegmentMap)); + this_seg->mem_bot + = (void *) align_down(lib_phdr.p_vaddr, new_comp->page_size); + this_seg->correction + = (char *) lib_phdr.p_vaddr - (char *) this_seg->mem_bot; + this_seg->mem_top = (char *) lib_phdr.p_vaddr + lib_phdr.p_memsz; + this_seg->offset = align_down(lib_phdr.p_offset, new_comp->page_size); + this_seg->mem_sz = lib_phdr.p_memsz + this_seg->correction; + this_seg->file_sz = lib_phdr.p_filesz + this_seg->correction; + this_seg->prot_flags = (lib_phdr.p_flags & PF_R ? PROT_READ : 0) + | (lib_phdr.p_flags & PF_W ? PROT_WRITE : 0) + | (lib_phdr.p_flags & PF_X ? 
PROT_EXEC : 0); + + lib_dep->lib_segs_count += 1; + lib_dep->lib_segs_size + += align_up(this_seg->mem_sz, lib_phdr.p_align); // TODO check + lib_dep->lib_segs = realloc(lib_dep->lib_segs, + lib_dep->lib_segs_count * sizeof(struct SegmentMap)); + memcpy(&lib_dep->lib_segs[lib_dep->lib_segs_count - 1], this_seg, + sizeof(struct SegmentMap)); + free(this_seg); } - return res; + lib_dep->lib_mem_base = align_up( + (char *) new_comp->mem_top + new_comp->page_size, new_comp->page_size); + new_comp->size += lib_dep->lib_segs_size; + new_comp->mem_top = (char *) lib_dep->lib_mem_base + lib_dep->lib_segs_size; } -static Elf64_Sym * -find_symbols(const char **names, size_t names_to_find_count, bool find_all, - Elf64_Sym *symtb, char *strtb, size_t symtb_sz) +static void +parse_lib_symtb(Elf64_Shdr *symtb_shdr, Elf64_Ehdr *lib_ehdr, int lib_fd, + struct LibDependency *lib_dep) { - Elf64_Sym *found_syms = calloc(names_to_find_count, sizeof(Elf64_Sym)); + // Get symbol table + Elf64_Shdr link_shdr; + assert(symtb_shdr->sh_link); + do_pread(lib_fd, &link_shdr, sizeof(Elf64_Shdr), + lib_ehdr->e_shoff + symtb_shdr->sh_link * sizeof(Elf64_Shdr)); + + Elf64_Sym *sym_tb = malloc(symtb_shdr->sh_size); + do_pread(lib_fd, sym_tb, symtb_shdr->sh_size, symtb_shdr->sh_offset); + char *str_tb = malloc(link_shdr.sh_size); + do_pread(lib_fd, str_tb, link_shdr.sh_size, link_shdr.sh_offset); + + lib_dep->lib_syms_count = symtb_shdr->sh_size / sizeof(Elf64_Sym); + size_t actual_syms = 0; + struct LibDependencySymbol *ld_syms + = malloc(lib_dep->lib_syms_count * sizeof(struct LibDependencySymbol)); + Elf64_Sym curr_sym; - size_t found_syms_count = 0; - for (size_t i = 0; i < symtb_sz / sizeof(Elf64_Sym); ++i) + for (size_t j = 0; j < lib_dep->lib_syms_count; ++j) { - curr_sym = symtb[i]; - for (size_t j = 0; j < names_to_find_count; ++j) + curr_sym = sym_tb[j]; + // TODO only handling FUNC symbols for now + if (ELF64_ST_TYPE(curr_sym.st_info) != STT_FUNC) { - // XXX As a follow-up from how we 
handle the string table, here we - // get symbol names by indexing at the `char` offset, then getting - // the string pointer (equivalent to `strtb + curr_sym.st_name`). - if (!strcmp(names[j], &strtb[curr_sym.st_name])) - { - found_syms[j] = curr_sym; - found_syms_count += 1; - } + continue; + } + if (curr_sym.st_value == 0) + { + continue; + } + ld_syms[actual_syms].sym_offset = (void *) curr_sym.st_value; + char *sym_name = &str_tb[curr_sym.st_name]; + ld_syms[actual_syms].sym_name = malloc(strlen(sym_name) + 1); + strcpy(ld_syms[actual_syms].sym_name, sym_name); + actual_syms += 1; + } + ld_syms + = realloc(ld_syms, actual_syms * sizeof(struct LibDependencySymbol)); + lib_dep->lib_syms_count = actual_syms; + lib_dep->lib_syms = ld_syms; + + free(sym_tb); + free(str_tb); +} + +static void +parse_lib_relaplt(Elf64_Shdr *rela_plt_shdr, Elf64_Ehdr *lib_ehdr, int lib_fd, + struct LibDependency *lib_dep) +{ + // Traverse `.rela.plt`, so we can see which function addresses we need + // to eagerly load + Elf64_Rela *rela_plt = malloc(rela_plt_shdr->sh_size); + do_pread( + lib_fd, rela_plt, rela_plt_shdr->sh_size, rela_plt_shdr->sh_offset); + size_t rela_count = rela_plt_shdr->sh_size / sizeof(Elf64_Rela); + + Elf64_Shdr dyn_sym_hdr; + do_pread(lib_fd, &dyn_sym_hdr, sizeof(Elf64_Shdr), + lib_ehdr->e_shoff + rela_plt_shdr->sh_link * sizeof(Elf64_Shdr)); + Elf64_Sym *dyn_sym_tbl = malloc(dyn_sym_hdr.sh_size); + do_pread(lib_fd, dyn_sym_tbl, dyn_sym_hdr.sh_size, dyn_sym_hdr.sh_offset); + + Elf64_Shdr dyn_str_hdr; + do_pread(lib_fd, &dyn_str_hdr, sizeof(Elf64_Shdr), + lib_ehdr->e_shoff + dyn_sym_hdr.sh_link * sizeof(Elf64_Shdr)); + char *dyn_str_tbl = malloc(dyn_str_hdr.sh_size); + do_pread(lib_fd, dyn_str_tbl, dyn_str_hdr.sh_size, dyn_str_hdr.sh_offset); + + lib_dep->rela_maps = malloc(rela_count * sizeof(struct LibRelaMapping)); + lib_dep->rela_maps_count = rela_count; + + // Log symbols that will need to be relocated eagerly at maptime + Elf64_Rela curr_rela; + for 
(size_t j = 0; j < lib_dep->rela_maps_count; ++j) + { + curr_rela = rela_plt[j]; + size_t curr_rela_sym_idx = ELF64_R_SYM(curr_rela.r_info); + Elf64_Sym curr_rela_sym = dyn_sym_tbl[curr_rela_sym_idx]; + char *curr_rela_name + = malloc(strlen(&dyn_str_tbl[curr_rela_sym.st_name]) + 1); + strcpy(curr_rela_name, &dyn_str_tbl[curr_rela_sym.st_name]); + struct LibRelaMapping lrm; + if (ELF64_ST_BIND(curr_rela_sym.st_info) == STB_WEAK) + { + // Do not handle weak-bind symbols + // TODO should we? + lrm = (struct LibRelaMapping) { curr_rela_name, 0, 0 }; + } + else + { + lrm = (struct LibRelaMapping) { curr_rela_name, + curr_rela.r_offset + (char *) lib_dep->lib_mem_base, NULL }; + } + lib_dep->rela_maps[j] = lrm; + } + free(rela_plt); + free(dyn_sym_tbl); + free(dyn_str_tbl); +} + +static void +parse_lib_dynamic_deps(Elf64_Shdr *dynamic_shdr, Elf64_Ehdr *lib_ehdr, + int lib_fd, struct LibDependency *lib_dep) +{ + // Find additional library dependencies + Elf64_Dyn *dyn_entries = malloc(dynamic_shdr->sh_size); + do_pread( + lib_fd, dyn_entries, dynamic_shdr->sh_size, dynamic_shdr->sh_offset); + Elf64_Shdr dynstr_shdr; + do_pread(lib_fd, &dynstr_shdr, sizeof(Elf64_Shdr), + lib_ehdr->e_shoff + dynamic_shdr->sh_link * sizeof(Elf64_Shdr)); + char *dynstr_tbl = malloc(dynstr_shdr.sh_size); + do_pread(lib_fd, dynstr_tbl, dynstr_shdr.sh_size, dynstr_shdr.sh_offset); + + for (size_t i = 0; i < dynamic_shdr->sh_size / sizeof(Elf64_Dyn); ++i) + { + if (dyn_entries[i].d_tag == DT_NEEDED) + { + lib_dep->lib_dep_names = realloc(lib_dep->lib_dep_names, + (lib_dep->lib_dep_count + 1) * sizeof(char *)); + lib_dep->lib_dep_names[lib_dep->lib_dep_count] + = malloc(strlen(&dynstr_tbl[dyn_entries[i].d_un.d_val])); + strcpy(lib_dep->lib_dep_names[lib_dep->lib_dep_count], + &dynstr_tbl[dyn_entries[i].d_un.d_val]); + lib_dep->lib_dep_count += 1; + } + } + + free(dynstr_tbl); + free(dyn_entries); +} + +static void +find_comp_entry_points( + char **entry_points, size_t entry_point_count, struct 
Compartment *new_comp) +{ + new_comp->entry_points + = malloc(entry_point_count * sizeof(struct CompEntryPoint)); + for (size_t i = 0; i < entry_point_count; ++i) + { + struct LibSymSearchResult found_sym + = find_lib_dep_sym_in_comp(entry_points[i], new_comp); + if (found_sym.lib_idx == USHRT_MAX) + { + errx(1, "Did not find entry point %s!\n", entry_points[i]); + } + struct CompEntryPoint new_entry_point + = { entry_points[i], extract_sym_offset(new_comp, found_sym) }; + new_comp->entry_points[new_comp->entry_point_count] = new_entry_point; + new_comp->entry_point_count += 1; + } +} + +static void +find_comp_intercepts(char **intercepts, void **intercept_addrs, + size_t intercept_count, struct Compartment *new_comp) +{ + // Find symbols for intercepts + char **intercept_names = malloc(intercept_count * sizeof(char *)); + const char *so_plt_suffix = "@plt"; + for (size_t i = 0; i < intercept_count; ++i) + { + size_t to_intercept_name_len + = strlen(intercepts[i]) + strlen(so_plt_suffix) + 1; + intercept_names[i] = malloc(to_intercept_name_len); + strcpy(intercept_names[i], intercepts[i]); + strcat(intercept_names[i], so_plt_suffix); + } + for (size_t i = 0; i < intercept_count; ++i) + { + struct LibSymSearchResult found_sym + = find_lib_dep_sym_in_comp(intercept_names[i], new_comp); + if (found_sym.lib_idx == USHRT_MAX) + { + continue; } + + // TODO double check + comp_add_intercept(new_comp, + (uintptr_t) extract_sym_offset(new_comp, found_sym), + (uintptr_t) intercept_addrs[i]); + free(intercept_names[i]); } + free(intercept_names); +} - // If we didn't find all symbols that we wanted to intercept, throw an error - if (find_all && found_syms_count != names_to_find_count) +static void +resolve_rela_syms(struct Compartment *new_comp) +{ + // Find all symbols for eager relocation mapping + for (size_t i = 0; i < new_comp->libs_count; ++i) { - const char **not_found_syms = malloc(names_to_find_count); - size_t not_found_idx = 0; - for (size_t i = 0; i < 
names_to_find_count; ++i) + for (size_t j = 0; j < new_comp->libs[i]->rela_maps_count; ++j) { - bool not_found = true; - for (size_t j = 0; j < found_syms_count; ++j) + // Ignore relocations we don't want to load, as earlier set on + // lookup (e.g., weak-bound symbols) + if (new_comp->libs[i]->rela_maps[j].rela_address == 0) { - if (!strcmp(&strtb[found_syms[j].st_name], names[i])) - { - not_found = false; - break; - } + continue; } - if (not_found) + + struct LibSymSearchResult found_sym = find_lib_dep_sym_in_comp( + new_comp->libs[i]->rela_maps[j].rela_name, new_comp); + if (found_sym.lib_idx == USHRT_MAX) { - not_found_syms[not_found_idx] = names[i]; - not_found_idx += 1; + errx(1, "Did not find symbol %s!\n", + new_comp->libs[i]->rela_maps[j].rela_name); } + new_comp->libs[i]->rela_maps[j].target_func_address + = extract_sym_offset(new_comp, found_sym); } - printf("Did not find following entry points [ "); - for (size_t i = 0; i < not_found_idx; ++i) + } +} + +/******************************************************************************* + * Helper functions + ******************************************************************************/ + +static ssize_t +do_pread(int fd, void *buf, size_t count, off_t offset) +{ + ssize_t res = pread(fd, buf, count, offset); + if (res == -1) + { + err(1, "Error in pread"); + } + return res; +} + +static void +get_lib_name(struct LibDependency *lib_dep, const char *lib_path) +{ + const char *basename = strrchr(lib_path, '/') + 1; + lib_dep->lib_name = malloc(strlen(basename)); + strcpy(lib_dep->lib_name, basename); +} + +static void * +extract_sym_offset(struct Compartment *comp, struct LibSymSearchResult res) +{ + return (char *) comp->libs[res.lib_idx]->lib_mem_base + + (intptr_t) comp->libs[res.lib_idx]->lib_syms[res.sym_idx].sym_offset; +} + +static struct LibSymSearchResult +find_lib_dep_sym_in_comp( + const char *to_find, struct Compartment *comp_to_search) +{ + for (size_t i = 0; i < comp_to_search->libs_count; ++i) 
+ { + for (size_t j = 0; j < comp_to_search->libs[i]->lib_syms_count; ++j) { - printf("%s ", not_found_syms[i]); + if (!strcmp(to_find, comp_to_search->libs[i]->lib_syms[j].sym_name)) + { + struct LibSymSearchResult res = { i, j }; + return res; + } } - printf("]\n"); - free(not_found_syms); - free(found_syms); - errx(1, NULL); } - - return found_syms; + struct LibSymSearchResult res = { -1, -1 }; + return res; } static char * -find_in_dir(const char *lib_name, char *search_dir) +find_in_dir(const char *const lib_name, char *search_dir) { - errno = 0; - char **search_paths = malloc(sizeof(char *)); + assert(search_dir != NULL); + char **search_paths = malloc(2 * sizeof(char *)); search_paths[0] = search_dir; + search_paths[1] = NULL; FTS *dir = fts_open(search_paths, FTS_LOGICAL, NULL); if (!dir) { @@ -791,21 +949,25 @@ find_in_dir(const char *lib_name, char *search_dir) { if (!strcmp(lib_name, curr_entry->fts_name)) { - return curr_entry->fts_path; + break; } } fts_close(dir); free(search_paths); + if (curr_entry != NULL) + { + return curr_entry->fts_path; + } return NULL; } +// TODO carefully recheck all the numbers are right static void init_comp_scratch_mem(struct Compartment *new_comp) { - new_comp->scratch_mem_base - = align_up((char *) new_comp->segs[new_comp->seg_count - 1]->mem_top - + new_comp->page_size, - new_comp->page_size); + new_comp->scratch_mem_base = align_up( + (char *) new_comp->base + new_comp->size + new_comp->page_size, + new_comp->page_size); new_comp->max_manager_caps_count = 10; // TODO new_comp->scratch_mem_heap_size = 0x800000UL; // TODO new_comp->scratch_mem_size = new_comp->scratch_mem_heap_size @@ -834,105 +996,43 @@ init_comp_scratch_mem(struct Compartment *new_comp) assert(new_comp->scratch_mem_size % 16 == 0); } -/* Get the segment data for segments we will be mapping for a library dependency - */ +/******************************************************************************* + * Print functions + 
******************************************************************************/ static void -init_lib_dep_info(struct LibDependency *lib_dep, struct Compartment *new_comp) +print_lib_dep_seg(struct SegmentMap *lib_dep_seg) { - lib_dep->lib_segs_count = 0; - int lib_fd = open(lib_dep->lib_path, O_RDONLY); - assert(lib_fd != -1 && "Error opening `lib_fd`"); - Elf64_Ehdr lib_ehdr; - Elf64_Phdr lib_phdr; - do_pread(lib_fd, &lib_ehdr, sizeof(Elf64_Ehdr), 0); + printf(">> bot %p // top %p // off 0x%zx // corr 0x%zx // msz 0x%zx // fsz " + "0x%zx\n", + lib_dep_seg->mem_bot, lib_dep_seg->mem_top, lib_dep_seg->offset, + lib_dep_seg->correction, lib_dep_seg->mem_sz, lib_dep_seg->file_sz); +} - // Get segment data - for (size_t i = 0; i < lib_ehdr.e_phnum; ++i) +static void +print_lib_dep(struct LibDependency *lib_dep) +{ + printf("== LIB DEPENDENCY\n"); + printf("- lib_name : %s\n", lib_dep->lib_name); + printf("- lib_path : %s\n", lib_dep->lib_path); + printf("- lib_mem_base : %p\n", lib_dep->lib_mem_base); + + printf("- lib_segs_count : %lu\n", lib_dep->lib_segs_count); + printf("- lib_segs_size : 0x%zx\n", lib_dep->lib_segs_size); + for (size_t i = 0; i < lib_dep->lib_segs_count; ++i) { - do_pread((int) lib_fd, &lib_phdr, sizeof(Elf64_Phdr), - lib_ehdr.e_phoff + i * sizeof(lib_phdr)); - if (lib_phdr.p_type != PT_LOAD) - { - continue; - } - - struct SegmentMap *this_seg = malloc(sizeof(struct SegmentMap)); - this_seg->mem_bot - = (void *) align_down(lib_phdr.p_vaddr, new_comp->page_size); - this_seg->correction - = (char *) lib_phdr.p_vaddr - (char *) this_seg->mem_bot; - this_seg->mem_top = (char *) lib_phdr.p_vaddr + lib_phdr.p_memsz; - this_seg->offset = align_down(lib_phdr.p_offset, new_comp->page_size); - this_seg->mem_sz = lib_phdr.p_memsz + this_seg->correction; - this_seg->file_sz = lib_phdr.p_filesz + this_seg->correction; - this_seg->prot_flags = (lib_phdr.p_flags & PF_R ? PROT_READ : 0) - | (lib_phdr.p_flags & PF_W ? 
PROT_WRITE : 0) - | (lib_phdr.p_flags & PF_X ? PROT_EXEC : 0); - - lib_dep->lib_segs_count += 1; - lib_dep->lib_segs_size - += align_up(this_seg->mem_sz, lib_phdr.p_align); // TODO check - lib_dep->lib_segs = realloc(lib_dep->lib_segs, - lib_dep->lib_segs_count * sizeof(struct SegmentMap)); - lib_dep->lib_segs[lib_dep->lib_segs_count - 1] = this_seg; + printf("\t"); + print_lib_dep_seg(&lib_dep->lib_segs[i]); } - lib_dep->lib_mem_base = align_down( - (char *) new_comp->mem_top + new_comp->page_size, new_comp->page_size); - new_comp->size += new_comp->page_size + lib_dep->lib_segs_size; + printf("- lib_syms_count : %lu\n", lib_dep->lib_syms_count); - // Get symbol table - Elf64_Shdr curr_shdr; - Elf64_Shdr link_shdr; - Elf64_Sym curr_sym; - for (size_t i = 0; i < lib_ehdr.e_shnum; ++i) + printf("- lib_dep_count : %hu\n", lib_dep->lib_dep_count); + printf("- lib_dep_names :\n"); + for (size_t i = 0; i < lib_dep->lib_dep_count; ++i) { - do_pread((int) lib_fd, &curr_shdr, sizeof(Elf64_Shdr), - lib_ehdr.e_shoff + i * sizeof(Elf64_Shdr)); - if (curr_shdr.sh_type != SHT_SYMTAB) - { - continue; - } - - assert(curr_shdr.sh_link); - do_pread((int) lib_fd, &link_shdr, sizeof(Elf64_Shdr), - lib_ehdr.e_shoff + curr_shdr.sh_link * sizeof(Elf64_Shdr)); - - Elf64_Sym *sym_tb = malloc(curr_shdr.sh_size); - do_pread((int) lib_fd, sym_tb, curr_shdr.sh_size, curr_shdr.sh_offset); - char *str_tb = malloc(link_shdr.sh_size); - do_pread((int) lib_fd, str_tb, link_shdr.sh_size, link_shdr.sh_offset); - - lib_dep->lib_syms_count = curr_shdr.sh_size / sizeof(Elf64_Sym); - size_t actual_syms = 0; - struct LibDependencySymbol *ld_syms = malloc( - lib_dep->lib_syms_count * sizeof(struct LibDependencySymbol)); - for (size_t j = 0; j < lib_dep->lib_syms_count; ++j) - { - curr_sym = sym_tb[j]; - // TODO only handling FUNC symbols for now - if (ELF64_ST_TYPE(curr_sym.st_info) != STT_FUNC) - { - continue; - } - if (curr_sym.st_value == 0) - { - continue; - } - ld_syms[actual_syms].sym_offset = 
curr_sym.st_value; - char *sym_name = &str_tb[curr_sym.st_name]; - ld_syms[actual_syms].sym_name = malloc(strlen(sym_name) + 1); - strcpy(ld_syms[actual_syms].sym_name, sym_name); - actual_syms += 1; - } - ld_syms = realloc( - ld_syms, actual_syms * sizeof(struct LibDependencySymbol)); - lib_dep->lib_syms_count = actual_syms; - lib_dep->lib_syms = ld_syms; - - free(sym_tb); - free(str_tb); + printf("--- %s\n", lib_dep->lib_dep_names[i]); } - close(lib_fd); + printf("- rela_maps_count : %zu\n", lib_dep->rela_maps_count); + printf("== DONE\n"); } diff --git a/src/intercept.c b/src/intercept.c index 353c790..1613a36 100644 --- a/src/intercept.c +++ b/src/intercept.c @@ -99,7 +99,8 @@ my_free(void *ptr) } size_t -my_call_comp(size_t comp_id, char *fn_name, void *args, size_t args_count) +my_call_comp( + size_t comp_id, char *fn_name, void *args) // TODO , size_t args_count) { struct Compartment *to_call = manager_get_compartment_by_id(comp_id); return exec_comp(to_call, fn_name, args); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f517ef3..694f98d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -62,7 +62,8 @@ function(new_comp_test test_name) set_property(TARGET ${test_name} PROPERTY compartment TRUE) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${test_name}.comp) - set_property(TARGET ${test_name} PROPERTY compartment_config ${CMAKE_CURRENT_SOURCE_DIR}/${test_name}.comp) + set_property(TARGET ${test_name} + PROPERTY compartment_config ${CMAKE_CURRENT_SOURCE_DIR}/${test_name}.comp) endif() endfunction() @@ -118,6 +119,10 @@ set(func_binaries set(comp_binaries "simple" + "simple_libc" + "simple_call_internal" + "simple_call_external" + "simple_external" #"time" #"lua_simple" #"lua_script" @@ -128,6 +133,9 @@ set(comp_binaries set(tests "simple" + "simple_libc" + "simple_call_internal" + "simple_call_external" #"time" #"lua_simple" #"lua_script" @@ -162,7 +170,10 @@ foreach(func_t IN LISTS func_binaries) endforeach() # Additional dependencies 
-new_dependency(test_map $) +target_link_libraries(simple_call_external PRIVATE simple_external) + +#new_dependency(test_map $) +new_dependency(simple_call_external $) #new_dependency(lua_script ${CMAKE_CURRENT_SOURCE_DIR}/hello_world.lua) #new_dependency(test_args_near_unmapped $) diff --git a/tests/args_simple.c b/tests/args_simple.c index 329f436..292a404 100644 --- a/tests/args_simple.c +++ b/tests/args_simple.c @@ -49,7 +49,7 @@ int main(int argc, char **argv) { size_t sum = 0; - for (size_t i = 0; i < argc; ++i) + for (int i = 0; i < argc; ++i) { sum += atoi(argv[i]); } diff --git a/tests/simple.c b/tests/simple.c index 250e88e..a7408c0 100644 --- a/tests/simple.c +++ b/tests/simple.c @@ -5,7 +5,6 @@ int main(void) { - assert(ceil(1.4) == 2); - assert(pow(2, 4) == 16); - return 0; + int x = 20; + return x - x; } diff --git a/tests/simple_call_external.c b/tests/simple_call_external.c new file mode 100644 index 0000000..74f9675 --- /dev/null +++ b/tests/simple_call_external.c @@ -0,0 +1,13 @@ +#include +#include + +int +call_external(int); + +int +main(void) +{ + int val = 4; + assert(val == call_external(val)); + return 0; +} diff --git a/tests/simple_call_internal.c b/tests/simple_call_internal.c new file mode 100644 index 0000000..e752caa --- /dev/null +++ b/tests/simple_call_internal.c @@ -0,0 +1,16 @@ +#include +#include + +int +call_internal(int x) +{ + return pow(x, 2); +} + +int +main(void) +{ + int val = 4; + assert(val * val == call_internal(val)); + return 0; +} diff --git a/tests/simple_external.c b/tests/simple_external.c new file mode 100644 index 0000000..9ae4a13 --- /dev/null +++ b/tests/simple_external.c @@ -0,0 +1,5 @@ +int +call_external(int val) +{ + return val; +} diff --git a/tests/simple_libc.c b/tests/simple_libc.c new file mode 100644 index 0000000..250e88e --- /dev/null +++ b/tests/simple_libc.c @@ -0,0 +1,11 @@ +#include +#include +#include + +int +main(void) +{ + assert(ceil(1.4) == 2); + assert(pow(2, 4) == 16); + return 0; +} diff 
--git a/tests/simple_malloc.c b/tests/simple_malloc.c new file mode 100644 index 0000000..c0d6995 --- /dev/null +++ b/tests/simple_malloc.c @@ -0,0 +1,12 @@ +#include +#include + +int +main(void) +{ + const char *hw = "Hello World!"; + char *x = malloc(strlen(hw)); + strcpy(x, hw); + free(x); + return 0; +} diff --git a/tests/simple_printf.c b/tests/simple_printf.c new file mode 100644 index 0000000..acf4544 --- /dev/null +++ b/tests/simple_printf.c @@ -0,0 +1,9 @@ +#include + +int +main(void) +{ + const char *hw = "Hello World!"; + printf("Inside - %s\n", hw); + return 0; +} diff --git a/tests/simple_various.c b/tests/simple_various.c new file mode 100644 index 0000000..7d61a27 --- /dev/null +++ b/tests/simple_various.c @@ -0,0 +1,20 @@ +#include +#include + +static const char *hw = "Hello World!"; + +void +do_print(const char *const to_print) +{ + printf("Doing print: %s", to_print); +} + +int +main(void) +{ + char *x = malloc(strlen(hw)); + strcpy(x, hw); + do_print(x); + free(x); + return 0; +}