From 6058c1369d1e1486a95d6e6c3fa344e80052ecbd Mon Sep 17 00:00:00 2001
From: Alwin Joshy
Date: Fri, 20 Sep 2024 16:20:41 +1000
Subject: [PATCH] aarch64: make the microkit loader relocatable

This allows a microkit image to be loaded anywhere in memory, at which
point it will relocate itself to the LOADER_LINK_ADDRESS. These changes
are based on the elfloader from seL4/seL4_tools.

Changes include:
* crt0.s was changed to crt0.S and compiled with gcc to pass in
  LINK_ADDRESS
* memmove and memcpy implementations were added
* loader_data was changed to include a size field. This is the size
  of loader_data, the list of regions, and the regions themselves.
  These also need to be relocated to ensure that everything works
  correctly

Signed-off-by: Alwin Joshy
---
 loader/Makefile             |   2 +-
 loader/src/aarch64/crt0.S   | 114 ++++++++++++++++++++++++++++++++++++
 loader/src/aarch64/crt0.s   |  25 --------
 loader/src/loader.c         | 110 +++++++++++++++++++++++++++++++---
 tool/microkit/src/loader.rs |  31 ++++++----
 5 files changed, 235 insertions(+), 47 deletions(-)
 create mode 100644 loader/src/aarch64/crt0.S
 delete mode 100644 loader/src/aarch64/crt0.s

diff --git a/loader/Makefile b/loader/Makefile
index 4195f1e3..7e20cc17 100644
--- a/loader/Makefile
+++ b/loader/Makefile
@@ -54,7 +54,7 @@ LINKSCRIPT_INPUT := $(ARCH).ld
 LINKSCRIPT := $(BUILD_DIR)/link.ld
 
 $(BUILD_DIR)/%.o : src/$(ARCH_DIR)/%.S
-	$(TOOLCHAIN)gcc -x assembler-with-cpp -c $(ASM_FLAGS) $< -o $@
+	$(TOOLCHAIN)gcc -DLINK_ADDRESS=$(LINK_ADDRESS) -x assembler-with-cpp -c $(ASM_FLAGS) $< -o $@
 
 $(BUILD_DIR)/%.o : src/$(ARCH_DIR)/%.s
 	$(TOOLCHAIN)as $< -o $@
diff --git a/loader/src/aarch64/crt0.S b/loader/src/aarch64/crt0.S
new file mode 100644
index 00000000..6c3c4d24
--- /dev/null
+++ b/loader/src/aarch64/crt0.S
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2021, Breakaway Consulting Pty. Ltd.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+.extern main
+
+.section ".text.start"
+
+.global _start;
+.type _start, %function;
+_start:
+
+    mrs     x0, mpidr_el1
+    and     x0, x0,#0xFF        // Check processor id
+    cbz     x0, master          // Hang for all non-primary CPU
+
+proc_hang:
+    wfe
+    b       proc_hang
+
+master:
+    adrp    x1, _stack
+    add     x1, x1, #0xff0      // NOTE(review): adrp yields the 4 KiB page base, so this assumes _stack is page-aligned; confirm
+    mov     sp, x1
+    /* May not have been loaded in the right location.
+     * Try and move ourselves so we're in the right place
+     */
+    bl      fixup_image_base
+    cmp     x0, #1
+    beq     1f
+    /* Otherwise, jump to the start of the new elf-loader */
+    br      x0
+1:
+    b       main
+
+/* Move the image to LINK_ADDRESS if it was loaded somewhere else.
+ * Returns 1 in x0 when the image is already in place, otherwise the
+ * address the image was moved to. Branches to abort if moving the
+ * image would overwrite the part of it that is currently running. */
+fixup_image_base:
+    stp     x29, x30, [sp, #-16]!
+    mov     x29, sp
+    /* Check if the image is already at the correct location */
+    ldr     x0, =LINK_ADDRESS
+    adr     x1, _start
+    cmp     x0, x1
+    beq     image_ok
+
+    /* Sanity check: We don't want to overwrite ourselves! We assume that
+     * everything between _start (src_start) and _bss_end (src_end) is important (i.e.
+     * something that might be run while relocating) but allow overlap for
+     * things after _bss_end i.e. the loader_data.
+     */
+    adrp    x2, _bss_end
+    add     x2, x2, #:lo12:_bss_end
+
+    /* The loader_data is directly after _bss_end, with the first
+     * value being the loader_data struct. The first field of this
+     * struct is the size of the loader_data region, so we add
+     * this to _bss_end to get the real end of the image
+     */
+    ldr     x3, [x2]
+    add     x2, x2, x3
+    sub     x2, x2, x1
+
+    adrp    x3, _bss_end
+    add     x3, x3, #:lo12:_bss_end
+
+    add     x4, x0, x2          /* dst_end */
+
+    /* At this point:
+     *      x0: dst_start (LINK_ADDRESS)
+     *      x1: src_start (_start)
+     *      x2: image_size
+     *      x3: src_end   (_bss_end)
+     *      x4: dst_end   (LINK_ADDRESS + image_size)
+     */
+
+    /* check: if (dst_start < src_end && dst_end > src_start) { abort }
+     * This is one interval-overlap test. It also rejects a destination
+     * that starts below src_start and extends past src_end, which looking
+     * only at the two destination end points would miss. Addresses are
+     * unsigned, so the unsigned condition codes (hs/ls) are used.
+     */
+    cmp     x0, x3
+    bhs     2f                  /* dst_start >= src_end: no overlap */
+
+    cmp     x4, x1
+    bls     2f                  /* dst_end <= src_start: no overlap */
+
+cant_reloc:
+    b       abort
+
+2:
+    /* x0 = desired image base */
+    /* x1 = current image base */
+    /* x2 = image size */
+    bl      memmove
+
+    /* x0 = dest, save it to a callee-saved register while we invalidate icache */
+    mov     x19, x0
+    bl      flush_dcache
+    bl      invalidate_icache
+    mov     x0, x19
+    b       1f
+
+image_ok:
+    /* Already in the right place, keep booting */
+    mov     x0, #1
+
+1:
+    ldp     x29, x30, [sp], #16
+    ret
diff --git a/loader/src/aarch64/crt0.s b/loader/src/aarch64/crt0.s
deleted file mode 100644
index bee97e18..00000000
--- a/loader/src/aarch64/crt0.s
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright 2021, Breakaway Consulting Pty. Ltd.
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-.extern main
-
-.section ".text.start"
-
-.global _start;
-.type _start, %function;
-_start:
-
-    mrs     x0, mpidr_el1
-    and     x0, x0,#0xFF        // Check processor id
-    cbz     x0, master          // Hang for all non-primary CPU
-
-proc_hang:
-    wfe
-    b       proc_hang
-
-master:
-    ldr     x1, =_stack + 0xff0
-    mov     sp, x1
-    b       main
diff --git a/loader/src/loader.c b/loader/src/loader.c
index 7aeccc62..d3caf821 100644
--- a/loader/src/loader.c
+++ b/loader/src/loader.c
@@ -10,8 +10,10 @@ _Static_assert(sizeof(uintptr_t) == 8 || sizeof(uintptr_t) == 4, "Expect uintptr
 
 #if UINTPTR_MAX == 0xffffffffUL
 #define WORD_SIZE 32
+#define BYTE_PER_WORD 4
 #else
 #define WORD_SIZE 64
+#define BYTE_PER_WORD 8
 #endif
 
 #if WORD_SIZE == 32
@@ -56,6 +58,7 @@ struct region {
 };
 
 struct loader_data {
+    uintptr_t size; /* bytes of this header + region list + region data; must stay first, crt0.S reads it at _bss_end */
     uintptr_t magic;
     uintptr_t flags;
     uintptr_t kernel_entry;
@@ -81,6 +84,97 @@ typedef void (*sel4_entry)(
     uintptr_t extra_device_size
 );
 
+/* Copy n bytes from src to dest and return dest; the regions must not overlap. */
+void *memcpy(void *restrict dest, const void *restrict src, size_t n)
+{
+    unsigned char *d = (unsigned char *)dest;
+    const unsigned char *s = (const unsigned char *)src;
+
+    /* For ARM, we also need to consider if src is aligned.           *
+     * There are two cases: (1) If rs == 0 and rd == 0, dest          *
+     * and src are copy_unit-aligned. (2) If (rs == rd && rs != 0),   *
+     * src and dest can be made copy_unit-aligned by copying rs bytes *
+     * first. (1) is a special case of (2).                           */
+
+    size_t copy_unit = BYTE_PER_WORD;
+    while (1) {
+        int rs = (uintptr_t)s % copy_unit;
+        int rd = (uintptr_t)d % copy_unit;
+        if (rs == rd) {
+            break;
+        }
+        if (copy_unit == 1) {
+            break;
+        }
+        copy_unit >>= 1;
+    }
+
+#ifdef HAS_MAY_ALIAS
+    /* copy byte by byte until copy-unit aligned */
+    for (; (uintptr_t)d % copy_unit != 0 && n > 0; d++, s++, n--) {
+        *d = *s;
+    }
+    /* copy unit by unit as long as we can */
+    for (; n > copy_unit - 1; n -= copy_unit, s += copy_unit, d += copy_unit) {
+        switch (copy_unit) {
+        case 8:
+            *(uint64_t *)d = *(const uint64_t *)s;
+            break;
+        case 4:
+            *(uint32_t *)d = *(const uint32_t *)s;
+            break;
+        case 2:
+            *(uint16_t *)d = *(const uint16_t *)s;
+            break;
+        case 1:
+            *(uint8_t *)d = *(const uint8_t *)s;
+            break;
+        default:
+            printf("Invalid copy unit %ld\n", copy_unit); /* NOTE(review): copy_unit is size_t (%zu); confirm this loader provides printf */
+            abort();
+        }
+    }
+    /* copy any remainder byte by byte */
+    for (; n > 0; d++, s++, n--) {
+        *d = *s;
+    }
+#else
+    size_t i;
+    for (i = 0; i < n; i++) {
+        d[i] = s[i];
+    }
+#endif
+
+    return dest;
+}
+
+/* Copy n bytes from src to dest and return dest; the regions may overlap. */
+void *memmove(void *dest, const void *src, size_t n)
+{
+    unsigned char *d = (unsigned char *)dest;
+    const unsigned char *s = (const unsigned char *)src;
+    /* no copying to do */
+    if (d == s) {
+        return dest;
+    }
+    /* for non-overlapping regions, just use memcpy */
+    else if (s + n <= d || d + n <= s) {
+        return memcpy(dest, src, n);
+    }
+    /* overlap, src above dest: copy forwards here (memcpy's pointers are restrict) */
+    else if (s > d) {
+        for (size_t i = 0; i < n; i++) {
+            d[i] = s[i];
+        }
+        return dest;
+    }
+    /* overlap, src below dest: copy from end of 's' to end of 'd' */
+    for (size_t i = 1; i <= n; i++) {
+        d[n - i] = s[n - i];
+    }
+    return dest;
+}
+
 void switch_to_el1(void);
 void switch_to_el2(void);
 void el1_mmu_enable(void);
@@ -111,15 +205,6 @@ extern char _text;
 extern char _bss_end;
 const struct loader_data *loader_data = (void *) &_bss_end;
 
-static void memcpy(void *dst, const void *src, size_t sz)
-{
-    char *dst_ = dst;
-    const char *src_ = src;
-    while (sz-- > 0) {
-        *dst_++ = *src_++;
-    }
-}
-
 #if defined(BOARD_tqma8xqp1gb)
 #define UART_BASE 0x5a070000
 #define STAT 0x14
@@ -429,6 +514,13 @@ static char *ec_to_string(uintptr_t ec)
 }
 #endif
 
+void abort(void)
+{
+    puts("abort() was called. This means relocation failed. \n");
+    /* Report the failure, then spin forever instead of returning to the caller. */
+    while (1);
+}
+
 /*
  * Print out the loader data structure.
  *
diff --git a/tool/microkit/src/loader.rs b/tool/microkit/src/loader.rs
index ab031550..559c034c 100644
--- a/tool/microkit/src/loader.rs
+++ b/tool/microkit/src/loader.rs
@@ -108,6 +108,7 @@ struct LoaderRegion64 {
 
 #[repr(C)]
 struct LoaderHeader64 {
+    size: u64, // bytes of header + region metadata + region data; kept first to mirror `struct loader_data`
     magic: u64,
     flags: u64,
     kernel_entry: u64,
@@ -279,7 +280,25 @@ impl<'a> Loader<'a> {
             false => 0,
         };
 
+        let mut region_metadata = Vec::new();
+        let mut offset: u64 = 0;
+        for (addr, data) in &all_regions {
+            region_metadata.push(LoaderRegion64 {
+                load_addr: *addr,
+                size: data.len() as u64,
+                offset,
+                r#type: 1,
+            });
+            offset += data.len() as u64;
+        }
+
+        let size = std::mem::size_of::<LoaderHeader64>() as u64
+            + region_metadata.iter().fold(0_u64, |acc, x| {
+                acc + x.size + std::mem::size_of::<LoaderRegion64>() as u64
+            });
+
         let header = LoaderHeader64 {
+            size,
             magic,
             flags,
             kernel_entry,
@@ -292,18 +311,6 @@ impl<'a> Loader<'a> {
             num_regions: all_regions.len() as u64,
         };
 
-        let mut region_metadata = Vec::new();
-        let mut offset: u64 = 0;
-        for (addr, data) in &all_regions {
-            region_metadata.push(LoaderRegion64 {
-                load_addr: *addr,
-                size: data.len() as u64,
-                offset,
-                r#type: 1,
-            });
-            offset += data.len() as u64;
-        }
-
         Loader {
             image,
             header,