From c00f0fc72f1c48f0b36e3ad64494ce5bfce22df8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 27 Feb 2024 14:07:32 +0100 Subject: [PATCH 1/8] elf: misc improvement * add handling for --section-start, -Tbss, -Ttext and -Tdata opts * sort alloc sections by address before allocating phdrs --- src/Elf.zig | 159 +++++++++++++++++++++++----------------- src/Elf/Options.zig | 31 +++++++- src/Elf/Symbol.zig | 2 +- src/Elf/relocatable.zig | 2 +- test/elf.zig | 53 ++++++++++++++ 5 files changed, 175 insertions(+), 72 deletions(-) diff --git a/src/Elf.zig b/src/Elf.zig index 5ba4f7ef..dd773b8e 100644 --- a/src/Elf.zig +++ b/src/Elf.zig @@ -446,7 +446,7 @@ fn sortInitFini(self: *Elf) !void { } }; - for (self.sections.items(.shdr), 0..) |*shdr, shndx| { + for (self.sections.items(.shdr), 0..) |shdr, shndx| { if (!shdrIsAlloc(shdr)) continue; var is_init_fini = false; @@ -1017,34 +1017,56 @@ fn initPhdrs(self: *Elf) !void { }); } - // Add LOAD phdrs const slice = self.sections.slice(); + + // Add LOAD phdrs { - var last_phdr: ?u16 = null; - var shndx: usize = 0; - while (shndx < slice.len) { - const shdr = &slice.items(.shdr)[shndx]; - if (!shdrIsAlloc(shdr) or shdrIsTbss(shdr)) { - shndx += 1; - continue; + var sorted = try std.ArrayList(elf.Elf64_Shdr).initCapacity(self.base.allocator, slice.len); + defer sorted.deinit(); + + for (slice.items(.shdr)) |shdr| { + if (!shdrIsAlloc(shdr) or shdrIsTbss(shdr)) continue; + sorted.appendAssumeCapacity(shdr); + } + + const sortShdr = struct { + fn lessThan(ctx: void, lhs: elf.Elf64_Shdr, rhs: elf.Elf64_Shdr) bool { + _ = ctx; + return lhs.sh_addr < rhs.sh_addr; } - last_phdr = try self.addPhdr(.{ + }.lessThan; + mem.sort(elf.Elf64_Shdr, sorted.items, {}, sortShdr); + + var is_phdr_included = false; + var shndx: usize = 0; + while (shndx < sorted.items.len) { + const shdr = sorted.items[shndx]; + const p_flags = shdrToPhdrFlags(shdr.sh_flags); + const phndx = try self.addPhdr(.{ .type = elf.PT_LOAD, - .flags = shdrToPhdrFlags(shdr.sh_flags), + .flags = p_flags, .@"align" = @max(self.options.page_size.?, shdr.sh_addralign), - .offset = if (last_phdr == null) 0 else shdr.sh_offset, - .addr = if (last_phdr == null) self.options.image_base else shdr.sh_addr, + .offset = shdr.sh_offset, + .addr = shdr.sh_addr, }); - const p_flags = self.phdrs.items[last_phdr.?].p_flags; - try self.addShdrToPhdr(last_phdr.?, shdr); + if (!is_phdr_included and p_flags == elf.PF_R) { + const phdr = &self.phdrs.items[phndx]; + phdr.p_offset = 0; + phdr.p_vaddr = self.options.image_base; + phdr.p_paddr = phdr.p_vaddr; + is_phdr_included = true; + } + try self.addShdrToPhdr(phndx, shdr); shndx += 1; - while (shndx < slice.len) : (shndx += 1) { - const next = &slice.items(.shdr)[shndx]; - if (shdrIsTbss(next)) continue; + while (shndx < sorted.items.len) : (shndx += 1) { + const next = sorted.items[shndx]; if (p_flags == shdrToPhdrFlags(next.sh_flags)) { - if (shdrIsBss(next) or next.sh_offset - shdr.sh_offset == next.sh_addr - shdr.sh_addr) { - try self.addShdrToPhdr(last_phdr.?, next); + if (shdrIsBss(next) or + (next.sh_offset > shdr.sh_offset and next.sh_addr > shdr.sh_addr and + next.sh_offset - shdr.sh_offset == next.sh_addr - shdr.sh_addr)) + { + try self.addShdrToPhdr(phndx, next); continue; } } @@ -1057,7 +1079,7 @@ fn initPhdrs(self: *Elf) !void { { var shndx: usize = 0; outer: while (shndx < slice.len) { - const shdr = &slice.items(.shdr)[shndx]; + const shdr = slice.items(.shdr)[shndx]; if (!shdrIsTls(shdr)) { shndx += 1; continue; @@ -1073,7 +1095,7 @@ fn initPhdrs(self: 
*Elf) !void { shndx += 1; while (shndx < slice.len) : (shndx += 1) { - const next = &slice.items(.shdr)[shndx]; + const next = slice.items(.shdr)[shndx]; if (!shdrIsTls(next)) continue :outer; try self.addShdrToPhdr(self.tls_phdr_index.?, next); } @@ -1130,7 +1152,7 @@ fn initPhdrs(self: *Elf) !void { .@"align" = 1, }); - // Backpatch size of the PHDR phdr + // Backpatch size of the PHDR phdr and possibly RO segment that holds it { const phdr = &self.phdrs.items[phdr_index]; const size = @sizeOf(elf.Elf64_Phdr) * self.phdrs.items.len; @@ -1139,7 +1161,7 @@ fn initPhdrs(self: *Elf) !void { } } -fn addShdrToPhdr(self: *Elf, phdr_index: u16, shdr: *const elf.Elf64_Shdr) !void { +fn addShdrToPhdr(self: *Elf, phdr_index: u16, shdr: elf.Elf64_Shdr) !void { const phdr = &self.phdrs.items[phdr_index]; phdr.p_align = @max(phdr.p_align, shdr.sh_addralign); if (shdr.sh_type != elf.SHT_NOBITS) { @@ -1157,23 +1179,23 @@ fn shdrToPhdrFlags(sh_flags: u64) u32 { return out_flags; } -inline fn shdrIsAlloc(shdr: *const elf.Elf64_Shdr) bool { +inline fn shdrIsAlloc(shdr: elf.Elf64_Shdr) bool { return shdr.sh_flags & elf.SHF_ALLOC != 0; } -inline fn shdrIsBss(shdr: *const elf.Elf64_Shdr) bool { +inline fn shdrIsBss(shdr: elf.Elf64_Shdr) bool { return shdrIsZerofill(shdr) and !shdrIsTls(shdr); } -inline fn shdrIsTbss(shdr: *const elf.Elf64_Shdr) bool { +inline fn shdrIsTbss(shdr: elf.Elf64_Shdr) bool { return shdrIsZerofill(shdr) and shdrIsTls(shdr); } -pub inline fn shdrIsZerofill(shdr: *const elf.Elf64_Shdr) bool { +pub inline fn shdrIsZerofill(shdr: elf.Elf64_Shdr) bool { return shdr.sh_type == elf.SHT_NOBITS; } -pub inline fn shdrIsTls(shdr: *const elf.Elf64_Shdr) bool { +pub inline fn shdrIsTls(shdr: elf.Elf64_Shdr) bool { return shdr.sh_flags & elf.SHF_TLS != 0; } @@ -1201,7 +1223,7 @@ fn allocateSectionsInMemory(self: *Elf, base_offset: u64) !void { }; var alignment = Align{}; - for (shdrs, 0..) |*shdr, i| { + for (shdrs, 0..) |shdr, i| { if (!shdrIsTls(shdr)) continue; if (alignment.first_tls_index == null) alignment.first_tls_index = i; alignment.tls_start_align = @max(alignment.tls_start_align, shdr.sh_addralign); @@ -1211,7 +1233,14 @@ fn allocateSectionsInMemory(self: *Elf, base_offset: u64) !void { var i: usize = 0; while (i < shdrs.len) : (i += 1) { const shdr = &shdrs[i]; - if (!shdrIsAlloc(shdr)) continue; + const name = self.shstrtab.getAssumeExists(shdr.sh_name); + if (!shdrIsAlloc(shdr.*)) continue; + if (self.options.section_start.get(name)) |sh_addr| { + addr = sh_addr; + shdr.sh_addr = addr; + addr += shdr.sh_size; + continue; + } if (i > 0) { const prev_shdr = shdrs[i - 1]; if (shdrToPhdrFlags(shdr.sh_flags) != shdrToPhdrFlags(prev_shdr.sh_flags)) { @@ -1219,7 +1248,7 @@ fn allocateSectionsInMemory(self: *Elf, base_offset: u64) !void { addr += self.options.page_size.?; } } - if (shdrIsTbss(shdr)) { + if (shdrIsTbss(shdr.*)) { // .tbss is a little special as it's used only by the loader meaning it doesn't // need to be actually mmap'ed at runtime. We still need to correctly increment // the addresses of every TLS zerofill section tho. Thus, we hack it so that @@ -1233,7 +1262,7 @@ fn allocateSectionsInMemory(self: *Elf, base_offset: u64) !void { // .data 0x10 // ... 
var tbss_addr = addr; - while (i < shdrs.len and shdrIsTbss(&shdrs[i])) : (i += 1) { + while (i < shdrs.len and shdrIsTbss(shdrs[i])) : (i += 1) { const tbss_shdr = &shdrs[i]; tbss_addr = alignment.@"align"(i, tbss_shdr.sh_addralign, tbss_addr); tbss_shdr.sh_addr = tbss_addr; @@ -1257,18 +1286,18 @@ fn allocateSectionsInFile(self: *Elf, base_offset: u64) void { var i: usize = 0; while (i < shdrs.len) { const first = &shdrs[i]; - defer if (!shdrIsAlloc(first) or shdrIsZerofill(first)) { + defer if (!shdrIsAlloc(first.*) or shdrIsZerofill(first.*)) { i += 1; }; // Non-alloc sections don't need congruency with their allocated virtual memory addresses - if (!shdrIsAlloc(first)) { + if (!shdrIsAlloc(first.*)) { first.sh_offset = mem.alignForward(u64, offset, first.sh_addralign); offset = first.sh_offset + first.sh_size; continue; } // Skip any zerofill section - if (shdrIsZerofill(first)) continue; + if (shdrIsZerofill(first.*)) continue; // Set the offset to a value that is congruent with the section's allocated virtual memory address if (first.sh_addralign > page_size) { @@ -1283,7 +1312,7 @@ fn allocateSectionsInFile(self: *Elf, base_offset: u64) void { prev.sh_offset = offset + prev.sh_addr - first.sh_addr; i += 1; - const next = &shdrs[i]; + const next = shdrs[i]; if (i >= shdrs.len or !shdrIsAlloc(next) or shdrIsZerofill(next)) break; if (next.sh_addr < first.sh_addr) break; @@ -1295,7 +1324,7 @@ fn allocateSectionsInFile(self: *Elf, base_offset: u64) void { offset = prev.sh_offset + prev.sh_size; // Skip any zerofill section - while (i < shdrs.len and shdrIsAlloc(&shdrs[i]) and shdrIsZerofill(&shdrs[i])) : (i += 1) {} + while (i < shdrs.len and shdrIsAlloc(shdrs[i]) and shdrIsZerofill(shdrs[i])) : (i += 1) {} } } @@ -1318,47 +1347,45 @@ fn getSectionRank(self: *Elf, shndx: u32) u8 { const shdr = self.sections.items(.shdr)[shndx]; const name = self.shstrtab.getAssumeExists(shdr.sh_name); const flags = shdr.sh_flags; - switch (shdr.sh_type) { - elf.SHT_NULL => return 0, - elf.SHT_DYNSYM => return 2, - elf.SHT_HASH => return 3, - elf.SHT_GNU_HASH => return 3, - elf.SHT_GNU_VERSYM => return 4, - elf.SHT_GNU_VERDEF => return 4, - elf.SHT_GNU_VERNEED => return 4, + const rank: u8 = switch (shdr.sh_type) { + elf.SHT_NULL => 0, + elf.SHT_DYNSYM => 2, + elf.SHT_HASH => 3, + elf.SHT_GNU_HASH => 3, + elf.SHT_GNU_VERSYM => 4, + elf.SHT_GNU_VERDEF => 4, + elf.SHT_GNU_VERNEED => 4, elf.SHT_PREINIT_ARRAY, elf.SHT_INIT_ARRAY, elf.SHT_FINI_ARRAY, - => return 0xf2, + => 0xf2, - elf.SHT_DYNAMIC => return 0xf3, + elf.SHT_DYNAMIC => 0xf3, - elf.SHT_RELA, elf.SHT_GROUP => return 0xf, + elf.SHT_RELA, elf.SHT_GROUP => 0xf, - elf.SHT_PROGBITS => if (flags & elf.SHF_ALLOC != 0) { + elf.SHT_PROGBITS => if (flags & elf.SHF_ALLOC != 0) blk: { if (flags & elf.SHF_EXECINSTR != 0) { - return 0xf1; + break :blk 0xf1; } else if (flags & elf.SHF_WRITE != 0) { - return if (flags & elf.SHF_TLS != 0) 0xf4 else 0xf6; + break :blk if (flags & elf.SHF_TLS != 0) 0xf4 else 0xf6; } else if (mem.eql(u8, name, ".interp")) { - return 1; - } else { - return 0xf0; - } - } else { - if (mem.startsWith(u8, name, ".debug")) { - return 0xf8; + break :blk 1; } else { - return 0xf9; + break :blk 0xf0; } - }, + } else if (mem.startsWith(u8, name, ".debug")) + 0xf8 + else + 0xf9, - elf.SHT_NOBITS => return if (flags & elf.SHF_TLS != 0) 0xf5 else 0xf7, - elf.SHT_SYMTAB => return 0xfa, - elf.SHT_STRTAB => return if (mem.eql(u8, name, ".dynstr")) 4 else 0xfb, - else => return 0xff, - } + elf.SHT_NOBITS => if (flags & elf.SHF_TLS != 0) 0xf5 else 
0xf7, + elf.SHT_SYMTAB => 0xfa, + elf.SHT_STRTAB => if (mem.eql(u8, name, ".dynstr")) 4 else 0xfb, + else => 0xff, + }; + return rank; } pub fn sortSections(self: *Elf) !void { diff --git a/src/Elf/Options.zig b/src/Elf/Options.zig index 8d8227d2..a74ce825 100644 --- a/src/Elf/Options.zig +++ b/src/Elf/Options.zig @@ -20,7 +20,7 @@ static: bool = false, relax: bool = true, export_dynamic: bool = false, image_base: u64 = 0x200000, -page_size: ?u16 = null, +page_size: ?u64 = null, pie: bool = false, pic: bool = false, warn_common: bool = false, @@ -28,6 +28,7 @@ build_id: ?BuildId = null, hash_style: ?HashStyle = null, apply_dynamic_relocs: bool = true, soname: ?[]const u8 = null, +section_start: std.StringHashMapUnmanaged(u64) = .{}, /// -z flags /// Overrides default stack size. z_stack_size: ?u64 = null, @@ -251,6 +252,17 @@ pub fn parse(arena: Allocator, args: []const []const u8, ctx: anytype) !Options opts.z_relro = false; } else if (p.flagZ("muldefs")) { opts.allow_multiple_definition = true; + } else if (p.argAny("section-start")) |pair| { + var pair_it = std.mem.split(u8, pair, "="); + const name = pair_it.next() orelse ctx.fatal("expected section=address mapping", .{}); + const value = pair_it.next() orelse ctx.fatal("expected section=address mapping", .{}); + try opts.parseSectionStart(arena, name, value, ctx); + } else if (p.arg1("Tbss")) |value| { + try opts.parseSectionStart(arena, ".bss", value, ctx); + } else if (p.arg1("Ttext")) |value| { + try opts.parseSectionStart(arena, ".text", value, ctx); + } else if (p.arg1("Tdata")) |value| { + try opts.parseSectionStart(arena, ".data", value, ctx); } else { try positionals.append(.{ .tag = .path, .path = p.arg }); } @@ -397,11 +409,16 @@ const usage = \\--relax Optimize instructions (default) \\ --no-relax \\--rpath=[value], -R [value] Specify runtime path + \\--section-start section=address + \\ Set address of named section \\--shared Create dynamic library \\--soname=[value], -h [value] Set shared library name \\--start-group Ignored for compatibility with GNU \\--strip-all, -s Strip all symbols. 
Implies --strip-debug \\--strip-debug, -S Strip .debug_ sections + \\-Tbss Set address of .bss section + \\-Tdata Set address of .data section + \\-Ttext Set address of .text section \\--warn-common Warn about duplicate common symbols \\-z Set linker extension flags \\ execstack Require executable stack @@ -438,9 +455,9 @@ fn cpuArchToElfEmulation(cpu_arch: std.Target.Cpu.Arch) []const u8 { }; } -const supported_emulations = [_]struct { std.Target.Cpu.Arch, u16 }{ +const supported_emulations = [_]struct { std.Target.Cpu.Arch, u64 }{ .{ .x86_64, 0x1000 }, - .{ .aarch64, 0x1000 }, + .{ .aarch64, 0x10000 }, .{ .riscv64, 0x1000 }, }; @@ -453,13 +470,19 @@ fn cpuArchFromElfEmulation(value: []const u8) ?std.Target.Cpu.Arch { return null; } -pub fn defaultPageSize(cpu_arch: std.Target.Cpu.Arch) ?u16 { +pub fn defaultPageSize(cpu_arch: std.Target.Cpu.Arch) ?u64 { inline for (supported_emulations) |emulation| { if (cpu_arch == emulation[0]) return emulation[1]; } return null; } +fn parseSectionStart(opts: *Options, arena: Allocator, name: []const u8, value: []const u8, ctx: anytype) !void { + const start = std.fmt.parseInt(u64, value, 0) catch + ctx.fatal("Could not parse value '{s}' into integer\n", .{value}); + _ = try opts.section_start.put(arena, try arena.dupe(u8, name), start); +} + const cmd = "ld.zld"; pub const BuildId = enum { diff --git a/src/Elf/Symbol.zig b/src/Elf/Symbol.zig index cc202b51..5cc3269e 100644 --- a/src/Elf/Symbol.zig +++ b/src/Elf/Symbol.zig @@ -224,7 +224,7 @@ pub fn setOutputSym(symbol: Symbol, elf_file: *Elf, out: *elf.Elf64_Sym) void { break :blk 0; } if (st_shndx == elf.SHN_ABS or st_shndx == elf.SHN_COMMON) break :blk symbol.getAddress(.{ .plt = false }, elf_file); - const shdr = &elf_file.sections.items(.shdr)[st_shndx]; + const shdr = elf_file.sections.items(.shdr)[st_shndx]; if (Elf.shdrIsTls(shdr)) break :blk symbol.getAddress(.{ .plt = false }, elf_file) - elf_file.getTlsAddress(); break :blk symbol.getAddress(.{ .plt = false }, elf_file); }; diff --git a/src/Elf/relocatable.zig b/src/Elf/relocatable.zig index ab77b223..3e4a7042 100644 --- a/src/Elf/relocatable.zig +++ b/src/Elf/relocatable.zig @@ -192,7 +192,7 @@ fn allocateSections(elf_file: *Elf, base_offset: u64) void { const shdrs = elf_file.sections.slice().items(.shdr)[1..]; var offset = base_offset; for (shdrs) |*shdr| { - if (Elf.shdrIsZerofill(shdr)) continue; + if (Elf.shdrIsZerofill(shdr.*)) continue; shdr.sh_offset = mem.alignForward(u64, offset, shdr.sh_addralign); offset = shdr.sh_offset + shdr.sh_size; } diff --git a/test/elf.zig b/test/elf.zig index 7d280eb9..6440e825 100644 --- a/test/elf.zig +++ b/test/elf.zig @@ -58,6 +58,7 @@ pub fn addElfTests(b: *Build, options: common.Options) *Step { elf_step.dependOn(testRelocatableArchive(b, opts)); elf_step.dependOn(testRelocatableEhFrame(b, opts)); elf_step.dependOn(testRelocatableNoEhFrame(b, opts)); + elf_step.dependOn(testSectionStart(b, opts)); elf_step.dependOn(testSharedAbsSymbol(b, opts)); elf_step.dependOn(testStrip(b, opts)); elf_step.dependOn(testTlsCommon(b, opts)); @@ -1962,6 +1963,58 @@ fn testRelocatableNoEhFrame(b: *Build, opts: Options) *Step { return test_step; } +fn testSectionStart(b: *Build, opts: Options) *Step { + const test_step = b.step("test-elf-section-start", ""); + + { + const exe = cc(b, "exe1", opts); + exe.addCSource( + \\#include + \\__attribute__((section(".dummy"))) void dummy() { printf("dummy"); } + \\int main() { + \\ dummy(); + \\ return 0; + \\} + ); + exe.addArgs(&.{"-Wl,--section-start,.dummy=0x10000"}); + + 
const check = exe.check(); + check.checkInHeaders(); + check.checkExact("section headers"); + check.checkExact("name .dummy"); + check.checkExact("addr 10000"); + test_step.dependOn(&check.step); + + const run = exe.run(); + run.expectStdOutEqual("dummy"); + test_step.dependOn(run.step()); + } + + { + const exe = cc(b, "exe2", opts); + exe.addCSource( + \\#include + \\int foo; + \\int main() { + \\ return foo; + \\} + ); + exe.addArgs(&.{"-Wl,-Tbss,0x10000"}); + + const check = exe.check(); + check.checkInHeaders(); + check.checkExact("section headers"); + check.checkExact("name .bss"); + check.checkExact("addr 10000"); + test_step.dependOn(&check.step); + + const run = exe.run(); + test_step.dependOn(run.step()); + } + + return test_step; +} + fn testSharedAbsSymbol(b: *Build, opts: Options) *Step { const test_step = b.step("test-elf-shared-abs-symbol", ""); From 5848a7e0cd5ca38891fadda84306cc46030777d7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 27 Feb 2024 16:21:20 +0100 Subject: [PATCH 2/8] elf: test -section-start in combination with -Ttext --- test/elf.zig | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/elf.zig b/test/elf.zig index 6440e825..6a2d1382 100644 --- a/test/elf.zig +++ b/test/elf.zig @@ -2012,6 +2012,35 @@ fn testSectionStart(b: *Build, opts: Options) *Step { test_step.dependOn(run.step()); } + { + const exe = cc(b, "exe3", opts); + exe.addCSource( + \\#include + \\__attribute__((section(".dummy"))) void dummy() { printf("dummy"); } + \\int main() { + \\ printf("hi "); + \\ dummy(); + \\ return 0; + \\} + ); + exe.addArgs(&.{ "-Wl,--section-start,.dummy=0x10000", "-Wl,-Ttext,0x1000" }); + + const check = exe.check(); + check.checkInHeaders(); + check.checkExact("section headers"); + check.checkExact("name .text"); + check.checkExact("addr 1000"); + check.checkInHeaders(); + check.checkExact("section headers"); + check.checkExact("name .dummy"); + check.checkExact("addr 10000"); + test_step.dependOn(&check.step); + + const run = exe.run(); + run.expectStdOutEqual("hi dummy"); + test_step.dependOn(run.step()); + } + return test_step; } From c262a858c06cdeb9e9ad348b0be8ddf7188bc21d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 27 Feb 2024 16:37:36 +0100 Subject: [PATCH 3/8] elf+aarch64: test thunks --- src/Elf/Atom.zig | 2 +- test/elf.zig | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/Elf/Atom.zig b/src/Elf/Atom.zig index dff3e113..f5180e8c 100644 --- a/src/Elf/Atom.zig +++ b/src/Elf/Atom.zig @@ -1342,7 +1342,7 @@ const aarch64 = struct { atom.getName(elf_file), rel.r_offset, }); - return; + return error.RelocError; }; aarch64_util.writeBranchImm(disp, code); }, diff --git a/test/elf.zig b/test/elf.zig index 6a2d1382..4947776f 100644 --- a/test/elf.zig +++ b/test/elf.zig @@ -61,6 +61,7 @@ pub fn addElfTests(b: *Build, options: common.Options) *Step { elf_step.dependOn(testSectionStart(b, opts)); elf_step.dependOn(testSharedAbsSymbol(b, opts)); elf_step.dependOn(testStrip(b, opts)); + elf_step.dependOn(testThunks(b, opts)); elf_step.dependOn(testTlsCommon(b, opts)); elf_step.dependOn(testTlsDesc(b, opts)); elf_step.dependOn(testTlsDescImport(b, opts)); @@ -2138,6 +2139,32 @@ fn testStrip(b: *Build, opts: Options) *Step { return test_step; } +fn testThunks(b: *Build, opts: Options) *Step { + const test_step = b.step("test-elf-thunks", ""); + + if (builtin.target.cpu.arch != .aarch64) return skipTestStep(test_step); + + const exe = cc(b, "a.out", opts); + exe.addCSource( + 
\\void foo(); + \\void foobar(); + \\__attribute__((section(".foo"))) void foo() { foobar(); } + \\__attribute__((section(".foobar"))) void foobar() { foo(); } + \\int main() { + \\ foo(); + \\ return 0; + \\} + ); + exe.addArgs(&.{ "-Wl,--section-start,.foo=0x1000", "-Wl,--section-start,.foobar=0x20000000" }); + + const check = exe.check(); + check.checkInSymtab(); + check.checkContains("foo$thunk"); + test_step.dependOn(&check.step); + + return test_step; +} + fn testTlsCommon(b: *Build, opts: Options) *Step { const test_step = b.step("test-elf-tls-common", ""); From 9b05156aa8cd3a628ef1d200092bcc04db70972b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 28 Feb 2024 10:18:11 +0100 Subject: [PATCH 4/8] elf+aarch64: add logic for deciding if we need range extenders --- src/Elf.zig | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/Elf.zig b/src/Elf.zig index dd773b8e..3d1bcfc9 100644 --- a/src/Elf.zig +++ b/src/Elf.zig @@ -735,8 +735,11 @@ fn calcSectionSizes(self: *Elf) !void { const tracy = trace(@src()); defer tracy.end(); - for (self.sections.items(.shdr), self.sections.items(.atoms)) |*shdr, atoms| { + const slice = self.sections.slice(); + + for (slice.items(.shdr), slice.items(.atoms)) |*shdr, atoms| { if (atoms.items.len == 0) continue; + if (self.requiresThunks() and shdr.sh_flags & elf.SHF_EXECINSTR != 0) continue; for (atoms.items) |atom_index| { const atom = self.getAtom(atom_index).?; @@ -749,6 +752,16 @@ fn calcSectionSizes(self: *Elf) !void { } } + if (self.requiresThunks()) { + for (slice.items(.shdr), slice.items(.atoms), 0..) |shdr, atoms, i| { + if (shdr.sh_flags & elf.SHF_EXECINSTR == 0) continue; + if (atoms.items.len == 0) continue; + + // Create jump/branch range extenders if needed. + try thunks.createThunks(@intCast(i), self); + } + } + if (self.eh_frame_sect_index) |index| { const shdr = &self.sections.items(.shdr)[index]; shdr.sh_size = try eh_frame.calcEhFrameSize(self); @@ -2761,6 +2774,14 @@ pub fn getTlsAddress(self: *Elf) u64 { return phdr.p_vaddr; } +fn requiresThunks(self: Elf) bool { + return switch (self.options.cpu_arch.?) { + .aarch64 => true, + .x86_64, .riscv64 => false, + else => @panic("unsupported architecture"), + }; +} + /// Caller owns the memory. 
pub fn preadAllAlloc(allocator: Allocator, file: std.fs.File, offset: usize, size: usize) ![]u8 { const buffer = try allocator.alloc(u8, size); @@ -2951,6 +2972,7 @@ const relocatable = @import("Elf/relocatable.zig"); const relocation = @import("Elf/relocation.zig"); const state_log = std.log.scoped(.state); const synthetic = @import("Elf/synthetic.zig"); +const thunks = @import("Elf/thunks.zig"); const trace = @import("tracy.zig").trace; const Allocator = mem.Allocator; From 5c9db83cf8a2d67b3ba55adfea9be945543d2810 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 28 Feb 2024 23:52:03 +0100 Subject: [PATCH 5/8] elf+aarch64: emit thunks --- src/Elf.zig | 19 +++++++++++++++++++ src/Elf/Atom.zig | 21 ++++++++++++++------- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/Elf.zig b/src/Elf.zig index 3d1bcfc9..bc1f795b 100644 --- a/src/Elf.zig +++ b/src/Elf.zig @@ -85,6 +85,7 @@ rela_plt: std.ArrayListUnmanaged(elf.Elf64_Rela) = .{}, comdat_group_sections: std.ArrayListUnmanaged(ComdatGroupSection) = .{}, atoms: std.ArrayListUnmanaged(Atom) = .{}, +thunks: std.ArrayListUnmanaged(Thunk) = .{}, comdat_groups: std.ArrayListUnmanaged(ComdatGroup) = .{}, comdat_groups_owners: std.ArrayListUnmanaged(ComdatGroupOwner) = .{}, @@ -137,6 +138,7 @@ pub fn deinit(self: *Elf) void { self.symtab.deinit(gpa); self.strtab.deinit(gpa); self.atoms.deinit(gpa); + self.thunks.deinit(gpa); self.comdat_groups.deinit(gpa); self.comdat_groups_owners.deinit(gpa); self.comdat_groups_table.deinit(gpa); @@ -2545,6 +2547,18 @@ pub fn getAtom(self: Elf, atom_index: Atom.Index) ?*Atom { return &self.atoms.items[atom_index]; } +pub fn addThunk(self: *Elf) !Thunk.Index { + const index = @as(Thunk.Index, @intCast(self.thunks.items.len)); + const thunk = try self.thunks.addOne(self.base.allocator); + thunk.* = .{}; + return index; +} + +pub fn getThunk(self: *Elf, index: Thunk.Index) *Thunk { + assert(index < self.thunks.items.len); + return &self.thunks.items[index]; +} + pub fn addSymbol(self: *Elf) !Symbol.Index { const index = @as(Symbol.Index, @intCast(self.symbols.items.len)); const symbol = try self.symbols.addOne(self.base.allocator); @@ -2876,6 +2890,10 @@ fn fmtDumpState( try writer.print("internal({d}) : internal\n", .{index}); try writer.print("{}\n", .{internal.fmtSymtab(self)}); } + try writer.writeAll("THUNKS\n"); + for (self.thunks.items, 0..) |thunk, index| { + try writer.print("thunk({d}) : {}\n", .{ index, thunk.fmt(self) }); + } try writer.print("GOT\n{}\n", .{self.got.fmt(self)}); try writer.writeAll("PLT\n"); for (self.plt.symbols.items, 0..) |sym_index, i| { @@ -2998,5 +3016,6 @@ const SharedObject = @import("Elf/SharedObject.zig"); const StringTable = @import("strtab.zig").StringTable; const Symbol = @import("Elf/Symbol.zig"); const ThreadPool = std.Thread.Pool; +const Thunk = @import("Elf/thunks.zig").Thunk; const VerneedSection = synthetic.VerneedSection; const Zld = @import("Zld.zig"); diff --git a/src/Elf/Atom.zig b/src/Elf/Atom.zig index f5180e8c..4de13499 100644 --- a/src/Elf/Atom.zig +++ b/src/Elf/Atom.zig @@ -31,6 +31,9 @@ rel_num: u32 = 0, /// Index of this atom in the linker's atoms table. atom_index: Index = 0, +/// Index of the thunk for this atom. +thunk_index: Thunk.Index = 0, + flags: Flags = .{}, /// Start index of FDEs referencing this atom. 
@@ -100,6 +103,10 @@ pub fn getRelocs(self: Atom, elf_file: *Elf) []const elf.Elf64_Rela { return object.relocs.items[self.rel_index..][0..self.rel_num]; } +pub fn getThunk(self: Atom, elf_file: *Elf) *Thunk { + return elf_file.getThunk(self.thunk_index); +} + pub fn writeRelocs(self: Atom, elf_file: *Elf, out_relocs: *std.ArrayList(elf.Elf64_Rela)) !void { const tracy = trace(@src()); defer tracy.end(); @@ -1312,6 +1319,7 @@ const aarch64 = struct { try stream.seekTo(rel.r_offset); const cwriter = stream.writer(); const code = code_buffer[rel.r_offset..][0..4]; + const object = atom.getObject(elf_file); const P, const A, const S, const GOT, const G, const TP, const DTP = args; _ = DTP; @@ -1336,13 +1344,11 @@ const aarch64 = struct { .CALL26, .JUMP26, => { - // TODO: add thunk support - const disp: i28 = math.cast(i28, S + A - P) orelse { - elf_file.base.fatal("{s}: {x}: TODO relocation target exceeds max jump distance", .{ - atom.getName(elf_file), - rel.r_offset, - }); - return error.RelocError; + const disp: i28 = math.cast(i28, S + A - P) orelse blk: { + const thunk = atom.getThunk(elf_file); + const target_index = object.symbols.items[rel.r_sym()]; + const S_: i64 = @intCast(thunk.getTargetAddress(target_index, elf_file)); + break :blk math.cast(i28, S_ + A - P) orelse return error.Overflow; }; aarch64_util.writeBranchImm(disp, code); }, @@ -1817,3 +1823,4 @@ const Fde = @import("eh_frame.zig").Fde; const File = @import("file.zig").File; const Object = @import("Object.zig"); const Symbol = @import("Symbol.zig"); +const Thunk = @import("thunks.zig").Thunk; From 5515dd7243aa60e7b8adae72a60b7a39be137eea Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 29 Feb 2024 00:10:21 +0100 Subject: [PATCH 6/8] elf+aarch64: emit $thunk locals in symtab --- src/Elf.zig | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Elf.zig b/src/Elf.zig index bc1f795b..8f736472 100644 --- a/src/Elf.zig +++ b/src/Elf.zig @@ -896,6 +896,13 @@ pub fn calcSymtabSize(self: *Elf) !void { nlocals += 1; } + for (self.thunks.items) |*thunk| { + thunk.output_symtab_ctx.ilocal = nlocals + 1; + thunk.calcSymtabSize(self); + nlocals += thunk.output_symtab_ctx.nlocals; + strsize += thunk.output_symtab_ctx.strsize; + } + for (files.items) |index| { const file = self.getFile(index).?; const ctx = switch (file) { @@ -965,6 +972,10 @@ pub fn writeSymtab(self: *Elf) !void { self.writeSectionSymbols(); + for (self.thunks.items) |thunk| { + thunk.writeSymtab(self); + } + for (self.objects.items) |index| { self.getFile(index).?.writeSymtab(self); } From 11b706a8e949c6734bd079c5b210216c51ebb8c4 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 29 Feb 2024 00:31:50 +0100 Subject: [PATCH 7/8] elf+aarch64: actually commit thunks.zig file --- src/Elf/thunks.zig | 240 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 src/Elf/thunks.zig diff --git a/src/Elf/thunks.zig b/src/Elf/thunks.zig new file mode 100644 index 00000000..ecf8f646 --- /dev/null +++ b/src/Elf/thunks.zig @@ -0,0 +1,240 @@ +pub fn createThunks(shndx: u32, elf_file: *Elf) !void { + const gpa = elf_file.base.allocator; + const cpu_arch = elf_file.options.cpu_arch.?; + const slice = elf_file.sections.slice(); + const shdr = &slice.items(.shdr)[shndx]; + const atoms = slice.items(.atoms)[shndx].items; + assert(atoms.len > 0); + + for (atoms) |atom_index| { + elf_file.getAtom(atom_index).?.value = @bitCast(@as(i64, -1)); + } + + var i: usize = 0; + while (i < atoms.len) { + const start = i; + const 
start_atom = elf_file.getAtom(atoms[start]).?; + assert(start_atom.flags.alive); + start_atom.value = try advance(shdr, start_atom.size, start_atom.alignment); + i += 1; + + while (i < atoms.len and + shdr.sh_size - start_atom.value < maxAllowedDistance(cpu_arch)) : (i += 1) + { + const atom_index = atoms[i]; + const atom = elf_file.getAtom(atom_index).?; + assert(atom.flags.alive); + atom.value = try advance(shdr, atom.size, atom.alignment); + } + + // Insert a thunk at the group end + const thunk_index = try elf_file.addThunk(); + const thunk = elf_file.getThunk(thunk_index); + thunk.out_shndx = shndx; + + // Scan relocs in the group and create trampolines for any unreachable callsite + for (atoms[start..i]) |atom_index| { + const atom = elf_file.getAtom(atom_index).?; + const object = atom.getObject(elf_file); + log.debug("atom({d}) {s}", .{ atom_index, atom.getName(elf_file) }); + for (atom.getRelocs(elf_file)) |rel| { + const is_reachable = switch (cpu_arch) { + .aarch64 => aarch64.isReachable(atom, rel, elf_file), + .x86_64, .riscv64 => unreachable, + else => @panic("unsupported arch"), + }; + if (is_reachable) continue; + const target = object.symbols.items[rel.r_sym()]; + try thunk.symbols.put(gpa, target, {}); + } + atom.thunk_index = thunk_index; + } + + thunk.value = try advance(shdr, thunk.size(elf_file), 2); + + log.debug("thunk({d}) : {}", .{ thunk_index, thunk.fmt(elf_file) }); + } +} + +fn advance(shdr: *elf.Elf64_Shdr, size: u64, pow2_align: u8) !u64 { + const alignment = try math.powi(u32, 2, pow2_align); + const offset = mem.alignForward(u64, shdr.sh_size, alignment); + const padding = offset - shdr.sh_size; + shdr.sh_size += padding + size; + shdr.sh_addralign = @max(shdr.sh_addralign, alignment); + return offset; +} + +/// A branch will need an extender if its target is larger than +/// `2^(jump_bits - 1) - margin` where margin is some arbitrary number. +fn maxAllowedDistance(cpu_arch: std.Target.Cpu.Arch) u32 { + return switch (cpu_arch) { + .aarch64 => 0x500_000, + .x86_64, .riscv64 => unreachable, + else => @panic("unhandled arch"), + }; +} + +pub const Thunk = struct { + value: u64 = 0, + out_shndx: u32 = 0, + symbols: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{}, + output_symtab_ctx: Elf.SymtabCtx = .{}, + + pub fn deinit(thunk: *Thunk, allocator: Allocator) void { + thunk.symbols.deinit(allocator); + } + + pub fn size(thunk: Thunk, elf_file: *Elf) usize { + const cpu_arch = elf_file.options.cpu_arch.?; + return thunk.symbols.keys().len * trampolineSize(cpu_arch); + } + + pub fn getAddress(thunk: Thunk, elf_file: *Elf) u64 { + const shdr = elf_file.sections.items(.shdr)[thunk.out_shndx]; + return shdr.sh_addr + thunk.value; + } + + pub fn getTargetAddress(thunk: Thunk, sym_index: Symbol.Index, elf_file: *Elf) u64 { + const cpu_arch = elf_file.options.cpu_arch.?; + return thunk.getAddress(elf_file) + thunk.symbols.getIndex(sym_index).? * trampolineSize(cpu_arch); + } + + pub fn write(thunk: Thunk, elf_file: *Elf, writer: anytype) !void { + switch (elf_file.options.cpu_arch.?) 
{ + .aarch64 => try aarch64.write(thunk, elf_file, writer), + .x86_64, .riscv64 => unreachable, + else => @panic("unhandled arch"), + } + } + + pub fn calcSymtabSize(thunk: *Thunk, elf_file: *Elf) void { + if (elf_file.options.strip_all) return; + + thunk.output_symtab_ctx.nlocals = @as(u32, @intCast(thunk.symbols.keys().len)); + for (thunk.symbols.keys()) |sym_index| { + const sym = elf_file.getSymbol(sym_index); + thunk.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(elf_file).len + "$thunk".len + 1)); + } + } + + pub fn writeSymtab(thunk: Thunk, elf_file: *Elf) void { + if (elf_file.options.strip_all) return; + const cpu_arch = elf_file.options.cpu_arch.?; + + for (thunk.symbols.keys(), thunk.output_symtab_ctx.ilocal..) |sym_index, ilocal| { + const sym = elf_file.getSymbol(sym_index); + const st_name = @as(u32, @intCast(elf_file.strtab.items.len)); + elf_file.strtab.appendSliceAssumeCapacity(sym.getName(elf_file)); + elf_file.strtab.appendSliceAssumeCapacity("$thunk"); + elf_file.strtab.appendAssumeCapacity(0); + elf_file.symtab.items[ilocal] = .{ + .st_name = st_name, + .st_info = elf.STT_FUNC, + .st_other = 0, + .st_shndx = @intCast(thunk.out_shndx), + .st_value = thunk.getTargetAddress(sym_index, elf_file), + .st_size = trampolineSize(cpu_arch), + }; + } + } + + fn trampolineSize(cpu_arch: std.Target.Cpu.Arch) usize { + return switch (cpu_arch) { + .aarch64 => aarch64.trampoline_size, + .x86_64, .riscv64 => unreachable, + else => @panic("unhandled arch"), + }; + } + + pub fn format( + thunk: Thunk, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = thunk; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format Thunk directly"); + } + + pub fn fmt(thunk: Thunk, elf_file: *Elf) std.fmt.Formatter(format2) { + return .{ .data = .{ + .thunk = thunk, + .elf_file = elf_file, + } }; + } + + const FormatContext = struct { + thunk: Thunk, + elf_file: *Elf, + }; + + fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + const thunk = ctx.thunk; + const elf_file = ctx.elf_file; + try writer.print("@{x} : size({x})\n", .{ thunk.value, thunk.size(elf_file) }); + for (thunk.symbols.keys()) |index| { + const sym = elf_file.getSymbol(index); + try writer.print(" %{d} : {s} : @{x}\n", .{ index, sym.getName(elf_file), sym.value }); + } + } + + pub const Index = u32; +}; + +const aarch64 = struct { + fn isReachable(atom: *const Atom, rel: elf.Elf64_Rela, elf_file: *Elf) bool { + const r_type: elf.R_AARCH64 = @enumFromInt(rel.r_type()); + if (r_type != .CALL26 and r_type != .JUMP26) return true; + const object = atom.getObject(elf_file); + const target = object.getSymbol(rel.r_sym(), elf_file); + if (target.flags.plt) return false; + if (atom.out_shndx != target.shndx) return false; + const target_atom = target.getAtom(elf_file).?; + if (target_atom.value == @as(u64, @bitCast(@as(i64, -1)))) return false; + const saddr = @as(i64, @intCast(atom.getAddress(elf_file) + rel.r_offset)); + const taddr: i64 = @intCast(target.getAddress(.{}, elf_file)); + _ = math.cast(i28, taddr + rel.r_addend - saddr) orelse return false; + return true; + } + + fn write(thunk: Thunk, elf_file: *Elf, writer: anytype) !void { + for (thunk.symbols.keys(), 0..) 
|sym_index, i| { + const sym = elf_file.getSymbol(sym_index); + const saddr = thunk.getAddress(elf_file) + i * trampoline_size; + const taddr = sym.getAddress(.{}, elf_file); + const pages = try util.calcNumberOfPages(saddr, taddr); + try writer.writeInt(u32, Instruction.adrp(.x16, pages).toU32(), .little); + const off: u12 = @truncate(taddr); + try writer.writeInt(u32, Instruction.add(.x16, .x16, off, false).toU32(), .little); + try writer.writeInt(u32, Instruction.br(.x16).toU32(), .little); + } + } + + const trampoline_size = 3 * @sizeOf(u32); + + const util = @import("../aarch64.zig"); + const Instruction = util.Instruction; +}; + +const assert = std.debug.assert; +const elf = std.elf; +const log = std.log.scoped(.elf); +const math = std.math; +const mem = std.mem; +const std = @import("std"); + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const Elf = @import("../Elf.zig"); +const Symbol = @import("Symbol.zig"); From 7be87fc4eaba081d7d3163691531221c3506915c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 29 Feb 2024 00:35:04 +0100 Subject: [PATCH 8/8] test: fixes for latest zig libstd changes --- test/test.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.zig b/test/test.zig index a77aa910..113ad855 100644 --- a/test/test.zig +++ b/test/test.zig @@ -16,7 +16,7 @@ pub fn addTests(b: *Build, comp: *Compile, build_opts: struct { }; const cc_override: ?[]const u8 = std.process.getEnvVarOwned(b.allocator, "CC") catch |e| switch (e) { error.EnvironmentVariableNotFound => null, - error.InvalidUtf8 => @panic("InvalidUtf8"), + error.InvalidWtf8 => @panic("InvalidWtf8"), error.OutOfMemory => @panic("OOM"), }; const zld = WriteFile.create(b).addCopyFile(comp.getEmittedBin(), "ld");
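
Usage sketch (illustrative only, not part of the patch series): the --section-start, -Ttext, -Tbss and -Tdata options added in PATCH 1/8 are exercised in the tests above through the C compiler driver. Assuming zld is the active linker and a source file main.c, the flag combination from the third testSectionStart case corresponds to:

    cc main.c -Wl,--section-start,.dummy=0x10000 -Wl,-Ttext,0x1000

Each name=address pair is parsed by parseSectionStart (src/Elf/Options.zig) into the new section_start map, which allocateSectionsInMemory (src/Elf.zig) consults to pin the named output sections to the requested virtual addresses instead of the default layout.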