From 455223b8460e71c9849f4fdfa6e174233a7f8dac Mon Sep 17 00:00:00 2001 From: Lucas Steuernagel Date: Fri, 18 Oct 2024 04:29:52 -0300 Subject: [PATCH 1/2] Adjust FunctionRegistry and loader --- src/elf.rs | 3 ++ src/program.rs | 124 +++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 118 insertions(+), 9 deletions(-) diff --git a/src/elf.rs b/src/elf.rs index 287f53e7..62cdc575 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -107,6 +107,9 @@ pub enum ElfError { /// Invalid program header #[error("Invalid ELF program header")] InvalidProgramHeader, + /// Invalid syscall code + #[error("Invalid function index")] + InvalidDenseFunctionIndex, } impl From for ElfError { diff --git a/src/program.rs b/src/program.rs index 3c844317..9221c862 100644 --- a/src/program.rs +++ b/src/program.rs @@ -87,21 +87,40 @@ impl SBPFVersion { } } +/// Operand modes of the FunctionRegistry +#[derive(Debug, PartialEq, Eq)] +enum IndexMode { + /// Allows sparse keys + Sparse, + /// Only allows keys within the range between 1 and a maximum value (u32). + Dense(u32), +} + /// Holds the function symbols of an Executable #[derive(Debug, PartialEq, Eq)] pub struct FunctionRegistry { pub(crate) map: BTreeMap, T)>, + mode: IndexMode, } impl Default for FunctionRegistry { fn default() -> Self { Self { map: BTreeMap::new(), + mode: IndexMode::Sparse, } } } impl FunctionRegistry { + /// Create a function registry with dense indexing + pub fn new_dense(size: u32) -> Self { + Self { + map: BTreeMap::new(), + mode: IndexMode::Dense(size), + } + } + /// Register a symbol with an explicit key pub fn register_function( &mut self, @@ -109,6 +128,12 @@ impl FunctionRegistry { name: impl Into>, value: T, ) -> Result<(), ElfError> { + if let IndexMode::Dense(size) = self.mode { + if key == 0 || key > size { + return Err(ElfError::InvalidDenseFunctionIndex); + } + } + match self.map.entry(key) { Entry::Vacant(entry) => { entry.insert((name.into(), value)); @@ -128,6 +153,9 @@ impl FunctionRegistry { name: impl Into>, value: T, ) -> Result { + if matches!(self.mode, IndexMode::Dense(_)) { + return Err(ElfError::InvalidDenseFunctionIndex); + } let name = name.into(); let key = ebpf::hash_symbol_name(name.as_slice()); self.register_function(key, name, value)?; @@ -145,6 +173,9 @@ impl FunctionRegistry { where usize: From, { + if matches!(self.mode, IndexMode::Dense(_)) { + return Err(ElfError::InvalidDenseFunctionIndex); + } let name = name.into(); let config = loader.get_config(); let key = if hash_symbol_name { @@ -228,22 +259,36 @@ pub type BuiltinFunction = fn(*mut EbpfVm, u64, u64, u64, u64, u64); pub struct BuiltinProgram { /// Holds the Config if this is a loader program config: Option>, - /// Function pointers by symbol - functions: FunctionRegistry>, + /// Function pointers by symbol with sparse indexing + sparse_registry: FunctionRegistry>, + /// Function pointers by symbol with dense indexing + dense_registry: FunctionRegistry>, } impl PartialEq for BuiltinProgram { fn eq(&self, other: &Self) -> bool { - self.config.eq(&other.config) && self.functions.eq(&other.functions) + self.config.eq(&other.config) + && self.sparse_registry.eq(&other.sparse_registry) + && self.dense_registry.eq(&other.dense_registry) } } impl BuiltinProgram { + /// Creates a loader containing simultaneously a sparse and a dense function registry. + pub fn new_loader_with_dense_registry(config: Config, dense_size: u32) -> Self { + Self { + config: Some(Box::new(config)), + sparse_registry: FunctionRegistry::default(), + dense_registry: FunctionRegistry::new_dense(dense_size), + } + } + /// Constructs a loader built-in program pub fn new_loader(config: Config, functions: FunctionRegistry>) -> Self { Self { config: Some(Box::new(config)), - functions, + sparse_registry: functions, + dense_registry: FunctionRegistry::new_dense(0), } } @@ -251,7 +296,8 @@ impl BuiltinProgram { pub fn new_builtin(functions: FunctionRegistry>) -> Self { Self { config: None, - functions, + sparse_registry: functions, + dense_registry: FunctionRegistry::new_dense(0), } } @@ -259,7 +305,8 @@ impl BuiltinProgram { pub fn new_mock() -> Self { Self { config: Some(Box::default()), - functions: FunctionRegistry::default(), + sparse_registry: FunctionRegistry::default(), + dense_registry: FunctionRegistry::new_dense(0), } } @@ -270,7 +317,7 @@ impl BuiltinProgram { /// Get the function registry pub fn get_function_registry(&self) -> &FunctionRegistry> { - &self.functions + &self.sparse_registry } /// Calculate memory size @@ -281,7 +328,19 @@ impl BuiltinProgram { } else { 0 }) - .saturating_add(self.functions.mem_size()) + .saturating_add(self.sparse_registry.mem_size()) + } + + /// Register a function both in the sparse and dense registries + pub fn register_function( + &mut self, + name: &str, + value: BuiltinFunction, + dense_key: u32, + ) -> Result<(), ElfError> { + self.sparse_registry.register_function_hashed(name, value)?; + self.dense_registry + .register_function(dense_key, name, value) } } @@ -290,7 +349,7 @@ impl std::fmt::Debug for BuiltinProgram { writeln!(f, "{:?}", unsafe { // `derive(Debug)` does not know that `C: ContextObject` does not need to implement `Debug` std::mem::transmute::<&FunctionRegistry>, &FunctionRegistry>( - &self.functions, + &self.sparse_registry, ) })?; Ok(()) @@ -388,4 +447,51 @@ mod tests { let builtin_program_c = BuiltinProgram::new_loader(Config::default(), function_registry_c); assert_ne!(builtin_program_a, builtin_program_c); } + + #[test] + fn dense_registry() { + let mut dense_registry = + FunctionRegistry::>::new_dense(2); + let res = dense_registry.register_function(0, b"syscall_u64", syscalls::SyscallU64::vm); + assert_eq!(res.err(), Some(ElfError::InvalidDenseFunctionIndex)); + + let res = dense_registry.register_function(5, b"syscall_u64", syscalls::SyscallU64::vm); + assert_eq!(res.err(), Some(ElfError::InvalidDenseFunctionIndex)); + + let res = + dense_registry.register_function(1, b"syscall_string", syscalls::SyscallString::vm); + assert!(res.is_ok()); + + let res = dense_registry.lookup_by_key(0); + assert!(res.is_none()); + let res = dense_registry.lookup_by_key(3); + assert!(res.is_none()); + let res = dense_registry.lookup_by_key(1); + assert_eq!(res.unwrap().0, b"syscall_string"); + + let res = dense_registry.register_function_hashed("syscall_u64", syscalls::SyscallU64::vm); + assert_eq!(res.err(), Some(ElfError::InvalidDenseFunctionIndex)); + + let mut dense_reg_usize = FunctionRegistry::::new_dense(2); + let res = dense_reg_usize.register_function_hashed_legacy( + &BuiltinProgram::::new_mock(), + false, + "syscall_u64", + 4, + ); + assert_eq!(res.err(), Some(ElfError::InvalidDenseFunctionIndex)); + } + + #[test] + fn loader_registry() { + let mut loader = BuiltinProgram::new_loader_with_dense_registry(Config::default(), 2); + let res = loader.register_function("syscall_u64", syscalls::SyscallU64::vm, 0); + assert!(res.is_err()); + + let res = loader.register_function("syscall_u64", syscalls::SyscallU64::vm, 5); + assert!(res.is_err()); + + let res = loader.register_function("syscall_u64", syscalls::SyscallU64::vm, 2); + assert!(res.is_ok()); + } } From 796e0370635fd80443b0f689ed49fa9fbe4bd2d5 Mon Sep 17 00:00:00 2001 From: Lucas Steuernagel Date: Fri, 18 Oct 2024 08:40:01 -0300 Subject: [PATCH 2/2] Rename get function --- src/disassembler.rs | 2 +- src/elf.rs | 7 +++++-- src/interpreter.rs | 2 +- src/jit.rs | 2 +- src/program.rs | 10 +++++++--- src/static_analysis.rs | 2 +- 6 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/disassembler.rs b/src/disassembler.rs index b500b0a5..3248ffc2 100644 --- a/src/disassembler.rs +++ b/src/disassembler.rs @@ -271,7 +271,7 @@ pub fn disassemble_instruction( function_name } else { name = "syscall"; - loader.get_function_registry().lookup_by_key(insn.imm as u32).map(|(function_name, _)| String::from_utf8_lossy(function_name).to_string()).unwrap_or_else(|| "[invalid]".to_string()) + loader.get_sparse_function_registry().lookup_by_key(insn.imm as u32).map(|(function_name, _)| String::from_utf8_lossy(function_name).to_string()).unwrap_or_else(|| "[invalid]".to_string()) }; desc = format!("{name} {function_name}"); }, diff --git a/src/elf.rs b/src/elf.rs index 62cdc575..eeefb7cf 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -318,7 +318,7 @@ impl Executable { self.get_config(), self.get_sbpf_version(), self.get_function_registry(), - self.loader.get_function_registry(), + self.loader.get_sparse_function_registry(), )?; Ok(()) } @@ -1074,7 +1074,10 @@ impl Executable { .entry(symbol.st_name) .or_insert_with(|| ebpf::hash_symbol_name(name)); if config.reject_broken_elfs - && loader.get_function_registry().lookup_by_key(hash).is_none() + && loader + .get_sparse_function_registry() + .lookup_by_key(hash) + .is_none() { return Err(ElfError::UnresolvedSymbol( String::from_utf8_lossy(name).to_string(), diff --git a/src/interpreter.rs b/src/interpreter.rs index 34ea6dd8..26e8c800 100644 --- a/src/interpreter.rs +++ b/src/interpreter.rs @@ -536,7 +536,7 @@ impl<'a, 'b, C: ContextObject> Interpreter<'a, 'b, C> { }; if external { - if let Some((_function_name, function)) = self.executable.get_loader().get_function_registry().lookup_by_key(insn.imm as u32) { + if let Some((_function_name, function)) = self.executable.get_loader().get_sparse_function_registry().lookup_by_key(insn.imm as u32) { resolved = true; self.vm.due_insn_count = self.vm.previous_instruction_meter - self.vm.due_insn_count; diff --git a/src/jit.rs b/src/jit.rs index 1f48034e..c961828f 100644 --- a/src/jit.rs +++ b/src/jit.rs @@ -717,7 +717,7 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> { }; if external { - if let Some((_function_name, function)) = self.executable.get_loader().get_function_registry().lookup_by_key(insn.imm as u32) { + if let Some((_function_name, function)) = self.executable.get_loader().get_sparse_function_registry().lookup_by_key(insn.imm as u32) { self.emit_validate_and_profile_instruction_count(false, Some(0)); self.emit_ins(X86Instruction::load_immediate(OperandSize::S64, REGISTER_SCRATCH, function as usize as i64)); self.emit_ins(X86Instruction::call_immediate(self.relative_to_anchor(ANCHOR_EXTERNAL_FUNCTION_CALL, 5))); diff --git a/src/program.rs b/src/program.rs index 9221c862..0612b773 100644 --- a/src/program.rs +++ b/src/program.rs @@ -184,7 +184,11 @@ impl FunctionRegistry { } else { ebpf::hash_symbol_name(&usize::from(value).to_le_bytes()) }; - if loader.get_function_registry().lookup_by_key(hash).is_some() { + if loader + .get_sparse_function_registry() + .lookup_by_key(hash) + .is_some() + { return Err(ElfError::SymbolHashCollision(hash)); } hash @@ -315,8 +319,8 @@ impl BuiltinProgram { self.config.as_ref().unwrap() } - /// Get the function registry - pub fn get_function_registry(&self) -> &FunctionRegistry> { + /// Get the sparse function registry + pub fn get_sparse_function_registry(&self) -> &FunctionRegistry> { &self.sparse_registry } diff --git a/src/static_analysis.rs b/src/static_analysis.rs index aa5d0ae3..ab73c6cc 100644 --- a/src/static_analysis.rs +++ b/src/static_analysis.rs @@ -236,7 +236,7 @@ impl<'a> Analysis<'a> { if let Some((function_name, _function)) = self .executable .get_loader() - .get_function_registry() + .get_sparse_function_registry() .lookup_by_key(insn.imm as u32) { if function_name == b"abort" {