From 5d58ba75c865055def40ddcef363edf93d140a52 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Mon, 20 Nov 2023 18:59:15 +0530 Subject: [PATCH 01/22] feat: initial standalone executable builder impl --- .lune/csv_printer.luau | 2 +- src/cli/build.rs | 40 ++++++++++++++++++++++++++++++ src/cli/mod.rs | 55 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 92 insertions(+), 5 deletions(-) create mode 100644 src/cli/build.rs diff --git a/.lune/csv_printer.luau b/.lune/csv_printer.luau index 2949598b..f077d52f 100644 --- a/.lune/csv_printer.luau +++ b/.lune/csv_printer.luau @@ -60,4 +60,4 @@ for rowIndex, row in csvTable do print(string.format("┣%s┫", thiccLine)) end end -print(string.format("┗%s┛", thiccLine)) +print(string.format("┗%s┛", thiccLine)) \ No newline at end of file diff --git a/src/cli/build.rs b/src/cli/build.rs new file mode 100644 index 00000000..398286ae --- /dev/null +++ b/src/cli/build.rs @@ -0,0 +1,40 @@ +use std::{ + env, + path::{Path, PathBuf}, +}; +use tokio::fs; + +use anyhow::Result; +use mlua::Compiler as LuaCompiler; + +pub async fn build_standalone + Into>( + output_path: T, + code: impl AsRef<[u8]>, +) -> Result<()> { + // First, we read the contents of the lune interpreter as our starting point + let mut patched_bin = fs::read(env::current_exe()?).await?; + + // The signature which separates indicates the presence of bytecode to execute + // If a binary contains this signature, that must mean it is a standalone binar + let signature: Vec = vec![0x12, 0xed, 0x93, 0x14, 0x28]; + + // Append the signature to the base binary + for byte in signature { + patched_bin.push(byte); + } + + // Compile luau input into bytecode + let mut bytecode = LuaCompiler::new() + .set_optimization_level(2) + .set_coverage_level(0) + .set_debug_level(0) + .compile(code); + + // Append compiled bytecode to binary and finalize + patched_bin.append(&mut bytecode); + + // Write the compiled binary to file + fs::write(output_path, patched_bin).await?; + + Ok(()) +} diff --git a/src/cli/mod.rs b/src/cli/mod.rs index cb7daeeb..e2c0345e 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,4 +1,4 @@ -use std::{fmt::Write as _, process::ExitCode}; +use std::{env, fmt::Write as _, path::PathBuf, process::ExitCode}; use anyhow::{Context, Result}; use clap::Parser; @@ -9,6 +9,7 @@ use tokio::{ io::{stdin, AsyncReadExt}, }; +pub(crate) mod build; pub(crate) mod gen; pub(crate) mod repl; pub(crate) mod setup; @@ -20,6 +21,8 @@ use utils::{ listing::{find_lune_scripts, sort_lune_scripts, write_lune_scripts_list}, }; +use self::build::build_standalone; + /// A Luau script runner #[derive(Parser, Debug, Default, Clone)] #[command(version, long_about = None)] @@ -44,6 +47,8 @@ pub struct Cli { /// Generate a Lune documentation file for Luau LSP #[clap(long, hide = true)] generate_docs_file: bool, + #[clap(long, hide = true)] + build: bool, } #[allow(dead_code)] @@ -116,6 +121,7 @@ impl Cli { return Ok(ExitCode::SUCCESS); } + // Generate (save) definition files, if wanted let generate_file_requested = self.setup || self.generate_luau_types @@ -143,14 +149,35 @@ impl Cli { if generate_file_requested { return Ok(ExitCode::SUCCESS); } - // If we did not generate any typedefs we know that the user did not - // provide any other options, and in that case we should enter the REPL - return repl::show_interface().await; + + // Signature which is only present in standalone lune binaries + let signature: Vec = vec![0x12, 0xed, 0x93, 0x14, 0x28]; + + // Read the current lune binary to memory + let bin = read_to_vec(env::current_exe()?).await?; + + // Check to see if the lune executable includes the signature + return match bin + .windows(signature.len()) + .position(|block| block == signature) + { + // If we find the signature, all bytes after the 5 signature bytes must be bytecode + Some(offset) => Ok(Lune::new() + .with_args(self.script_args) + .run("STANDALONE", &bin[offset + signature.len()..bin.len()]) + .await?), + + // If we did not generate any typedefs, know we're not a precompiled bin and + // we know that the user did not provide any other options, and in that + // case we should enter the REPL + None => repl::show_interface().await, + }; } // Figure out if we should read from stdin or from a file, // reading from stdin is marked by passing a single "-" // (dash) as the script name to run to the cli let script_path = self.script_path.unwrap(); + let (script_display_name, script_contents) = if script_path == "-" { let mut stdin_contents = Vec::new(); stdin() @@ -165,6 +192,26 @@ impl Cli { let file_display_name = file_path.with_extension("").display().to_string(); (file_display_name, file_contents) }; + + if self.build { + let output_path = + PathBuf::from(script_path.clone()).with_extension(env::consts::EXE_EXTENSION); + println!( + "Building {script_path} to {}", + output_path.to_string_lossy() + ); + + return Ok( + match build_standalone(output_path, strip_shebang(script_contents.clone())).await { + Ok(()) => ExitCode::SUCCESS, + Err(err) => { + eprintln!("{err}"); + ExitCode::FAILURE + } + }, + ); + } + // Create a new lune object with all globals & run the script let result = Lune::new() .with_args(self.script_args) From dc2bab3f100b9c7903ea34cc323823f641885228 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Tue, 21 Nov 2023 22:36:45 +0530 Subject: [PATCH 02/22] feat: initial unfinished rewrite for perf --- Cargo.lock | 54 ++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/cli/build.rs | 32 ++++++++++++++++------ src/cli/mod.rs | 71 +++++++++++++++++++++++++++++++++++++----------- 4 files changed, 133 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index de929114..90a3106f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -468,6 +468,30 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + [[package]] name = "crossbeam-utils" version = "0.8.16" @@ -1127,6 +1151,7 @@ dependencies = [ "path-clean", "pin-project", "rand", + "rayon", "rbx_binary", "rbx_cookie", "rbx_dom_weak", @@ -1192,6 +1217,15 @@ version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -1547,6 +1581,26 @@ dependencies = [ "getrandom 0.2.10", ] +[[package]] +name = "rayon" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "rbx_binary" version = "0.7.3" diff --git a/Cargo.toml b/Cargo.toml index 6474e9d9..8338685e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -124,6 +124,7 @@ regex = { optional = true, version = "1.7", default-features = false, features = "unicode-perl", ] } rustyline = { optional = true, version = "12.0" } +rayon = "1.8" ### ROBLOX diff --git a/src/cli/build.rs b/src/cli/build.rs index 398286ae..724ca5c0 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -13,25 +13,39 @@ pub async fn build_standalone + Into>( ) -> Result<()> { // First, we read the contents of the lune interpreter as our starting point let mut patched_bin = fs::read(env::current_exe()?).await?; + let base_bin_offset = u64::try_from(patched_bin.len() - 1)?; + + println!("base offset: {}", base_bin_offset); // The signature which separates indicates the presence of bytecode to execute // If a binary contains this signature, that must mean it is a standalone binar - let signature: Vec = vec![0x12, 0xed, 0x93, 0x14, 0x28]; - - // Append the signature to the base binary - for byte in signature { - patched_bin.push(byte); - } + let signature: Vec = vec![0x4f, 0x3e, 0xf8, 0x41, 0xc3, 0x3a, 0x52, 0x16]; // Compile luau input into bytecode - let mut bytecode = LuaCompiler::new() + let bytecode = LuaCompiler::new() .set_optimization_level(2) .set_coverage_level(0) .set_debug_level(0) .compile(code); - // Append compiled bytecode to binary and finalize - patched_bin.append(&mut bytecode); + println!("{}", bytecode.len()); + + patched_bin.append(&mut bytecode.clone()); + + let mut meta = base_bin_offset.to_ne_bytes().to_vec(); + + // bytecode base size files signature + // meta.append(&mut [0, 0, 0, 0].to_vec()); // 4 extra padding bytes after 4 byte u64 + meta.append(&mut (bytecode.len() as u64).to_ne_bytes().to_vec()); // FIXME: len is greater than u8::max + meta.append(&mut 1_u64.to_ne_bytes().to_vec()); // Number of files, padded with null bytes + // meta.append(&mut [0, 0, 0, 0].to_vec()); // 4 extra padding bytes after 4 byte u32 + + patched_bin.append(&mut meta); + + // Append the signature to the base binary + for byte in signature { + patched_bin.push(byte); + } // Write the compiled binary to file fs::write(output_path, patched_bin).await?; diff --git a/src/cli/mod.rs b/src/cli/mod.rs index e2c0345e..a0946d60 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -151,27 +151,66 @@ impl Cli { } // Signature which is only present in standalone lune binaries - let signature: Vec = vec![0x12, 0xed, 0x93, 0x14, 0x28]; + let signature: Vec = vec![0x4f, 0x3e, 0xf8, 0x41, 0xc3, 0x3a, 0x52, 0x16]; // Read the current lune binary to memory let bin = read_to_vec(env::current_exe()?).await?; + // let mut bin = vec![0; tmp_bin.len() % signature.len()]; + + // println!("padding: {:?}", bin); + + // bin.append(&mut tmp_bin); + + let mut bytecode_offset = 0; + let mut bytecode_size = 0; + + // println!("{}",); + + for (idx, chunk) in bin.rchunks(signature.len()).enumerate() { + if idx == 0 && chunk != signature { + // We don't have a standalone binary + break; + } + + if idx == 3 { + bytecode_offset = u64::from_ne_bytes(chunk.try_into()?); + } + + if idx == 2 { + bytecode_size = u64::from_ne_bytes(chunk.try_into()?); + } + } + + if bytecode_offset != 0 && bytecode_size != 0 { + Lune::new() + .with_args(self.script_args.clone()) + .run( + "STANDALONE", + &bin[usize::try_from(bytecode_offset).unwrap() + ..usize::try_from(bytecode_offset + bytecode_size).unwrap()], + ) + .await?; + } + + return repl::show_interface().await; // Check to see if the lune executable includes the signature - return match bin - .windows(signature.len()) - .position(|block| block == signature) - { - // If we find the signature, all bytes after the 5 signature bytes must be bytecode - Some(offset) => Ok(Lune::new() - .with_args(self.script_args) - .run("STANDALONE", &bin[offset + signature.len()..bin.len()]) - .await?), - - // If we did not generate any typedefs, know we're not a precompiled bin and - // we know that the user did not provide any other options, and in that - // case we should enter the REPL - None => repl::show_interface().await, - }; + // return match bin + // .clone() + // .par_windows(signature.len()) + // // .rev() + // .position_any(|block| block == signature) + // { + // // If we find the signature, all bytes after the 5 signature bytes must be bytecode + // Some(offset) => { + // // let offset = bin.len() - 1 - back_offset; + // } + + // // If we did not generate any typedefs, know we're not a precompiled bin and + // // we know that the user did not provide any other options, and in that + // // case we should enter the REPL + // None => repl::show_interface().await, + // }; } // Figure out if we should read from stdin or from a file, // reading from stdin is marked by passing a single "-" From 441a1eacfe065d19670bcc9d216d24bb0f9ad3db Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Wed, 22 Nov 2023 15:41:19 +0530 Subject: [PATCH 03/22] fix: finalize updated standalone runtime system --- src/cli/build.rs | 4 +--- src/cli/mod.rs | 30 ++++++++++-------------------- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/src/cli/build.rs b/src/cli/build.rs index 724ca5c0..c25f8d6e 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -13,9 +13,7 @@ pub async fn build_standalone + Into>( ) -> Result<()> { // First, we read the contents of the lune interpreter as our starting point let mut patched_bin = fs::read(env::current_exe()?).await?; - let base_bin_offset = u64::try_from(patched_bin.len() - 1)?; - - println!("base offset: {}", base_bin_offset); + let base_bin_offset = u64::try_from(patched_bin.len())?; // The signature which separates indicates the presence of bytecode to execute // If a binary contains this signature, that must mean it is a standalone binar diff --git a/src/cli/mod.rs b/src/cli/mod.rs index a0946d60..caee24ca 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -182,35 +182,25 @@ impl Cli { } if bytecode_offset != 0 && bytecode_size != 0 { - Lune::new() + let result = Lune::new() .with_args(self.script_args.clone()) .run( "STANDALONE", &bin[usize::try_from(bytecode_offset).unwrap() ..usize::try_from(bytecode_offset + bytecode_size).unwrap()], ) - .await?; + .await; + + return Ok(match result { + Err(err) => { + eprintln!("{err}"); + ExitCode::FAILURE + } + Ok(code) => code, + }); } return repl::show_interface().await; - - // Check to see if the lune executable includes the signature - // return match bin - // .clone() - // .par_windows(signature.len()) - // // .rev() - // .position_any(|block| block == signature) - // { - // // If we find the signature, all bytes after the 5 signature bytes must be bytecode - // Some(offset) => { - // // let offset = bin.len() - 1 - back_offset; - // } - - // // If we did not generate any typedefs, know we're not a precompiled bin and - // // we know that the user did not provide any other options, and in that - // // case we should enter the REPL - // None => repl::show_interface().await, - // }; } // Figure out if we should read from stdin or from a file, // reading from stdin is marked by passing a single "-" From 2bf68c1e2a9e5b906a7983b9d0441476c781d347 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Wed, 22 Nov 2023 17:49:30 +0530 Subject: [PATCH 04/22] refactor: polish a few things and clean up code --- src/cli/build.rs | 35 ++++++++++++++++++++--------------- src/cli/mod.rs | 33 +++++++++++++++++++++++++-------- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/src/cli/build.rs b/src/cli/build.rs index c25f8d6e..6bf98716 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -1,8 +1,12 @@ use std::{ env, path::{Path, PathBuf}, + process::ExitCode, +}; +use tokio::{ + fs::{self, OpenOptions}, + io::AsyncWriteExt, }; -use tokio::fs; use anyhow::Result; use mlua::Compiler as LuaCompiler; @@ -10,13 +14,13 @@ use mlua::Compiler as LuaCompiler; pub async fn build_standalone + Into>( output_path: T, code: impl AsRef<[u8]>, -) -> Result<()> { +) -> Result { // First, we read the contents of the lune interpreter as our starting point let mut patched_bin = fs::read(env::current_exe()?).await?; let base_bin_offset = u64::try_from(patched_bin.len())?; // The signature which separates indicates the presence of bytecode to execute - // If a binary contains this signature, that must mean it is a standalone binar + // If a binary contains this signature, that must mean it is a standalone binary let signature: Vec = vec![0x4f, 0x3e, 0xf8, 0x41, 0xc3, 0x3a, 0x52, 0x16]; // Compile luau input into bytecode @@ -26,27 +30,28 @@ pub async fn build_standalone + Into>( .set_debug_level(0) .compile(code); - println!("{}", bytecode.len()); - patched_bin.append(&mut bytecode.clone()); let mut meta = base_bin_offset.to_ne_bytes().to_vec(); - // bytecode base size files signature - // meta.append(&mut [0, 0, 0, 0].to_vec()); // 4 extra padding bytes after 4 byte u64 - meta.append(&mut (bytecode.len() as u64).to_ne_bytes().to_vec()); // FIXME: len is greater than u8::max + // Include metadata in the META chunk, each field is 8 bytes + meta.append(&mut (bytecode.len() as u64).to_ne_bytes().to_vec()); // Size of bytecode, used to calculate end offset at runtime meta.append(&mut 1_u64.to_ne_bytes().to_vec()); // Number of files, padded with null bytes - // meta.append(&mut [0, 0, 0, 0].to_vec()); // 4 extra padding bytes after 4 byte u32 patched_bin.append(&mut meta); // Append the signature to the base binary - for byte in signature { - patched_bin.push(byte); - } + patched_bin.append(&mut signature.clone()); // Write the compiled binary to file - fs::write(output_path, patched_bin).await?; - - Ok(()) + OpenOptions::new() + .write(true) + .create(true) + .mode(0o770) + .open(output_path) + .await? + .write_all(&patched_bin) + .await?; + + Ok(ExitCode::SUCCESS) } diff --git a/src/cli/mod.rs b/src/cli/mod.rs index caee24ca..71cde7ec 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -155,17 +155,26 @@ impl Cli { // Read the current lune binary to memory let bin = read_to_vec(env::current_exe()?).await?; - // let mut bin = vec![0; tmp_bin.len() % signature.len()]; - - // println!("padding: {:?}", bin); - - // bin.append(&mut tmp_bin); let mut bytecode_offset = 0; let mut bytecode_size = 0; - // println!("{}",); - + // standalone binary structure (reversed, 8 bytes per field) + // [0] => signature + // ---------------- + // -- META Chunk -- + // [1] => file count + // [2] => bytecode size + // [3] => bytecode offset + // ---------------- + // -- MISC Chunk -- + // [4..n] => bytecode (variable size) + // ---------------- + // NOTE: All integers are 8 byte unsigned 64 bit (u64's). + + // The rchunks will have unequally sized sections in the beginning + // but that doesn't matter to us because we don't need anything past the + // middle chunks where the bytecode is stored for (idx, chunk) in bin.rchunks(signature.len()).enumerate() { if idx == 0 && chunk != signature { // We don't have a standalone binary @@ -181,7 +190,13 @@ impl Cli { } } + // If we were able to retrieve the required metadata, we load + // and execute the bytecode if bytecode_offset != 0 && bytecode_size != 0 { + // FIXME: Passing arguments does not work like it should, because the first + // argument provided is treated as the script path. We should probably also not + // allow any runner functionality within standalone binaries + let result = Lune::new() .with_args(self.script_args.clone()) .run( @@ -200,6 +215,8 @@ impl Cli { }); } + // If not in a standalone context and we don't have any arguments + // display the interactive REPL interface return repl::show_interface().await; } // Figure out if we should read from stdin or from a file, @@ -232,7 +249,7 @@ impl Cli { return Ok( match build_standalone(output_path, strip_shebang(script_contents.clone())).await { - Ok(()) => ExitCode::SUCCESS, + Ok(exitcode) => exitcode, Err(err) => { eprintln!("{err}"); ExitCode::FAILURE From 4bb0eba589e63b30c212324b4ce8c4d62795ae1f Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Wed, 22 Nov 2023 19:50:26 +0530 Subject: [PATCH 05/22] feat: disable unneeded CLI args for standalone --- src/cli/mod.rs | 228 ++++++++++++++++++++++++++----------------------- 1 file changed, 119 insertions(+), 109 deletions(-) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 71cde7ec..0d2a2dc3 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -85,9 +85,16 @@ impl Cli { #[allow(clippy::too_many_lines)] pub async fn run(self) -> Result { + // Signature which is only present in standalone lune binaries + let signature: Vec = vec![0x4f, 0x3e, 0xf8, 0x41, 0xc3, 0x3a, 0x52, 0x16]; + // Read the current lune binary to memory + let bin = read_to_vec(env::current_exe()?).await?; + + let is_standalone = bin[bin.len() - signature.len()..bin.len()] == signature; + // List files in `lune` and `.lune` directories, if wanted // This will also exit early and not run anything else - if self.list { + if self.list && !is_standalone { let sorted_relative = find_lune_scripts(false).await.map(sort_lune_scripts); let sorted_home_dir = find_lune_scripts(true).await.map(sort_lune_scripts); @@ -150,125 +157,128 @@ impl Cli { return Ok(ExitCode::SUCCESS); } - // Signature which is only present in standalone lune binaries - let signature: Vec = vec![0x4f, 0x3e, 0xf8, 0x41, 0xc3, 0x3a, 0x52, 0x16]; - - // Read the current lune binary to memory - let bin = read_to_vec(env::current_exe()?).await?; - - let mut bytecode_offset = 0; - let mut bytecode_size = 0; - - // standalone binary structure (reversed, 8 bytes per field) - // [0] => signature - // ---------------- - // -- META Chunk -- - // [1] => file count - // [2] => bytecode size - // [3] => bytecode offset - // ---------------- - // -- MISC Chunk -- - // [4..n] => bytecode (variable size) - // ---------------- - // NOTE: All integers are 8 byte unsigned 64 bit (u64's). - - // The rchunks will have unequally sized sections in the beginning - // but that doesn't matter to us because we don't need anything past the - // middle chunks where the bytecode is stored - for (idx, chunk) in bin.rchunks(signature.len()).enumerate() { - if idx == 0 && chunk != signature { - // We don't have a standalone binary - break; - } + if is_standalone { + let mut bytecode_offset = 0; + let mut bytecode_size = 0; + + // standalone binary structure (reversed, 8 bytes per field) + // [0] => signature + // ---------------- + // -- META Chunk -- + // [1] => file count + // [2] => bytecode size + // [3] => bytecode offset + // ---------------- + // -- MISC Chunk -- + // [4..n] => bytecode (variable size) + // ---------------- + // NOTE: All integers are 8 byte unsigned 64 bit (u64's). + + // The rchunks will have unequally sized sections in the beginning + // but that doesn't matter to us because we don't need anything past the + // middle chunks where the bytecode is stored + for (idx, chunk) in bin.rchunks(signature.len()).enumerate() { + if idx == 0 && chunk != signature { + // We don't have a standalone binary + break; + } - if idx == 3 { - bytecode_offset = u64::from_ne_bytes(chunk.try_into()?); - } + if idx == 3 { + bytecode_offset = u64::from_ne_bytes(chunk.try_into()?); + } - if idx == 2 { - bytecode_size = u64::from_ne_bytes(chunk.try_into()?); + if idx == 2 { + bytecode_size = u64::from_ne_bytes(chunk.try_into()?); + } } - } - // If we were able to retrieve the required metadata, we load - // and execute the bytecode - if bytecode_offset != 0 && bytecode_size != 0 { - // FIXME: Passing arguments does not work like it should, because the first - // argument provided is treated as the script path. We should probably also not - // allow any runner functionality within standalone binaries - - let result = Lune::new() - .with_args(self.script_args.clone()) - .run( - "STANDALONE", - &bin[usize::try_from(bytecode_offset).unwrap() - ..usize::try_from(bytecode_offset + bytecode_size).unwrap()], - ) - .await; - - return Ok(match result { - Err(err) => { - eprintln!("{err}"); - ExitCode::FAILURE - } - Ok(code) => code, - }); + // If we were able to retrieve the required metadata, we load + // and execute the bytecode + if bytecode_offset != 0 && bytecode_size != 0 { + // FIXME: Passing arguments does not work like it should, because the first + // argument provided is treated as the script path. We should probably also not + // allow any runner functionality within standalone binaries + + let result = Lune::new() + .with_args(self.script_args.clone()) // TODO: args should also include lune reserved ones + .run( + "STANDALONE", + &bin[usize::try_from(bytecode_offset).unwrap() + ..usize::try_from(bytecode_offset + bytecode_size).unwrap()], + ) + .await; + + return Ok(match result { + Err(err) => { + eprintln!("{err}"); + ExitCode::FAILURE + } + Ok(code) => code, + }); + } } // If not in a standalone context and we don't have any arguments // display the interactive REPL interface return repl::show_interface().await; } - // Figure out if we should read from stdin or from a file, - // reading from stdin is marked by passing a single "-" - // (dash) as the script name to run to the cli - let script_path = self.script_path.unwrap(); - - let (script_display_name, script_contents) = if script_path == "-" { - let mut stdin_contents = Vec::new(); - stdin() - .read_to_end(&mut stdin_contents) - .await - .context("Failed to read script contents from stdin")?; - ("stdin".to_string(), stdin_contents) - } else { - let file_path = discover_script_path_including_lune_dirs(&script_path)?; - let file_contents = read_to_vec(&file_path).await?; - // NOTE: We skip the extension here to remove it from stack traces - let file_display_name = file_path.with_extension("").display().to_string(); - (file_display_name, file_contents) - }; - - if self.build { - let output_path = - PathBuf::from(script_path.clone()).with_extension(env::consts::EXE_EXTENSION); - println!( - "Building {script_path} to {}", - output_path.to_string_lossy() - ); - - return Ok( - match build_standalone(output_path, strip_shebang(script_contents.clone())).await { - Ok(exitcode) => exitcode, - Err(err) => { - eprintln!("{err}"); - ExitCode::FAILURE - } - }, - ); - } - // Create a new lune object with all globals & run the script - let result = Lune::new() - .with_args(self.script_args) - .run(&script_display_name, strip_shebang(script_contents)) - .await; - Ok(match result { - Err(err) => { - eprintln!("{err}"); - ExitCode::FAILURE + if !is_standalone { + // Figure out if we should read from stdin or from a file, + // reading from stdin is marked by passing a single "-" + // (dash) as the script name to run to the cli + let script_path = self.script_path.unwrap(); + + let (script_display_name, script_contents) = if script_path == "-" { + let mut stdin_contents = Vec::new(); + stdin() + .read_to_end(&mut stdin_contents) + .await + .context("Failed to read script contents from stdin")?; + ("stdin".to_string(), stdin_contents) + } else { + let file_path = discover_script_path_including_lune_dirs(&script_path)?; + let file_contents = read_to_vec(&file_path).await?; + // NOTE: We skip the extension here to remove it from stack traces + let file_display_name = file_path.with_extension("").display().to_string(); + (file_display_name, file_contents) + }; + + if self.build { + let output_path = + PathBuf::from(script_path.clone()).with_extension(env::consts::EXE_EXTENSION); + println!( + "Building {script_path} to {}", + output_path.to_string_lossy() + ); + + return Ok( + match build_standalone(output_path, strip_shebang(script_contents.clone())) + .await + { + Ok(exitcode) => exitcode, + Err(err) => { + eprintln!("{err}"); + ExitCode::FAILURE + } + }, + ); } - Ok(code) => code, - }) + + // Create a new lune object with all globals & run the script + let result = Lune::new() + .with_args(self.script_args) + .run(&script_display_name, strip_shebang(script_contents)) + .await; + return Ok(match result { + Err(err) => { + eprintln!("{err}"); + ExitCode::FAILURE + } + Ok(code) => code, + }); + } + + Ok(ExitCode::SUCCESS) } } From 60870694936e06b1718b1af1660c2af592d9c49d Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Wed, 22 Nov 2023 22:34:10 +0530 Subject: [PATCH 06/22] fix(windows): write file differently for windows --- src/cli/build.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/cli/build.rs b/src/cli/build.rs index 6bf98716..b158c692 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -44,14 +44,18 @@ pub async fn build_standalone + Into>( patched_bin.append(&mut signature.clone()); // Write the compiled binary to file - OpenOptions::new() - .write(true) - .create(true) - .mode(0o770) - .open(output_path) - .await? - .write_all(&patched_bin) - .await?; + if cfg!(unix) { + OpenOptions::new() + .write(true) + .create(true) + .mode(0o770) + .open(output_path) + .await? + .write_all(&patched_bin) + .await?; + } else if cfg!(windows) { + fs::write(output_path, &patched_bin).await?; + } Ok(ExitCode::SUCCESS) } From cf2f93d480559a4d1c570438878848f567906143 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Thu, 23 Nov 2023 11:17:10 +0530 Subject: [PATCH 07/22] fix: conditionally compile fs writing system for windows --- src/cli/build.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/cli/build.rs b/src/cli/build.rs index b158c692..ed5a0505 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -44,18 +44,18 @@ pub async fn build_standalone + Into>( patched_bin.append(&mut signature.clone()); // Write the compiled binary to file - if cfg!(unix) { - OpenOptions::new() - .write(true) - .create(true) - .mode(0o770) - .open(output_path) - .await? - .write_all(&patched_bin) - .await?; - } else if cfg!(windows) { - fs::write(output_path, &patched_bin).await?; - } + #[cfg(not(target_os = "windows"))] + OpenOptions::new() + .write(true) + .create(true) + .mode(0o770) + .open(&output_path) + .await? + .write_all(&patched_bin) + .await?; + + #[cfg(target_os = "windows")] + fs::write(&output_path, &patched_bin).await?; Ok(ExitCode::SUCCESS) } From 2af8ed3b9fb0377b355589b371b70b0d70661d79 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Thu, 23 Nov 2023 12:25:33 +0530 Subject: [PATCH 08/22] feat: proper args support for standalone binaries --- .lune/csv_printer.luau | 2 +- src/cli/build.rs | 6 +- src/cli/mod.rs | 154 +++++++++++++++++++++++++---------------- 3 files changed, 97 insertions(+), 65 deletions(-) diff --git a/.lune/csv_printer.luau b/.lune/csv_printer.luau index f077d52f..2949598b 100644 --- a/.lune/csv_printer.luau +++ b/.lune/csv_printer.luau @@ -60,4 +60,4 @@ for rowIndex, row in csvTable do print(string.format("┣%s┫", thiccLine)) end end -print(string.format("┗%s┛", thiccLine)) \ No newline at end of file +print(string.format("┗%s┛", thiccLine)) diff --git a/src/cli/build.rs b/src/cli/build.rs index ed5a0505..3bae49f2 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -44,17 +44,17 @@ pub async fn build_standalone + Into>( patched_bin.append(&mut signature.clone()); // Write the compiled binary to file - #[cfg(not(target_os = "windows"))] + #[cfg(target_family = "unix")] OpenOptions::new() .write(true) .create(true) - .mode(0o770) + .mode(0o770) // read, write and execute permissions for user and group .open(&output_path) .await? .write_all(&patched_bin) .await?; - #[cfg(target_os = "windows")] + #[cfg(target_family = "windows")] fs::write(&output_path, &patched_bin).await?; Ok(ExitCode::SUCCESS) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 0d2a2dc3..065c3267 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -92,6 +92,99 @@ impl Cli { let is_standalone = bin[bin.len() - signature.len()..bin.len()] == signature; + if is_standalone { + let mut bytecode_offset = 0; + let mut bytecode_size = 0; + + // standalone binary structure (reversed, 8 bytes per field) + // [0] => signature + // ---------------- + // -- META Chunk -- + // [1] => file count + // [2] => bytecode size + // [3] => bytecode offset + // ---------------- + // -- MISC Chunk -- + // [4..n] => bytecode (variable size) + // ---------------- + // NOTE: All integers are 8 byte unsigned 64 bit (u64's). + + // The rchunks will have unequally sized sections in the beginning + // but that doesn't matter to us because we don't need anything past the + // middle chunks where the bytecode is stored + for (idx, chunk) in bin.rchunks(signature.len()).enumerate() { + if idx == 0 && chunk != signature { + // Binary is guaranteed to be standalone, we've confirmed this before + unreachable!() + } + + if idx == 3 { + bytecode_offset = u64::from_ne_bytes(chunk.try_into()?); + } + + if idx == 2 { + bytecode_size = u64::from_ne_bytes(chunk.try_into()?); + } + } + + // If we were able to retrieve the required metadata, we load + // and execute the bytecode + if bytecode_offset != 0 && bytecode_size != 0 { + // FIXME: Passing arguments does not work like it should, because the first + // argument provided is treated as the script path. We should probably also not + // allow any runner functionality within standalone binaries + + let mut reserved_args = Vec::new(); + + macro_rules! include_reserved_args { + ($($arg_bool:expr=> $mapping:literal),*) => { + $( + if $arg_bool { + reserved_args.push($mapping.to_string()) + } + )* + }; + } + + let mut real_args = Vec::new(); + + if let Some(first_arg) = self.script_path { + println!("{first_arg}"); + + real_args.push(first_arg); + } + + include_reserved_args! { + self.setup => "--setup", + self.generate_docs_file => "--generate-docs-file", + self.generate_selene_types => "--generate-selene-types", + self.generate_luau_types => "--generate-luau-types", + self.list => "--list", + self.build => "--build" + } + + real_args.append(&mut reserved_args); + real_args.append(&mut self.script_args.clone()); + + let result = Lune::new() + .with_args(real_args) // TODO: args should also include lune reserved ones + .run( + "STANDALONE", + &bin[usize::try_from(bytecode_offset)? + ..usize::try_from(bytecode_offset + bytecode_size)?], + ) + .await; + + return Ok(match result { + Err(err) => { + eprintln!("{err}"); + ExitCode::FAILURE + } + Ok(code) => code, + }); + } + } + // List files in `lune` and `.lune` directories, if wanted // This will also exit early and not run anything else if self.list && !is_standalone { @@ -157,67 +250,6 @@ impl Cli { return Ok(ExitCode::SUCCESS); } - if is_standalone { - let mut bytecode_offset = 0; - let mut bytecode_size = 0; - - // standalone binary structure (reversed, 8 bytes per field) - // [0] => signature - // ---------------- - // -- META Chunk -- - // [1] => file count - // [2] => bytecode size - // [3] => bytecode offset - // ---------------- - // -- MISC Chunk -- - // [4..n] => bytecode (variable size) - // ---------------- - // NOTE: All integers are 8 byte unsigned 64 bit (u64's). - - // The rchunks will have unequally sized sections in the beginning - // but that doesn't matter to us because we don't need anything past the - // middle chunks where the bytecode is stored - for (idx, chunk) in bin.rchunks(signature.len()).enumerate() { - if idx == 0 && chunk != signature { - // We don't have a standalone binary - break; - } - - if idx == 3 { - bytecode_offset = u64::from_ne_bytes(chunk.try_into()?); - } - - if idx == 2 { - bytecode_size = u64::from_ne_bytes(chunk.try_into()?); - } - } - - // If we were able to retrieve the required metadata, we load - // and execute the bytecode - if bytecode_offset != 0 && bytecode_size != 0 { - // FIXME: Passing arguments does not work like it should, because the first - // argument provided is treated as the script path. We should probably also not - // allow any runner functionality within standalone binaries - - let result = Lune::new() - .with_args(self.script_args.clone()) // TODO: args should also include lune reserved ones - .run( - "STANDALONE", - &bin[usize::try_from(bytecode_offset).unwrap() - ..usize::try_from(bytecode_offset + bytecode_size).unwrap()], - ) - .await; - - return Ok(match result { - Err(err) => { - eprintln!("{err}"); - ExitCode::FAILURE - } - Ok(code) => code, - }); - } - } - // If not in a standalone context and we don't have any arguments // display the interactive REPL interface return repl::show_interface().await; From 1e43f70c92255a573f1c0ae51590bb653b139951 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Thu, 23 Nov 2023 19:46:29 +0530 Subject: [PATCH 09/22] feat: SUPER fast standalone binaries using jemalloc & rayon --- Cargo.lock | 21 ++++++++ Cargo.toml | 2 + src/cli/mod.rs | 140 +++++++++++++++++++++++++++---------------------- src/main.rs | 6 +++ 4 files changed, 105 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 90a3106f..7bd61af6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1165,6 +1165,7 @@ dependencies = [ "serde_json", "serde_yaml", "thiserror", + "tikv-jemallocator", "tokio", "tokio-tungstenite", "toml", @@ -2349,6 +2350,26 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tikv-jemalloc-sys" +version = "0.5.4+5.3.0-patched" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "tikv-jemallocator" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca" +dependencies = [ + "libc", + "tikv-jemalloc-sys", +] + [[package]] name = "time" version = "0.2.27" diff --git a/Cargo.toml b/Cargo.toml index 8338685e..b0bb3bde 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -83,6 +83,8 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] } mlua = { version = "0.9.1", features = ["luau", "luau-jit", "serialize"] } tokio = { version = "1.24", features = ["full", "tracing"] } os_str_bytes = { version = "6.4", features = ["conversions"] } +[target.'cfg(not(target_env = "msvc"))'.dependencies] +tikv-jemallocator = "0.5" ### SERDE diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 065c3267..0edb7e6a 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,9 +1,13 @@ -use std::{env, fmt::Write as _, path::PathBuf, process::ExitCode}; +use std::{env, fmt::Write as _, ops::ControlFlow, path::PathBuf, process::ExitCode, sync::Mutex}; use anyhow::{Context, Result}; use clap::Parser; use lune::Lune; +use rayon::{ + iter::{IndexedParallelIterator, ParallelIterator}, + slice::ParallelSlice, +}; use tokio::{ fs::read as read_to_vec, io::{stdin, AsyncReadExt}, @@ -93,8 +97,8 @@ impl Cli { let is_standalone = bin[bin.len() - signature.len()..bin.len()] == signature; if is_standalone { - let mut bytecode_offset = 0; - let mut bytecode_size = 0; + let bytecode_offset = Mutex::new(0); + let bytecode_size = Mutex::new(0); // standalone binary structure (reversed, 8 bytes per field) // [0] => signature @@ -112,31 +116,45 @@ impl Cli { // The rchunks will have unequally sized sections in the beginning // but that doesn't matter to us because we don't need anything past the // middle chunks where the bytecode is stored - for (idx, chunk) in bin.rchunks(signature.len()).enumerate() { - if idx == 0 && chunk != signature { - // Binary is guaranteed to be standalone, we've confirmed this before - unreachable!() - } + bin.par_rchunks(signature.len()) + .enumerate() + .try_for_each(|(idx, chunk)| { + let mut bytecode_offset = bytecode_offset.lock().unwrap(); + let mut bytecode_size = bytecode_size.lock().unwrap(); + + if *bytecode_offset != 0 && *bytecode_size != 0 { + return ControlFlow::Break(()); + } - if idx == 3 { - bytecode_offset = u64::from_ne_bytes(chunk.try_into()?); - } + if idx == 0 && chunk != signature { + // Binary is guaranteed to be standalone, we've confirmed this before + unreachable!("expected proper signature for standalone binary") + } - if idx == 2 { - bytecode_size = u64::from_ne_bytes(chunk.try_into()?); - } - } + if idx == 3 { + *bytecode_offset = u64::from_ne_bytes(chunk.try_into().unwrap()); + } + + if idx == 2 { + *bytecode_size = u64::from_ne_bytes(chunk.try_into().unwrap()); + } + + ControlFlow::Continue(()) + }); + + let bytecode_offset_inner = bytecode_offset.into_inner().unwrap(); + let bytecode_size_inner = bytecode_size.into_inner().unwrap(); // If we were able to retrieve the required metadata, we load // and execute the bytecode - if bytecode_offset != 0 && bytecode_size != 0 { - // FIXME: Passing arguments does not work like it should, because the first - // argument provided is treated as the script path. We should probably also not - // allow any runner functionality within standalone binaries + // if bytecode_offset_inner != 0 && bytecode_size_inner != 0 { + // FIXME: Passing arguments does not work like it should, because the first + // argument provided is treated as the script path. We should probably also not + // allow any runner functionality within standalone binaries - let mut reserved_args = Vec::new(); + let mut reserved_args = Vec::new(); - macro_rules! include_reserved_args { + macro_rules! include_reserved_args { ($($arg_bool:expr=> $mapping:literal),*) => { $( if $arg_bool { @@ -146,43 +164,41 @@ impl Cli { }; } - let mut real_args = Vec::new(); + let mut real_args = Vec::new(); - if let Some(first_arg) = self.script_path { - println!("{first_arg}"); - - real_args.push(first_arg); - } + if let Some(first_arg) = self.script_path { + real_args.push(first_arg); + } - include_reserved_args! { - self.setup => "--setup", - self.generate_docs_file => "--generate-docs-file", - self.generate_selene_types => "--generate-selene-types", - self.generate_luau_types => "--generate-luau-types", - self.list => "--list", - self.build => "--build" - } + include_reserved_args! { + self.setup => "--setup", + self.generate_docs_file => "--generate-docs-file", + self.generate_selene_types => "--generate-selene-types", + self.generate_luau_types => "--generate-luau-types", + self.list => "--list", + self.build => "--build" + } - real_args.append(&mut reserved_args); - real_args.append(&mut self.script_args.clone()); + real_args.append(&mut reserved_args); + real_args.append(&mut self.script_args.clone()); - let result = Lune::new() - .with_args(real_args) // TODO: args should also include lune reserved ones - .run( - "STANDALONE", - &bin[usize::try_from(bytecode_offset)? - ..usize::try_from(bytecode_offset + bytecode_size)?], - ) - .await; + let result = Lune::new() + .with_args(real_args) // TODO: args should also include lune reserved ones + .run( + "STANDALONE", + &bin[usize::try_from(bytecode_offset_inner)? + ..usize::try_from(bytecode_offset_inner + bytecode_size_inner)?], + ) + .await; - return Ok(match result { - Err(err) => { - eprintln!("{err}"); - ExitCode::FAILURE - } - Ok(code) => code, - }); - } + return Ok(match result { + Err(err) => { + eprintln!("{err}"); + ExitCode::FAILURE + } + Ok(code) => code, + }); + // } } // List files in `lune` and `.lune` directories, if wanted @@ -284,17 +300,13 @@ impl Cli { output_path.to_string_lossy() ); - return Ok( - match build_standalone(output_path, strip_shebang(script_contents.clone())) - .await - { - Ok(exitcode) => exitcode, - Err(err) => { - eprintln!("{err}"); - ExitCode::FAILURE - } - }, - ); + return Ok(match build_standalone(output_path, script_contents).await { + Ok(exitcode) => exitcode, + Err(err) => { + eprintln!("{err}"); + ExitCode::FAILURE + } + }); } // Create a new lune object with all globals & run the script diff --git a/src/main.rs b/src/main.rs index cd133005..c2564b6a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,6 +16,12 @@ pub(crate) mod cli; use cli::Cli; use console::style; +#[cfg(not(target_env = "msvc"))] +use tikv_jemallocator::Jemalloc; + +#[cfg(not(target_env = "msvc"))] +#[global_allocator] +static GLOBAL: Jemalloc = Jemalloc; #[tokio::main(flavor = "multi_thread")] async fn main() -> ExitCode { From 9c615ad1035d062d179d242a6d3875cb97ed62d5 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Tue, 2 Jan 2024 21:46:20 +0530 Subject: [PATCH 10/22] refactor: cleanup code & include logging --- src/cli/build.rs | 25 +++++- src/cli/mod.rs | 208 ++++++++++------------------------------------- src/executor.rs | 123 ++++++++++++++++++++++++++++ src/main.rs | 11 +++ 4 files changed, 202 insertions(+), 165 deletions(-) create mode 100644 src/executor.rs diff --git a/src/cli/build.rs b/src/cli/build.rs index 3bae49f2..5b621ed4 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -1,3 +1,4 @@ +use console::Style; use std::{ env, path::{Path, PathBuf}, @@ -11,10 +12,26 @@ use tokio::{ use anyhow::Result; use mlua::Compiler as LuaCompiler; +/** + Compiles and embeds the bytecode of a requested lua file to form a standalone binary, + then writes it to an output file, with the required permissions. +*/ +#[allow(clippy::similar_names)] pub async fn build_standalone + Into>( + script_path: String, output_path: T, code: impl AsRef<[u8]>, ) -> Result { + let log_output_path = output_path.as_ref().display(); + + let prefix_style = Style::new().green().bold(); + let compile_prefix = prefix_style.apply_to("Compile"); + let bytecode_prefix = prefix_style.apply_to("Bytecode"); + let write_prefix = prefix_style.apply_to("Write"); + let compiled_prefix = prefix_style.apply_to("Compiled"); + + println!("{compile_prefix} {script_path}"); + // First, we read the contents of the lune interpreter as our starting point let mut patched_bin = fs::read(env::current_exe()?).await?; let base_bin_offset = u64::try_from(patched_bin.len())?; @@ -30,13 +47,15 @@ pub async fn build_standalone + Into>( .set_debug_level(0) .compile(code); + println!(" {bytecode_prefix} {script_path}"); + patched_bin.append(&mut bytecode.clone()); let mut meta = base_bin_offset.to_ne_bytes().to_vec(); // Include metadata in the META chunk, each field is 8 bytes meta.append(&mut (bytecode.len() as u64).to_ne_bytes().to_vec()); // Size of bytecode, used to calculate end offset at runtime - meta.append(&mut 1_u64.to_ne_bytes().to_vec()); // Number of files, padded with null bytes + meta.append(&mut 1_u64.to_ne_bytes().to_vec()); // Number of files, padded with null bytes - for future use patched_bin.append(&mut meta); @@ -57,5 +76,9 @@ pub async fn build_standalone + Into>( #[cfg(target_family = "windows")] fs::write(&output_path, &patched_bin).await?; + println!(" {write_prefix} {log_output_path}"); + + println!("{compiled_prefix} {log_output_path}"); + Ok(ExitCode::SUCCESS) } diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 0edb7e6a..f85ac586 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,13 +1,9 @@ -use std::{env, fmt::Write as _, ops::ControlFlow, path::PathBuf, process::ExitCode, sync::Mutex}; +use std::{env, fmt::Write as _, path::PathBuf, process::ExitCode}; use anyhow::{Context, Result}; use clap::Parser; use lune::Lune; -use rayon::{ - iter::{IndexedParallelIterator, ParallelIterator}, - slice::ParallelSlice, -}; use tokio::{ fs::read as read_to_vec, io::{stdin, AsyncReadExt}, @@ -89,121 +85,9 @@ impl Cli { #[allow(clippy::too_many_lines)] pub async fn run(self) -> Result { - // Signature which is only present in standalone lune binaries - let signature: Vec = vec![0x4f, 0x3e, 0xf8, 0x41, 0xc3, 0x3a, 0x52, 0x16]; - // Read the current lune binary to memory - let bin = read_to_vec(env::current_exe()?).await?; - - let is_standalone = bin[bin.len() - signature.len()..bin.len()] == signature; - - if is_standalone { - let bytecode_offset = Mutex::new(0); - let bytecode_size = Mutex::new(0); - - // standalone binary structure (reversed, 8 bytes per field) - // [0] => signature - // ---------------- - // -- META Chunk -- - // [1] => file count - // [2] => bytecode size - // [3] => bytecode offset - // ---------------- - // -- MISC Chunk -- - // [4..n] => bytecode (variable size) - // ---------------- - // NOTE: All integers are 8 byte unsigned 64 bit (u64's). - - // The rchunks will have unequally sized sections in the beginning - // but that doesn't matter to us because we don't need anything past the - // middle chunks where the bytecode is stored - bin.par_rchunks(signature.len()) - .enumerate() - .try_for_each(|(idx, chunk)| { - let mut bytecode_offset = bytecode_offset.lock().unwrap(); - let mut bytecode_size = bytecode_size.lock().unwrap(); - - if *bytecode_offset != 0 && *bytecode_size != 0 { - return ControlFlow::Break(()); - } - - if idx == 0 && chunk != signature { - // Binary is guaranteed to be standalone, we've confirmed this before - unreachable!("expected proper signature for standalone binary") - } - - if idx == 3 { - *bytecode_offset = u64::from_ne_bytes(chunk.try_into().unwrap()); - } - - if idx == 2 { - *bytecode_size = u64::from_ne_bytes(chunk.try_into().unwrap()); - } - - ControlFlow::Continue(()) - }); - - let bytecode_offset_inner = bytecode_offset.into_inner().unwrap(); - let bytecode_size_inner = bytecode_size.into_inner().unwrap(); - - // If we were able to retrieve the required metadata, we load - // and execute the bytecode - // if bytecode_offset_inner != 0 && bytecode_size_inner != 0 { - // FIXME: Passing arguments does not work like it should, because the first - // argument provided is treated as the script path. We should probably also not - // allow any runner functionality within standalone binaries - - let mut reserved_args = Vec::new(); - - macro_rules! include_reserved_args { - ($($arg_bool:expr=> $mapping:literal),*) => { - $( - if $arg_bool { - reserved_args.push($mapping.to_string()) - } - )* - }; - } - - let mut real_args = Vec::new(); - - if let Some(first_arg) = self.script_path { - real_args.push(first_arg); - } - - include_reserved_args! { - self.setup => "--setup", - self.generate_docs_file => "--generate-docs-file", - self.generate_selene_types => "--generate-selene-types", - self.generate_luau_types => "--generate-luau-types", - self.list => "--list", - self.build => "--build" - } - - real_args.append(&mut reserved_args); - real_args.append(&mut self.script_args.clone()); - - let result = Lune::new() - .with_args(real_args) // TODO: args should also include lune reserved ones - .run( - "STANDALONE", - &bin[usize::try_from(bytecode_offset_inner)? - ..usize::try_from(bytecode_offset_inner + bytecode_size_inner)?], - ) - .await; - - return Ok(match result { - Err(err) => { - eprintln!("{err}"); - ExitCode::FAILURE - } - Ok(code) => code, - }); - // } - } - // List files in `lune` and `.lune` directories, if wanted // This will also exit early and not run anything else - if self.list && !is_standalone { + if self.list { let sorted_relative = find_lune_scripts(false).await.map(sort_lune_scripts); let sorted_home_dir = find_lune_scripts(true).await.map(sort_lune_scripts); @@ -271,58 +155,54 @@ impl Cli { return repl::show_interface().await; } - if !is_standalone { - // Figure out if we should read from stdin or from a file, - // reading from stdin is marked by passing a single "-" - // (dash) as the script name to run to the cli - let script_path = self.script_path.unwrap(); - - let (script_display_name, script_contents) = if script_path == "-" { - let mut stdin_contents = Vec::new(); - stdin() - .read_to_end(&mut stdin_contents) - .await - .context("Failed to read script contents from stdin")?; - ("stdin".to_string(), stdin_contents) - } else { - let file_path = discover_script_path_including_lune_dirs(&script_path)?; - let file_contents = read_to_vec(&file_path).await?; - // NOTE: We skip the extension here to remove it from stack traces - let file_display_name = file_path.with_extension("").display().to_string(); - (file_display_name, file_contents) - }; - - if self.build { - let output_path = - PathBuf::from(script_path.clone()).with_extension(env::consts::EXE_EXTENSION); - println!( - "Building {script_path} to {}", - output_path.to_string_lossy() - ); - - return Ok(match build_standalone(output_path, script_contents).await { + // Figure out if we should read from stdin or from a file, + // reading from stdin is marked by passing a single "-" + // (dash) as the script name to run to the cli + let script_path = self.script_path.unwrap(); + + let (script_display_name, script_contents) = if script_path == "-" { + let mut stdin_contents = Vec::new(); + stdin() + .read_to_end(&mut stdin_contents) + .await + .context("Failed to read script contents from stdin")?; + ("stdin".to_string(), stdin_contents) + } else { + let file_path = discover_script_path_including_lune_dirs(&script_path)?; + let file_contents = read_to_vec(&file_path).await?; + // NOTE: We skip the extension here to remove it from stack traces + let file_display_name = file_path.with_extension("").display().to_string(); + (file_display_name, file_contents) + }; + + if self.build { + let output_path = + PathBuf::from(script_path.clone()).with_extension(env::consts::EXE_EXTENSION); + + println!("Building {script_path} to {}...\n", output_path.display()); + + return Ok( + match build_standalone(script_path, output_path, script_contents).await { Ok(exitcode) => exitcode, Err(err) => { eprintln!("{err}"); ExitCode::FAILURE } - }); - } - - // Create a new lune object with all globals & run the script - let result = Lune::new() - .with_args(self.script_args) - .run(&script_display_name, strip_shebang(script_contents)) - .await; - return Ok(match result { - Err(err) => { - eprintln!("{err}"); - ExitCode::FAILURE - } - Ok(code) => code, - }); + }, + ); } - Ok(ExitCode::SUCCESS) + // Create a new lune object with all globals & run the script + let result = Lune::new() + .with_args(self.script_args) + .run(&script_display_name, strip_shebang(script_contents)) + .await; + Ok(match result { + Err(err) => { + eprintln!("{err}"); + ExitCode::FAILURE + } + Ok(code) => code, + }) } } diff --git a/src/executor.rs b/src/executor.rs new file mode 100644 index 00000000..fc75026d --- /dev/null +++ b/src/executor.rs @@ -0,0 +1,123 @@ +use std::{ + collections::VecDeque, + env, + ops::ControlFlow, + process::{self, ExitCode}, + sync::Mutex, +}; + +use lune::Lune; + +use anyhow::Result; +use rayon::{ + iter::{IndexedParallelIterator, ParallelIterator}, + slice::ParallelSlice, +}; +use tokio::fs::read as read_to_vec; + +/** + Returns information about whether the execution environment is standalone + or not, the standalone binary signature, and the contents of the binary. +*/ +pub async fn check_env() -> (bool, Vec, Vec) { + // Signature which is only present in standalone lune binaries + let signature: Vec = vec![0x4f, 0x3e, 0xf8, 0x41, 0xc3, 0x3a, 0x52, 0x16]; + + // Read the current lune binary to memory + let bin = if let Ok(contents) = read_to_vec(match env::current_exe() { + Ok(path) => path, + Err(err) => { + eprintln!( + "WARN: Couldn't get path to currently running lune executable; err: {}", + err.kind() + ); + process::exit(1); + } + }) + .await + { + contents + } else { + Vec::new() + }; + + let is_standalone = !bin.is_empty() && bin[bin.len() - signature.len()..bin.len()] == signature; + + (is_standalone, signature, bin) +} + +/** + Discovers, loads and executes the bytecode contained in a standalone binary. +*/ +pub async fn run_standalone(signature: Vec, bin: Vec) -> Result { + let bytecode_offset = Mutex::new(0); + let bytecode_size = Mutex::new(0); + + // standalone binary structure (reversed, 8 bytes per field) + // [0] => signature + // ---------------- + // -- META Chunk -- + // [1] => file count + // [2] => bytecode size + // [3] => bytecode offset + // ---------------- + // -- MISC Chunk -- + // [4..n] => bytecode (variable size) + // ---------------- + // NOTE: All integers are 8 byte unsigned 64 bit (u64's). + + // The rchunks will have unequally sized sections in the beginning + // but that doesn't matter to us because we don't need anything past the + // middle chunks where the bytecode is stored + bin.par_rchunks(signature.len()) + .enumerate() + .try_for_each(|(idx, chunk)| { + let mut bytecode_offset = bytecode_offset.lock().unwrap(); + let mut bytecode_size = bytecode_size.lock().unwrap(); + + if *bytecode_offset != 0 && *bytecode_size != 0 { + return ControlFlow::Break(()); + } + + if idx == 0 && chunk != signature { + // Binary is guaranteed to be standalone, we've confirmed this before + unreachable!("expected proper signature for standalone binary") + } + + if idx == 3 { + *bytecode_offset = u64::from_ne_bytes(chunk.try_into().unwrap()); + } + + if idx == 2 { + *bytecode_size = u64::from_ne_bytes(chunk.try_into().unwrap()); + } + + ControlFlow::Continue(()) + }); + + let bytecode_offset_inner = bytecode_offset.into_inner().unwrap(); + let bytecode_size_inner = bytecode_size.into_inner().unwrap(); + + // If we were able to retrieve the required metadata, we load + // and execute the bytecode + + let mut args = env::args().collect::>(); + args.pop_front(); + + let result = Lune::new() + .with_args(args) + .run( + "STANDALONE", + &bin[usize::try_from(bytecode_offset_inner)? + ..usize::try_from(bytecode_offset_inner + bytecode_size_inner)?], + ) + .await; + + Ok(match result { + Err(err) => { + eprintln!("{err}"); + ExitCode::FAILURE + } + Ok(code) => code, + }) +} diff --git a/src/main.rs b/src/main.rs index c2564b6a..3c3afabd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,9 +13,11 @@ use std::process::ExitCode; use clap::Parser; pub(crate) mod cli; +pub(crate) mod executor; use cli::Cli; use console::style; + #[cfg(not(target_env = "msvc"))] use tikv_jemallocator::Jemalloc; @@ -32,6 +34,15 @@ async fn main() -> ExitCode { .with_timer(tracing_subscriber::fmt::time::uptime()) .with_level(true) .init(); + + let (is_standalone, signature, bin) = executor::check_env().await; + + if is_standalone { + // It's fine to unwrap here since we don't want to continue + // if something fails + return executor::run_standalone(signature, bin).await.unwrap(); + } + match Cli::parse().run().await { Ok(code) => code, Err(err) => { From a5d118db4b141e9d2dccd2b8c5dea0de69b46dbe Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Thu, 4 Jan 2024 18:28:04 +0530 Subject: [PATCH 11/22] fix: improper trait bounds --- src/cli/build.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/cli/build.rs b/src/cli/build.rs index 5b621ed4..99c64e7b 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -1,9 +1,5 @@ use console::Style; -use std::{ - env, - path::{Path, PathBuf}, - process::ExitCode, -}; +use std::{env, path::Path, process::ExitCode}; use tokio::{ fs::{self, OpenOptions}, io::AsyncWriteExt, @@ -17,7 +13,7 @@ use mlua::Compiler as LuaCompiler; then writes it to an output file, with the required permissions. */ #[allow(clippy::similar_names)] -pub async fn build_standalone + Into>( +pub async fn build_standalone>( script_path: String, output_path: T, code: impl AsRef<[u8]>, From 5f68fee1a9f33a5f69017a674286dac0271471af Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Thu, 4 Jan 2024 18:29:57 +0530 Subject: [PATCH 12/22] fix: add rustdoc comments for build arg --- src/cli/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index f85ac586..2ba63a53 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -47,6 +47,7 @@ pub struct Cli { /// Generate a Lune documentation file for Luau LSP #[clap(long, hide = true)] generate_docs_file: bool, + /// Build a Luau file to an OS-Native standalone executable #[clap(long, hide = true)] build: bool, } From 75152bd384ab3b6bea9b42f4b9ef1bc23f763a86 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Thu, 4 Jan 2024 18:31:31 +0530 Subject: [PATCH 13/22] fix: panic on failure to get current exe --- src/executor.rs | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/executor.rs b/src/executor.rs index fc75026d..51d7787d 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -24,16 +24,9 @@ pub async fn check_env() -> (bool, Vec, Vec) { let signature: Vec = vec![0x4f, 0x3e, 0xf8, 0x41, 0xc3, 0x3a, 0x52, 0x16]; // Read the current lune binary to memory - let bin = if let Ok(contents) = read_to_vec(match env::current_exe() { - Ok(path) => path, - Err(err) => { - eprintln!( - "WARN: Couldn't get path to currently running lune executable; err: {}", - err.kind() - ); - process::exit(1); - } - }) + let bin = if let Ok(contents) = read_to_vec( + env::current_exe().expect("failed to get path to current running lune executable"), + ) .await { contents From 53b53a27fd2c472893f974ace7c236cce53709e7 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Thu, 4 Jan 2024 18:33:51 +0530 Subject: [PATCH 14/22] fix: avoid collecting to unneeded VecDequeue --- src/executor.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/executor.rs b/src/executor.rs index 51d7787d..183bc31a 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -1,10 +1,4 @@ -use std::{ - collections::VecDeque, - env, - ops::ControlFlow, - process::{self, ExitCode}, - sync::Mutex, -}; +use std::{env, ops::ControlFlow, process::ExitCode, sync::Mutex}; use lune::Lune; @@ -94,8 +88,8 @@ pub async fn run_standalone(signature: Vec, bin: Vec) -> Result>(); - args.pop_front(); + // Skip the first argument which is the path to current executable + let args = env::args().skip(1).collect::>(); let result = Lune::new() .with_args(args) From 6f4b2f4c313c74e9da6cef614a61de5a3cb1c5a8 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Thu, 4 Jan 2024 19:02:00 +0530 Subject: [PATCH 15/22] fix: remove redundant multi-threading code --- Cargo.lock | 75 ------------------------------------------------- Cargo.toml | 3 -- src/executor.rs | 28 ++++++------------ src/main.rs | 7 ----- 4 files changed, 9 insertions(+), 104 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f242aa59..f5e29e62 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -468,30 +468,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam-deque" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" -dependencies = [ - "cfg-if", - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" -dependencies = [ - "autocfg", - "cfg-if", - "crossbeam-utils", - "memoffset", - "scopeguard", -] - [[package]] name = "crossbeam-utils" version = "0.8.18" @@ -1156,7 +1132,6 @@ dependencies = [ "path-clean", "pin-project", "rand", - "rayon", "rbx_binary", "rbx_cookie", "rbx_dom_weak", @@ -1170,7 +1145,6 @@ dependencies = [ "serde_json", "serde_yaml", "thiserror", - "tikv-jemallocator", "tokio", "tokio-tungstenite", "toml", @@ -1223,15 +1197,6 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" -[[package]] -name = "memoffset" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" -dependencies = [ - "autocfg", -] - [[package]] name = "mime" version = "0.3.17" @@ -1588,26 +1553,6 @@ dependencies = [ "getrandom 0.2.11", ] -[[package]] -name = "rayon" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - [[package]] name = "rbx_binary" version = "0.7.3" @@ -2337,26 +2282,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "tikv-jemalloc-sys" -version = "0.5.4+5.3.0-patched" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "tikv-jemallocator" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca" -dependencies = [ - "libc", - "tikv-jemalloc-sys", -] - [[package]] name = "time" version = "0.2.27" diff --git a/Cargo.toml b/Cargo.toml index b0bb3bde..6474e9d9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -83,8 +83,6 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] } mlua = { version = "0.9.1", features = ["luau", "luau-jit", "serialize"] } tokio = { version = "1.24", features = ["full", "tracing"] } os_str_bytes = { version = "6.4", features = ["conversions"] } -[target.'cfg(not(target_env = "msvc"))'.dependencies] -tikv-jemallocator = "0.5" ### SERDE @@ -126,7 +124,6 @@ regex = { optional = true, version = "1.7", default-features = false, features = "unicode-perl", ] } rustyline = { optional = true, version = "12.0" } -rayon = "1.8" ### ROBLOX diff --git a/src/executor.rs b/src/executor.rs index 183bc31a..18e025e2 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -1,12 +1,8 @@ -use std::{env, ops::ControlFlow, process::ExitCode, sync::Mutex}; +use std::{env, ops::ControlFlow, process::ExitCode}; use lune::Lune; use anyhow::Result; -use rayon::{ - iter::{IndexedParallelIterator, ParallelIterator}, - slice::ParallelSlice, -}; use tokio::fs::read as read_to_vec; /** @@ -37,8 +33,8 @@ pub async fn check_env() -> (bool, Vec, Vec) { Discovers, loads and executes the bytecode contained in a standalone binary. */ pub async fn run_standalone(signature: Vec, bin: Vec) -> Result { - let bytecode_offset = Mutex::new(0); - let bytecode_size = Mutex::new(0); + let mut bytecode_offset = 0; + let mut bytecode_size = 0; // standalone binary structure (reversed, 8 bytes per field) // [0] => signature @@ -56,13 +52,10 @@ pub async fn run_standalone(signature: Vec, bin: Vec) -> Result, bin: Vec) -> Result, bin: Vec) -> Result ExitCode { tracing_subscriber::fmt() From 94b27d81d139d9e7c98d26ce011d91533e50a33e Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Thu, 4 Jan 2024 21:22:24 +0530 Subject: [PATCH 16/22] fix: make build option visible to user --- src/cli/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 2ba63a53..cdbd8316 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -48,7 +48,7 @@ pub struct Cli { #[clap(long, hide = true)] generate_docs_file: bool, /// Build a Luau file to an OS-Native standalone executable - #[clap(long, hide = true)] + #[clap(long)] build: bool, } From 3c2464d3ce86ab31d58db996855be35165af304c Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Fri, 5 Jan 2024 17:08:43 +0530 Subject: [PATCH 17/22] feat: store magic signature as a meaningful constant --- src/cli/build.rs | 23 ++++++++++++----------- src/executor.rs | 36 +++++++++++++++++++----------------- src/main.rs | 4 ++-- 3 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/cli/build.rs b/src/cli/build.rs index 99c64e7b..156191ac 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -8,6 +8,11 @@ use tokio::{ use anyhow::Result; use mlua::Compiler as LuaCompiler; +// The signature which separates indicates the presence of bytecode to execute +// If a binary contains this magic signature as the last 8 bytes, that must mean +// it is a standalone binary +pub const MAGIC: &[u8; 8] = b"cr3sc3nt"; + /** Compiles and embeds the bytecode of a requested lua file to form a standalone binary, then writes it to an output file, with the required permissions. @@ -32,10 +37,6 @@ pub async fn build_standalone>( let mut patched_bin = fs::read(env::current_exe()?).await?; let base_bin_offset = u64::try_from(patched_bin.len())?; - // The signature which separates indicates the presence of bytecode to execute - // If a binary contains this signature, that must mean it is a standalone binary - let signature: Vec = vec![0x4f, 0x3e, 0xf8, 0x41, 0xc3, 0x3a, 0x52, 0x16]; - // Compile luau input into bytecode let bytecode = LuaCompiler::new() .set_optimization_level(2) @@ -45,18 +46,18 @@ pub async fn build_standalone>( println!(" {bytecode_prefix} {script_path}"); - patched_bin.append(&mut bytecode.clone()); + patched_bin.extend(&bytecode); - let mut meta = base_bin_offset.to_ne_bytes().to_vec(); + let mut meta = base_bin_offset.to_ne_bytes().to_vec(); // Start with the base bytecode offset // Include metadata in the META chunk, each field is 8 bytes - meta.append(&mut (bytecode.len() as u64).to_ne_bytes().to_vec()); // Size of bytecode, used to calculate end offset at runtime - meta.append(&mut 1_u64.to_ne_bytes().to_vec()); // Number of files, padded with null bytes - for future use + meta.extend((bytecode.len() as u64).to_ne_bytes()); // Size of bytecode, used to calculate end offset at runtime + meta.extend(1_u64.to_ne_bytes()); // Number of files, padded with null bytes - for future use - patched_bin.append(&mut meta); + patched_bin.extend(meta); - // Append the signature to the base binary - patched_bin.append(&mut signature.clone()); + // Append the magic signature to the base binary + patched_bin.extend(MAGIC); // Write the compiled binary to file #[cfg(target_family = "unix")] diff --git a/src/executor.rs b/src/executor.rs index 18e025e2..dac4e0c8 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -1,5 +1,6 @@ use std::{env, ops::ControlFlow, process::ExitCode}; +use crate::cli::build::MAGIC; use lune::Lune; use anyhow::Result; @@ -9,10 +10,7 @@ use tokio::fs::read as read_to_vec; Returns information about whether the execution environment is standalone or not, the standalone binary signature, and the contents of the binary. */ -pub async fn check_env() -> (bool, Vec, Vec) { - // Signature which is only present in standalone lune binaries - let signature: Vec = vec![0x4f, 0x3e, 0xf8, 0x41, 0xc3, 0x3a, 0x52, 0x16]; - +pub async fn check_env() -> (bool, Vec) { // Read the current lune binary to memory let bin = if let Ok(contents) = read_to_vec( env::current_exe().expect("failed to get path to current running lune executable"), @@ -24,20 +22,21 @@ pub async fn check_env() -> (bool, Vec, Vec) { Vec::new() }; - let is_standalone = !bin.is_empty() && bin[bin.len() - signature.len()..bin.len()] == signature; + let is_standalone = + !bin.is_empty() && bin[bin.len() - MAGIC.len()..bin.len()] == MAGIC.to_vec(); - (is_standalone, signature, bin) + (is_standalone, bin) } /** Discovers, loads and executes the bytecode contained in a standalone binary. */ -pub async fn run_standalone(signature: Vec, bin: Vec) -> Result { +pub async fn run_standalone(bin: Vec) -> Result { let mut bytecode_offset = 0; let mut bytecode_size = 0; // standalone binary structure (reversed, 8 bytes per field) - // [0] => signature + // [0] => magic signature // ---------------- // -- META Chunk -- // [1] => file count @@ -47,21 +46,21 @@ pub async fn run_standalone(signature: Vec, bin: Vec) -> Result bytecode (variable size) // ---------------- - // NOTE: All integers are 8 byte unsigned 64 bit (u64's). + // NOTE: All integers are 8 byte, padded, unsigned & 64 bit (u64's). // The rchunks will have unequally sized sections in the beginning // but that doesn't matter to us because we don't need anything past the // middle chunks where the bytecode is stored - bin.rchunks(signature.len()) + bin.rchunks(MAGIC.len()) .enumerate() .try_for_each(|(idx, chunk)| { if bytecode_offset != 0 && bytecode_size != 0 { return ControlFlow::Break(()); } - if idx == 0 && chunk != signature { + if idx == 0 && chunk != MAGIC { // Binary is guaranteed to be standalone, we've confirmed this before - unreachable!("expected proper signature for standalone binary") + unreachable!("expected proper magic signature for standalone binary") } if idx == 3 { @@ -78,16 +77,19 @@ pub async fn run_standalone(signature: Vec, bin: Vec) -> Result>(); + let bytecode = + &bin[usize::try_from(bytecode_offset)?..usize::try_from(bytecode_offset + bytecode_size)?]; + + // println!("bytecode: {:?}", bytecode); let result = Lune::new() .with_args(args) - .run( - "STANDALONE", - &bin[usize::try_from(bytecode_offset)? - ..usize::try_from(bytecode_offset + bytecode_size)?], - ) + .run("STANDALONE", bytecode) .await; Ok(match result { diff --git a/src/main.rs b/src/main.rs index 1febe406..3cdb8211 100644 --- a/src/main.rs +++ b/src/main.rs @@ -28,12 +28,12 @@ async fn main() -> ExitCode { .with_level(true) .init(); - let (is_standalone, signature, bin) = executor::check_env().await; + let (is_standalone, bin) = executor::check_env().await; if is_standalone { // It's fine to unwrap here since we don't want to continue // if something fails - return executor::run_standalone(signature, bin).await.unwrap(); + return executor::run_standalone(bin).await.unwrap(); } match Cli::parse().run().await { From 35c5a3ca61dc2655833afafb36cbd31f85202939 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Fri, 5 Jan 2024 17:27:50 +0530 Subject: [PATCH 18/22] fix: use fixed (little) endianness --- src/cli/build.rs | 6 +++--- src/executor.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cli/build.rs b/src/cli/build.rs index 156191ac..7bbbfc3b 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -48,11 +48,11 @@ pub async fn build_standalone>( patched_bin.extend(&bytecode); - let mut meta = base_bin_offset.to_ne_bytes().to_vec(); // Start with the base bytecode offset + let mut meta = base_bin_offset.to_le_bytes().to_vec(); // Start with the base bytecode offset // Include metadata in the META chunk, each field is 8 bytes - meta.extend((bytecode.len() as u64).to_ne_bytes()); // Size of bytecode, used to calculate end offset at runtime - meta.extend(1_u64.to_ne_bytes()); // Number of files, padded with null bytes - for future use + meta.extend((bytecode.len() as u64).to_le_bytes()); // Size of bytecode, used to calculate end offset at runtime + meta.extend(1_u64.to_le_bytes()); // Number of files, padded with null bytes - for future use patched_bin.extend(meta); diff --git a/src/executor.rs b/src/executor.rs index dac4e0c8..59f151fa 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -64,11 +64,11 @@ pub async fn run_standalone(bin: Vec) -> Result { } if idx == 3 { - bytecode_offset = u64::from_ne_bytes(chunk.try_into().unwrap()); + bytecode_offset = u64::from_le_bytes(chunk.try_into().unwrap()); } if idx == 2 { - bytecode_size = u64::from_ne_bytes(chunk.try_into().unwrap()); + bytecode_size = u64::from_le_bytes(chunk.try_into().unwrap()); } ControlFlow::Continue(()) From b071db3f12b7e2998cad29e810f2bde6e8943d90 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Fri, 5 Jan 2024 18:20:41 +0530 Subject: [PATCH 19/22] feat: initial META chunk (de)serialization impl --- src/cli/build.rs | 107 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 102 insertions(+), 5 deletions(-) diff --git a/src/cli/build.rs b/src/cli/build.rs index 7bbbfc3b..2e3a7670 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -1,4 +1,6 @@ use console::Style; +use itertools::Itertools; +use num_traits::{FromBytes, ToBytes}; use std::{env, path::Path, process::ExitCode}; use tokio::{ fs::{self, OpenOptions}, @@ -13,6 +15,101 @@ use mlua::Compiler as LuaCompiler; // it is a standalone binary pub const MAGIC: &[u8; 8] = b"cr3sc3nt"; +/// Utility struct to parse and generate bytes to the META chunk of standalone binaries. +#[derive(Debug, Clone)] +pub struct MetaChunk { + /// Compiled lua bytecode of the entrypoint script. + bytecode: Vec, + /// Offset to the the beginning of the bytecode from the start of the lune binary. + bytecode_offset: Option, + /// Number of files present, currently unused. **For future use**. + file_count: Option, +} + +impl MetaChunk { + pub fn new() -> Self { + Self { + bytecode: Vec::new(), + bytecode_offset: None, + file_count: None, + } + } + + pub fn with_bytecode(&mut self, bytecode: Vec) -> Self { + self.bytecode = bytecode; + + self.clone() + } + + pub fn with_bytecode_offset(&mut self, offset: u64) -> Self { + self.bytecode_offset = Some(offset); + + self.clone() + } + + pub fn with_file_count(&mut self, count: u64) -> Self { + self.file_count = Some(count); + + self.clone() + } + + pub fn build(self, endianness: &str) -> Vec { + match endianness { + "big" => self.to_be_bytes(), + "little" => self.to_le_bytes(), + &_ => panic!("unexpected endianness"), + } + } +} + +impl Default for MetaChunk { + fn default() -> Self { + Self { + bytecode: Vec::new(), + bytecode_offset: Some(0), + file_count: Some(1), + } + } +} + +impl ToBytes for MetaChunk { + type Bytes = Vec; + + fn to_be_bytes(&self) -> Self::Bytes { + // We start with the bytecode offset as the first field already filled in + let mut tmp = self.bytecode_offset.unwrap().to_be_bytes().to_vec(); + + // NOTE: The order of the fields here are reversed, which is on purpose + tmp.extend(self.bytecode.len().to_be_bytes()); + tmp.extend(self.file_count.unwrap().to_be_bytes()); + + tmp + } + + fn to_le_bytes(&self) -> Self::Bytes { + // We start with the bytecode offset as the first field already filled in + let mut tmp = self.bytecode_offset.unwrap().to_le_bytes().to_vec(); + + // NOTE: The order of the fields here are reversed, which is on purpose + tmp.extend(self.bytecode.len().to_le_bytes()); + tmp.extend(self.file_count.unwrap().to_le_bytes()); + + tmp + } +} + +impl FromBytes for MetaChunk { + type Bytes = Vec; + + fn from_be_bytes(bytes: &Self::Bytes) -> Self { + todo!() + } + + fn from_le_bytes(bytes: &Self::Bytes) -> Self { + todo!() + } +} + /** Compiles and embeds the bytecode of a requested lua file to form a standalone binary, then writes it to an output file, with the required permissions. @@ -48,13 +145,13 @@ pub async fn build_standalone>( patched_bin.extend(&bytecode); - let mut meta = base_bin_offset.to_le_bytes().to_vec(); // Start with the base bytecode offset + let meta = MetaChunk::new() + .with_bytecode(bytecode) + .with_bytecode_offset(base_bin_offset) + .with_file_count(1_u64); // Start with the base bytecode offset // Include metadata in the META chunk, each field is 8 bytes - meta.extend((bytecode.len() as u64).to_le_bytes()); // Size of bytecode, used to calculate end offset at runtime - meta.extend(1_u64.to_le_bytes()); // Number of files, padded with null bytes - for future use - - patched_bin.extend(meta); + patched_bin.extend(meta.build("little")); // Append the magic signature to the base binary patched_bin.extend(MAGIC); From 94fd549a65fdd7cb75a7663e710fc52ed21601f4 Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Sat, 13 Jan 2024 19:32:08 +0530 Subject: [PATCH 20/22] refactor: impl discovery logic as trait --- src/cli/build.rs | 72 +++++++++++++++++++++++++++++++++++++++++++----- src/executor.rs | 55 +++--------------------------------- 2 files changed, 69 insertions(+), 58 deletions(-) diff --git a/src/cli/build.rs b/src/cli/build.rs index 2e3a7670..6eef84b8 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -1,7 +1,6 @@ use console::Style; -use itertools::Itertools; use num_traits::{FromBytes, ToBytes}; -use std::{env, path::Path, process::ExitCode}; +use std::{env, ops::ControlFlow, path::Path, process::ExitCode}; use tokio::{ fs::{self, OpenOptions}, io::AsyncWriteExt, @@ -19,11 +18,11 @@ pub const MAGIC: &[u8; 8] = b"cr3sc3nt"; #[derive(Debug, Clone)] pub struct MetaChunk { /// Compiled lua bytecode of the entrypoint script. - bytecode: Vec, + pub bytecode: Vec, /// Offset to the the beginning of the bytecode from the start of the lune binary. - bytecode_offset: Option, + pub bytecode_offset: Option, /// Number of files present, currently unused. **For future use**. - file_count: Option, + pub file_count: Option, } impl MetaChunk { @@ -60,6 +59,62 @@ impl MetaChunk { &_ => panic!("unexpected endianness"), } } + + fn from_bytes(bytes: &[u8], int_handler: fn([u8; 8]) -> u64) -> Result { + let mut bytecode_offset = 0; + let mut bytecode_size = 0; + + // standalone binary structure (reversed, 8 bytes per field) + // [0] => magic signature + // ---------------- + // -- META Chunk -- + // [1] => file count + // [2] => bytecode size + // [3] => bytecode offset + // ---------------- + // -- MISC Chunk -- + // [4..n] => bytecode (variable size) + // ---------------- + // NOTE: All integers are 8 byte, padded, unsigned & 64 bit (u64's). + + // The rchunks will have unequally sized sections in the beginning + // but that doesn't matter to us because we don't need anything past the + // middle chunks where the bytecode is stored + bytes + .rchunks(MAGIC.len()) + .enumerate() + .try_for_each(|(idx, chunk)| { + if bytecode_offset != 0 && bytecode_size != 0 { + return ControlFlow::Break(()); + } + + if idx == 0 && chunk != MAGIC { + // Binary is guaranteed to be standalone, we've confirmed this before + unreachable!("expected proper magic signature for standalone binary") + } + + if idx == 3 { + bytecode_offset = int_handler(chunk.try_into().unwrap()); + } + + if idx == 2 { + bytecode_size = int_handler(chunk.try_into().unwrap()); + } + + ControlFlow::Continue(()) + }); + + println!("size: {}", bytecode_size); + println!("offset: {}", bytecode_offset); + + Ok(Self { + bytecode: bytes[usize::try_from(bytecode_offset)? + ..usize::try_from(bytecode_offset + bytecode_size)?] + .to_vec(), + bytecode_offset: Some(bytecode_offset), + file_count: Some(1), + }) + } } impl Default for MetaChunk { @@ -94,6 +149,9 @@ impl ToBytes for MetaChunk { tmp.extend(self.bytecode.len().to_le_bytes()); tmp.extend(self.file_count.unwrap().to_le_bytes()); + println!("size: {}", self.bytecode.len()); + println!("offset: {:?}", self.bytecode_offset); + tmp } } @@ -102,11 +160,11 @@ impl FromBytes for MetaChunk { type Bytes = Vec; fn from_be_bytes(bytes: &Self::Bytes) -> Self { - todo!() + Self::from_bytes(bytes, u64::from_be_bytes).unwrap() } fn from_le_bytes(bytes: &Self::Bytes) -> Self { - todo!() + Self::from_bytes(bytes, u64::from_le_bytes).unwrap() } } diff --git a/src/executor.rs b/src/executor.rs index 59f151fa..6fecf510 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -1,9 +1,10 @@ -use std::{env, ops::ControlFlow, process::ExitCode}; +use std::{env, process::ExitCode}; -use crate::cli::build::MAGIC; +use crate::cli::build::{MetaChunk, MAGIC}; use lune::Lune; use anyhow::Result; +use num_traits::FromBytes; use tokio::fs::read as read_to_vec; /** @@ -32,60 +33,12 @@ pub async fn check_env() -> (bool, Vec) { Discovers, loads and executes the bytecode contained in a standalone binary. */ pub async fn run_standalone(bin: Vec) -> Result { - let mut bytecode_offset = 0; - let mut bytecode_size = 0; - - // standalone binary structure (reversed, 8 bytes per field) - // [0] => magic signature - // ---------------- - // -- META Chunk -- - // [1] => file count - // [2] => bytecode size - // [3] => bytecode offset - // ---------------- - // -- MISC Chunk -- - // [4..n] => bytecode (variable size) - // ---------------- - // NOTE: All integers are 8 byte, padded, unsigned & 64 bit (u64's). - - // The rchunks will have unequally sized sections in the beginning - // but that doesn't matter to us because we don't need anything past the - // middle chunks where the bytecode is stored - bin.rchunks(MAGIC.len()) - .enumerate() - .try_for_each(|(idx, chunk)| { - if bytecode_offset != 0 && bytecode_size != 0 { - return ControlFlow::Break(()); - } - - if idx == 0 && chunk != MAGIC { - // Binary is guaranteed to be standalone, we've confirmed this before - unreachable!("expected proper magic signature for standalone binary") - } - - if idx == 3 { - bytecode_offset = u64::from_le_bytes(chunk.try_into().unwrap()); - } - - if idx == 2 { - bytecode_size = u64::from_le_bytes(chunk.try_into().unwrap()); - } - - ControlFlow::Continue(()) - }); - // If we were able to retrieve the required metadata, we load // and execute the bytecode - - // println!("offset: {}", bytecode_offset); - // println!("size: {}", bytecode_size); + let MetaChunk { bytecode, .. } = MetaChunk::from_le_bytes(&bin); // Skip the first argument which is the path to current executable let args = env::args().skip(1).collect::>(); - let bytecode = - &bin[usize::try_from(bytecode_offset)?..usize::try_from(bytecode_offset + bytecode_size)?]; - - // println!("bytecode: {:?}", bytecode); let result = Lune::new() .with_args(args) From 55fe033f21b4e609d89720914d0e2613d1b3d78d Mon Sep 17 00:00:00 2001 From: Erica Marigold Date: Sat, 13 Jan 2024 19:37:51 +0530 Subject: [PATCH 21/22] refactor: move most shared logic to executor.rs --- src/cli/build.rs | 162 +--------------------------------------------- src/executor.rs | 165 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 164 insertions(+), 163 deletions(-) diff --git a/src/cli/build.rs b/src/cli/build.rs index 6eef84b8..b3977857 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -1,6 +1,5 @@ use console::Style; -use num_traits::{FromBytes, ToBytes}; -use std::{env, ops::ControlFlow, path::Path, process::ExitCode}; +use std::{env, path::Path, process::ExitCode}; use tokio::{ fs::{self, OpenOptions}, io::AsyncWriteExt, @@ -9,164 +8,7 @@ use tokio::{ use anyhow::Result; use mlua::Compiler as LuaCompiler; -// The signature which separates indicates the presence of bytecode to execute -// If a binary contains this magic signature as the last 8 bytes, that must mean -// it is a standalone binary -pub const MAGIC: &[u8; 8] = b"cr3sc3nt"; - -/// Utility struct to parse and generate bytes to the META chunk of standalone binaries. -#[derive(Debug, Clone)] -pub struct MetaChunk { - /// Compiled lua bytecode of the entrypoint script. - pub bytecode: Vec, - /// Offset to the the beginning of the bytecode from the start of the lune binary. - pub bytecode_offset: Option, - /// Number of files present, currently unused. **For future use**. - pub file_count: Option, -} - -impl MetaChunk { - pub fn new() -> Self { - Self { - bytecode: Vec::new(), - bytecode_offset: None, - file_count: None, - } - } - - pub fn with_bytecode(&mut self, bytecode: Vec) -> Self { - self.bytecode = bytecode; - - self.clone() - } - - pub fn with_bytecode_offset(&mut self, offset: u64) -> Self { - self.bytecode_offset = Some(offset); - - self.clone() - } - - pub fn with_file_count(&mut self, count: u64) -> Self { - self.file_count = Some(count); - - self.clone() - } - - pub fn build(self, endianness: &str) -> Vec { - match endianness { - "big" => self.to_be_bytes(), - "little" => self.to_le_bytes(), - &_ => panic!("unexpected endianness"), - } - } - - fn from_bytes(bytes: &[u8], int_handler: fn([u8; 8]) -> u64) -> Result { - let mut bytecode_offset = 0; - let mut bytecode_size = 0; - - // standalone binary structure (reversed, 8 bytes per field) - // [0] => magic signature - // ---------------- - // -- META Chunk -- - // [1] => file count - // [2] => bytecode size - // [3] => bytecode offset - // ---------------- - // -- MISC Chunk -- - // [4..n] => bytecode (variable size) - // ---------------- - // NOTE: All integers are 8 byte, padded, unsigned & 64 bit (u64's). - - // The rchunks will have unequally sized sections in the beginning - // but that doesn't matter to us because we don't need anything past the - // middle chunks where the bytecode is stored - bytes - .rchunks(MAGIC.len()) - .enumerate() - .try_for_each(|(idx, chunk)| { - if bytecode_offset != 0 && bytecode_size != 0 { - return ControlFlow::Break(()); - } - - if idx == 0 && chunk != MAGIC { - // Binary is guaranteed to be standalone, we've confirmed this before - unreachable!("expected proper magic signature for standalone binary") - } - - if idx == 3 { - bytecode_offset = int_handler(chunk.try_into().unwrap()); - } - - if idx == 2 { - bytecode_size = int_handler(chunk.try_into().unwrap()); - } - - ControlFlow::Continue(()) - }); - - println!("size: {}", bytecode_size); - println!("offset: {}", bytecode_offset); - - Ok(Self { - bytecode: bytes[usize::try_from(bytecode_offset)? - ..usize::try_from(bytecode_offset + bytecode_size)?] - .to_vec(), - bytecode_offset: Some(bytecode_offset), - file_count: Some(1), - }) - } -} - -impl Default for MetaChunk { - fn default() -> Self { - Self { - bytecode: Vec::new(), - bytecode_offset: Some(0), - file_count: Some(1), - } - } -} - -impl ToBytes for MetaChunk { - type Bytes = Vec; - - fn to_be_bytes(&self) -> Self::Bytes { - // We start with the bytecode offset as the first field already filled in - let mut tmp = self.bytecode_offset.unwrap().to_be_bytes().to_vec(); - - // NOTE: The order of the fields here are reversed, which is on purpose - tmp.extend(self.bytecode.len().to_be_bytes()); - tmp.extend(self.file_count.unwrap().to_be_bytes()); - - tmp - } - - fn to_le_bytes(&self) -> Self::Bytes { - // We start with the bytecode offset as the first field already filled in - let mut tmp = self.bytecode_offset.unwrap().to_le_bytes().to_vec(); - - // NOTE: The order of the fields here are reversed, which is on purpose - tmp.extend(self.bytecode.len().to_le_bytes()); - tmp.extend(self.file_count.unwrap().to_le_bytes()); - - println!("size: {}", self.bytecode.len()); - println!("offset: {:?}", self.bytecode_offset); - - tmp - } -} - -impl FromBytes for MetaChunk { - type Bytes = Vec; - - fn from_be_bytes(bytes: &Self::Bytes) -> Self { - Self::from_bytes(bytes, u64::from_be_bytes).unwrap() - } - - fn from_le_bytes(bytes: &Self::Bytes) -> Self { - Self::from_bytes(bytes, u64::from_le_bytes).unwrap() - } -} +use crate::executor::{MetaChunk, MAGIC}; /** Compiles and embeds the bytecode of a requested lua file to form a standalone binary, diff --git a/src/executor.rs b/src/executor.rs index 6fecf510..a4470705 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -1,12 +1,171 @@ -use std::{env, process::ExitCode}; +use std::{env, ops::ControlFlow, process::ExitCode}; -use crate::cli::build::{MetaChunk, MAGIC}; use lune::Lune; use anyhow::Result; -use num_traits::FromBytes; +use num_traits::{FromBytes, ToBytes}; use tokio::fs::read as read_to_vec; +// The signature which separates indicates the presence of bytecode to execute +// If a binary contains this magic signature as the last 8 bytes, that must mean +// it is a standalone binary +pub const MAGIC: &[u8; 8] = b"cr3sc3nt"; + +/// Utility struct to parse and generate bytes to the META chunk of standalone binaries. +#[derive(Debug, Clone)] +pub struct MetaChunk { + /// Compiled lua bytecode of the entrypoint script. + pub bytecode: Vec, + /// Offset to the the beginning of the bytecode from the start of the lune binary. + pub bytecode_offset: Option, + /// Number of files present, currently unused. **For future use**. + pub file_count: Option, +} + +impl MetaChunk { + /// Creates an emtpy `MetaChunk` instance. + pub fn new() -> Self { + Self { + bytecode: Vec::new(), + bytecode_offset: None, + file_count: None, + } + } + + /// Builder method to include the bytecode, **mandatory** before build. + pub fn with_bytecode(&mut self, bytecode: Vec) -> Self { + self.bytecode = bytecode; + + self.clone() + } + + /// Builder method to include the bytecode offset, **mandatory** before build. + pub fn with_bytecode_offset(&mut self, offset: u64) -> Self { + self.bytecode_offset = Some(offset); + + self.clone() + } + + /// Builder method to include the file count, **mandatory** before build. + + pub fn with_file_count(&mut self, count: u64) -> Self { + self.file_count = Some(count); + + self.clone() + } + + /// Builds the final `Vec` of bytes, based on the endianness specified. + pub fn build(self, endianness: &str) -> Vec { + match endianness { + "big" => self.to_be_bytes(), + "little" => self.to_le_bytes(), + &_ => panic!("unexpected endianness"), + } + } + + /// Internal method which implements endian independent bytecode discovery logic. + fn from_bytes(bytes: &[u8], int_handler: fn([u8; 8]) -> u64) -> Result { + let mut bytecode_offset = 0; + let mut bytecode_size = 0; + + // standalone binary structure (reversed, 8 bytes per field) + // [0] => magic signature + // ---------------- + // -- META Chunk -- + // [1] => file count + // [2] => bytecode size + // [3] => bytecode offset + // ---------------- + // -- MISC Chunk -- + // [4..n] => bytecode (variable size) + // ---------------- + // NOTE: All integers are 8 byte, padded, unsigned & 64 bit (u64's). + + // The rchunks will have unequally sized sections in the beginning + // but that doesn't matter to us because we don't need anything past the + // middle chunks where the bytecode is stored + bytes + .rchunks(MAGIC.len()) + .enumerate() + .try_for_each(|(idx, chunk)| { + if bytecode_offset != 0 && bytecode_size != 0 { + return ControlFlow::Break(()); + } + + if idx == 0 && chunk != MAGIC { + // Binary is guaranteed to be standalone, we've confirmed this before + unreachable!("expected proper magic signature for standalone binary") + } + + if idx == 3 { + bytecode_offset = int_handler(chunk.try_into().unwrap()); + } + + if idx == 2 { + bytecode_size = int_handler(chunk.try_into().unwrap()); + } + + ControlFlow::Continue(()) + }); + + Ok(Self { + bytecode: bytes[usize::try_from(bytecode_offset)? + ..usize::try_from(bytecode_offset + bytecode_size)?] + .to_vec(), + bytecode_offset: Some(bytecode_offset), + file_count: Some(1), + }) + } +} + +impl Default for MetaChunk { + fn default() -> Self { + Self { + bytecode: Vec::new(), + bytecode_offset: Some(0), + file_count: Some(1), + } + } +} + +impl ToBytes for MetaChunk { + type Bytes = Vec; + + fn to_be_bytes(&self) -> Self::Bytes { + // We start with the bytecode offset as the first field already filled in + let mut tmp = self.bytecode_offset.unwrap().to_be_bytes().to_vec(); + + // NOTE: The order of the fields here are reversed, which is on purpose + tmp.extend(self.bytecode.len().to_be_bytes()); + tmp.extend(self.file_count.unwrap().to_be_bytes()); + + tmp + } + + fn to_le_bytes(&self) -> Self::Bytes { + // We start with the bytecode offset as the first field already filled in + let mut tmp = self.bytecode_offset.unwrap().to_le_bytes().to_vec(); + + // NOTE: The order of the fields here are reversed, which is on purpose + tmp.extend(self.bytecode.len().to_le_bytes()); + tmp.extend(self.file_count.unwrap().to_le_bytes()); + + tmp + } +} + +impl FromBytes for MetaChunk { + type Bytes = Vec; + + fn from_be_bytes(bytes: &Self::Bytes) -> Self { + Self::from_bytes(bytes, u64::from_be_bytes).unwrap() + } + + fn from_le_bytes(bytes: &Self::Bytes) -> Self { + Self::from_bytes(bytes, u64::from_le_bytes).unwrap() + } +} + /** Returns information about whether the execution environment is standalone or not, the standalone binary signature, and the contents of the binary. From ddff5364b7c2beb29079a6d5f277e63f0952833c Mon Sep 17 00:00:00 2001 From: Filip Tibell Date: Sat, 13 Jan 2024 21:10:57 +0100 Subject: [PATCH 22/22] Make standalone compilation more minimal for initial release, minor polish & fixes --- Cargo.lock | 1 - Cargo.toml | 1 - src/cli/build.rs | 90 +++++++++----------- src/cli/mod.rs | 2 - src/executor.rs | 211 +++++++++-------------------------------------- 5 files changed, 78 insertions(+), 227 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f5e29e62..1f8c6e2d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1126,7 +1126,6 @@ dependencies = [ "itertools", "lz4_flex", "mlua", - "num-traits", "once_cell", "os_str_bytes", "path-clean", diff --git a/Cargo.toml b/Cargo.toml index 6474e9d9..e7531d63 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -110,7 +110,6 @@ tokio-tungstenite = { version = "0.20", features = ["rustls-tls-webpki-roots"] } ### DATETIME chrono = "0.4" chrono_lc = "0.1" -num-traits = "0.2" ### CLI diff --git a/src/cli/build.rs b/src/cli/build.rs index b3977857..998077a2 100644 --- a/src/cli/build.rs +++ b/src/cli/build.rs @@ -1,78 +1,64 @@ -use console::Style; use std::{env, path::Path, process::ExitCode}; -use tokio::{ - fs::{self, OpenOptions}, - io::AsyncWriteExt, -}; use anyhow::Result; +use console::style; use mlua::Compiler as LuaCompiler; +use tokio::{fs, io::AsyncWriteExt as _}; -use crate::executor::{MetaChunk, MAGIC}; +use crate::executor::MetaChunk; /** - Compiles and embeds the bytecode of a requested lua file to form a standalone binary, - then writes it to an output file, with the required permissions. + Compiles and embeds the bytecode of a given lua file to form a standalone + binary, then writes it to an output file, with the required permissions. */ #[allow(clippy::similar_names)] -pub async fn build_standalone>( - script_path: String, - output_path: T, - code: impl AsRef<[u8]>, +pub async fn build_standalone( + input_path: impl AsRef, + output_path: impl AsRef, + source_code: impl AsRef<[u8]>, ) -> Result { - let log_output_path = output_path.as_ref().display(); - - let prefix_style = Style::new().green().bold(); - let compile_prefix = prefix_style.apply_to("Compile"); - let bytecode_prefix = prefix_style.apply_to("Bytecode"); - let write_prefix = prefix_style.apply_to("Write"); - let compiled_prefix = prefix_style.apply_to("Compiled"); - - println!("{compile_prefix} {script_path}"); + let input_path_displayed = input_path.as_ref().display(); + let output_path_displayed = output_path.as_ref().display(); // First, we read the contents of the lune interpreter as our starting point + println!( + "Creating standalone binary using {}", + style(input_path_displayed).green() + ); let mut patched_bin = fs::read(env::current_exe()?).await?; - let base_bin_offset = u64::try_from(patched_bin.len())?; // Compile luau input into bytecode let bytecode = LuaCompiler::new() .set_optimization_level(2) .set_coverage_level(0) - .set_debug_level(0) - .compile(code); - - println!(" {bytecode_prefix} {script_path}"); - - patched_bin.extend(&bytecode); + .set_debug_level(1) + .compile(source_code); - let meta = MetaChunk::new() - .with_bytecode(bytecode) - .with_bytecode_offset(base_bin_offset) - .with_file_count(1_u64); // Start with the base bytecode offset + // Append the bytecode / metadata to the end + let meta = MetaChunk { bytecode }; + patched_bin.extend_from_slice(&meta.to_bytes()); - // Include metadata in the META chunk, each field is 8 bytes - patched_bin.extend(meta.build("little")); + // And finally write the patched binary to the output file + println!( + "Writing standalone binary to {}", + style(output_path_displayed).blue() + ); + write_executable_file_to(output_path, patched_bin).await?; - // Append the magic signature to the base binary - patched_bin.extend(MAGIC); - - // Write the compiled binary to file - #[cfg(target_family = "unix")] - OpenOptions::new() - .write(true) - .create(true) - .mode(0o770) // read, write and execute permissions for user and group - .open(&output_path) - .await? - .write_all(&patched_bin) - .await?; + Ok(ExitCode::SUCCESS) +} - #[cfg(target_family = "windows")] - fs::write(&output_path, &patched_bin).await?; +async fn write_executable_file_to(path: impl AsRef, bytes: impl AsRef<[u8]>) -> Result<()> { + let mut options = fs::OpenOptions::new(); + options.write(true).create(true).truncate(true); - println!(" {write_prefix} {log_output_path}"); + #[cfg(unix)] + { + options.mode(0o755); // Read & execute for all, write for owner + } - println!("{compiled_prefix} {log_output_path}"); + let mut file = options.open(path).await?; + file.write_all(bytes.as_ref()).await?; - Ok(ExitCode::SUCCESS) + Ok(()) } diff --git a/src/cli/mod.rs b/src/cli/mod.rs index cdbd8316..581d9a9d 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -180,8 +180,6 @@ impl Cli { let output_path = PathBuf::from(script_path.clone()).with_extension(env::consts::EXE_EXTENSION); - println!("Building {script_path} to {}...\n", output_path.display()); - return Ok( match build_standalone(script_path, output_path, script_contents).await { Ok(exitcode) => exitcode, diff --git a/src/executor.rs b/src/executor.rs index a4470705..0b6f3f0a 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -1,207 +1,76 @@ -use std::{env, ops::ControlFlow, process::ExitCode}; +use std::{env, process::ExitCode}; use lune::Lune; -use anyhow::Result; -use num_traits::{FromBytes, ToBytes}; -use tokio::fs::read as read_to_vec; +use anyhow::{bail, Result}; +use tokio::fs; -// The signature which separates indicates the presence of bytecode to execute -// If a binary contains this magic signature as the last 8 bytes, that must mean -// it is a standalone binary -pub const MAGIC: &[u8; 8] = b"cr3sc3nt"; +const MAGIC: &[u8; 8] = b"cr3sc3nt"; -/// Utility struct to parse and generate bytes to the META chunk of standalone binaries. +/** + Metadata for a standalone Lune executable. Can be used to + discover and load the bytecode contained in a standalone binary. +*/ #[derive(Debug, Clone)] pub struct MetaChunk { - /// Compiled lua bytecode of the entrypoint script. pub bytecode: Vec, - /// Offset to the the beginning of the bytecode from the start of the lune binary. - pub bytecode_offset: Option, - /// Number of files present, currently unused. **For future use**. - pub file_count: Option, } impl MetaChunk { - /// Creates an emtpy `MetaChunk` instance. - pub fn new() -> Self { - Self { - bytecode: Vec::new(), - bytecode_offset: None, - file_count: None, + /** + Tries to read a standalone binary from the given bytes. + */ + pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result { + let bytes = bytes.as_ref(); + if bytes.len() < 16 || !bytes.ends_with(MAGIC) { + bail!("not a standalone binary") } - } - /// Builder method to include the bytecode, **mandatory** before build. - pub fn with_bytecode(&mut self, bytecode: Vec) -> Self { - self.bytecode = bytecode; + // Extract bytecode size + let bytecode_size_bytes = &bytes[bytes.len() - 16..bytes.len() - 8]; + let bytecode_size = + usize::try_from(u64::from_be_bytes(bytecode_size_bytes.try_into().unwrap()))?; - self.clone() - } - - /// Builder method to include the bytecode offset, **mandatory** before build. - pub fn with_bytecode_offset(&mut self, offset: u64) -> Self { - self.bytecode_offset = Some(offset); + // Extract bytecode + let bytecode = bytes[bytes.len() - 16 - bytecode_size..].to_vec(); - self.clone() + Ok(Self { bytecode }) } - /// Builder method to include the file count, **mandatory** before build. - - pub fn with_file_count(&mut self, count: u64) -> Self { - self.file_count = Some(count); - - self.clone() - } - - /// Builds the final `Vec` of bytes, based on the endianness specified. - pub fn build(self, endianness: &str) -> Vec { - match endianness { - "big" => self.to_be_bytes(), - "little" => self.to_le_bytes(), - &_ => panic!("unexpected endianness"), - } - } - - /// Internal method which implements endian independent bytecode discovery logic. - fn from_bytes(bytes: &[u8], int_handler: fn([u8; 8]) -> u64) -> Result { - let mut bytecode_offset = 0; - let mut bytecode_size = 0; - - // standalone binary structure (reversed, 8 bytes per field) - // [0] => magic signature - // ---------------- - // -- META Chunk -- - // [1] => file count - // [2] => bytecode size - // [3] => bytecode offset - // ---------------- - // -- MISC Chunk -- - // [4..n] => bytecode (variable size) - // ---------------- - // NOTE: All integers are 8 byte, padded, unsigned & 64 bit (u64's). - - // The rchunks will have unequally sized sections in the beginning - // but that doesn't matter to us because we don't need anything past the - // middle chunks where the bytecode is stored + /** + Writes the metadata chunk to a byte vector, to later bet read using `from_bytes`. + */ + pub fn to_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.extend_from_slice(&self.bytecode); + bytes.extend_from_slice(&(self.bytecode.len() as u64).to_be_bytes()); + bytes.extend_from_slice(MAGIC); bytes - .rchunks(MAGIC.len()) - .enumerate() - .try_for_each(|(idx, chunk)| { - if bytecode_offset != 0 && bytecode_size != 0 { - return ControlFlow::Break(()); - } - - if idx == 0 && chunk != MAGIC { - // Binary is guaranteed to be standalone, we've confirmed this before - unreachable!("expected proper magic signature for standalone binary") - } - - if idx == 3 { - bytecode_offset = int_handler(chunk.try_into().unwrap()); - } - - if idx == 2 { - bytecode_size = int_handler(chunk.try_into().unwrap()); - } - - ControlFlow::Continue(()) - }); - - Ok(Self { - bytecode: bytes[usize::try_from(bytecode_offset)? - ..usize::try_from(bytecode_offset + bytecode_size)?] - .to_vec(), - bytecode_offset: Some(bytecode_offset), - file_count: Some(1), - }) - } -} - -impl Default for MetaChunk { - fn default() -> Self { - Self { - bytecode: Vec::new(), - bytecode_offset: Some(0), - file_count: Some(1), - } - } -} - -impl ToBytes for MetaChunk { - type Bytes = Vec; - - fn to_be_bytes(&self) -> Self::Bytes { - // We start with the bytecode offset as the first field already filled in - let mut tmp = self.bytecode_offset.unwrap().to_be_bytes().to_vec(); - - // NOTE: The order of the fields here are reversed, which is on purpose - tmp.extend(self.bytecode.len().to_be_bytes()); - tmp.extend(self.file_count.unwrap().to_be_bytes()); - - tmp - } - - fn to_le_bytes(&self) -> Self::Bytes { - // We start with the bytecode offset as the first field already filled in - let mut tmp = self.bytecode_offset.unwrap().to_le_bytes().to_vec(); - - // NOTE: The order of the fields here are reversed, which is on purpose - tmp.extend(self.bytecode.len().to_le_bytes()); - tmp.extend(self.file_count.unwrap().to_le_bytes()); - - tmp - } -} - -impl FromBytes for MetaChunk { - type Bytes = Vec; - - fn from_be_bytes(bytes: &Self::Bytes) -> Self { - Self::from_bytes(bytes, u64::from_be_bytes).unwrap() - } - - fn from_le_bytes(bytes: &Self::Bytes) -> Self { - Self::from_bytes(bytes, u64::from_le_bytes).unwrap() } } /** - Returns information about whether the execution environment is standalone - or not, the standalone binary signature, and the contents of the binary. + Returns whether or not the currently executing Lune binary + is a standalone binary, and if so, the bytes of the binary. */ pub async fn check_env() -> (bool, Vec) { - // Read the current lune binary to memory - let bin = if let Ok(contents) = read_to_vec( - env::current_exe().expect("failed to get path to current running lune executable"), - ) - .await - { - contents - } else { - Vec::new() - }; - - let is_standalone = - !bin.is_empty() && bin[bin.len() - MAGIC.len()..bin.len()] == MAGIC.to_vec(); - - (is_standalone, bin) + let path = env::current_exe().expect("failed to get path to current running lune executable"); + let contents = fs::read(path).await.unwrap_or_default(); + let is_standalone = contents.ends_with(MAGIC); + (is_standalone, contents) } /** Discovers, loads and executes the bytecode contained in a standalone binary. */ -pub async fn run_standalone(bin: Vec) -> Result { - // If we were able to retrieve the required metadata, we load - // and execute the bytecode - let MetaChunk { bytecode, .. } = MetaChunk::from_le_bytes(&bin); - - // Skip the first argument which is the path to current executable +pub async fn run_standalone(patched_bin: impl AsRef<[u8]>) -> Result { + // The first argument is the path to the current executable let args = env::args().skip(1).collect::>(); + let meta = MetaChunk::from_bytes(patched_bin).expect("must be a standalone binary"); let result = Lune::new() .with_args(args) - .run("STANDALONE", bytecode) + .run("STANDALONE", meta.bytecode) .await; Ok(match result {