From 7236756ff5d6838dd61ee9929c91c2bc01eebdfc Mon Sep 17 00:00:00 2001 From: David Sherret Date: Tue, 22 Oct 2024 19:19:56 -0400 Subject: [PATCH 01/20] perf(compile): use less memory --- Cargo.lock | 35 +- cli/Cargo.toml | 1 - cli/mainrt.rs | 9 +- cli/standalone/binary.rs | 769 ++++++++++++++++++++++++++++------ cli/standalone/file_system.rs | 4 +- cli/standalone/mod.rs | 210 ++++------ cli/standalone/virtual_fs.rs | 65 ++- cli/tools/compile.rs | 25 +- cli/util/text_encoding.rs | 13 + 9 files changed, 797 insertions(+), 334 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 27af675174c82b..9e3f3b958f904c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1196,7 +1196,6 @@ dependencies = [ "dprint-plugin-markdown", "dprint-plugin-typescript", "env_logger", - "eszip", "fancy-regex", "faster-hex", "flate2", @@ -2891,29 +2890,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31ae425815400e5ed474178a7a22e275a9687086a12ca63ec793ff292d8fdae8" -[[package]] -name = "eszip" -version = "0.79.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eb55c89bdde75a3826a79d49c9d847623ae7fbdb2695b542982982da990d33e" -dependencies = [ - "anyhow", - "async-trait", - "base64 0.21.7", - "deno_ast", - "deno_graph", - "deno_npm", - "deno_semver", - "futures", - "hashlink 0.8.4", - "indexmap", - "serde", - "serde_json", - "sha2", - "thiserror", - "url", -] - [[package]] name = "fallible-iterator" version = "0.3.0" @@ -3525,15 +3501,6 @@ dependencies = [ "allocator-api2", ] -[[package]] -name = "hashlink" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" -dependencies = [ - "hashbrown", -] - [[package]] name = "hashlink" version = "0.9.1" @@ -5811,7 +5778,7 @@ dependencies = [ "bitflags 2.6.0", "fallible-iterator", "fallible-streaming-iterator", - "hashlink 0.9.1", + "hashlink", "libsqlite3-sys", "smallvec", ] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index d6d8982b12847c..26b6ef5d58f2aa 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -84,7 +84,6 @@ deno_runtime = { workspace = true, features = ["include_js_files_for_snapshottin deno_semver.workspace = true deno_task_shell = "=0.18.1" deno_terminal.workspace = true -eszip = "=0.79.1" libsui = "0.4.0" napi_sym.workspace = true node_resolver.workspace = true diff --git a/cli/mainrt.rs b/cli/mainrt.rs index 02d58fcee1500f..f5b798f81766d6 100644 --- a/cli/mainrt.rs +++ b/cli/mainrt.rs @@ -88,11 +88,10 @@ fn main() { let standalone = standalone::extract_standalone(Cow::Owned(args)); let future = async move { match standalone { - Ok(Some(future)) => { - let (metadata, eszip) = future.await?; - util::logger::init(metadata.log_level); - load_env_vars(&metadata.env_vars_from_env_file); - let exit_code = standalone::run(eszip, metadata).await?; + Ok(Some(data)) => { + util::logger::init(data.metadata.log_level); + load_env_vars(&data.metadata.env_vars_from_env_file); + let exit_code = standalone::run(data).await?; std::process::exit(exit_code); } Ok(None) => Ok(()), diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 52ee4eeb28671d..394b1a7938609d 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -9,14 +9,18 @@ use std::ffi::OsString; use std::fs; use std::fs::File; use std::future::Future; +use std::io::ErrorKind; use std::io::Read; use std::io::Seek; use std::io::SeekFrom; use std::io::Write; +use std::ops::Range; use std::path::Path; use std::path::PathBuf; use std::process::Command; +use std::sync::Arc; +use deno_ast::MediaType; use deno_ast::ModuleSpecifier; use deno_config::workspace::PackageJsonDepResolution; use deno_config::workspace::ResolverWorkspaceJsrPackage; @@ -30,13 +34,21 @@ use deno_core::futures::AsyncReadExt; use deno_core::futures::AsyncSeekExt; use deno_core::serde_json; use deno_core::url::Url; +use deno_graph::source::RealFileSystem; +use deno_graph::ModuleGraph; +use deno_npm::resolution::SerializedNpmResolutionSnapshot; +use deno_npm::resolution::SerializedNpmResolutionSnapshotPackage; +use deno_npm::resolution::ValidSerializedNpmResolutionSnapshot; +use deno_npm::NpmPackageId; use deno_npm::NpmSystemInfo; +use deno_runtime::deno_fs; +use deno_runtime::deno_fs::FileSystem; +use deno_runtime::deno_fs::RealFs; use deno_runtime::deno_node::PackageJson; use deno_semver::npm::NpmVersionReqParseError; use deno_semver::package::PackageReq; use deno_semver::Version; use deno_semver::VersionReqSpecifierParseError; -use eszip::EszipRelativeFileBaseUrl; use indexmap::IndexMap; use log::Level; use serde::Deserialize; @@ -60,11 +72,58 @@ use crate::util::fs::canonicalize_path_maybe_not_exists; use crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::ProgressBarStyle; +use super::file_system::DenoCompileFileSystem; use super::virtual_fs::FileBackedVfs; use super::virtual_fs::VfsBuilder; use super::virtual_fs::VfsRoot; use super::virtual_fs::VirtualDirectory; +/// A URL that can be designated as the base for relative URLs. +/// +/// After creation, this URL may be used to get the key for a +/// module in the binary. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct StandaloneRelativeFileBaseUrl<'a>(&'a Url); + +impl<'a> From<&'a Url> for StandaloneRelativeFileBaseUrl<'a> { + fn from(url: &'a Url) -> Self { + Self(url) + } +} + +impl<'a> StandaloneRelativeFileBaseUrl<'a> { + pub fn new(url: &'a Url) -> Self { + debug_assert_eq!(url.scheme(), "file"); + Self(url) + } + + /// Gets the module map key of the provided specifier. + /// + /// * Descendant file specifiers will be made relative to the base. + /// * Non-descendant file specifiers will stay as-is (absolute). + /// * Non-file specifiers will stay as-is. + pub fn specifier_key<'b>(&self, target: &'b Url) -> Cow<'b, str> { + if target.scheme() != "file" { + return Cow::Borrowed(target.as_str()); + } + + match self.0.make_relative(target) { + Some(relative) => { + if relative.starts_with("../") { + Cow::Borrowed(target.as_str()) + } else { + Cow::Owned(relative) + } + } + None => Cow::Borrowed(target.as_str()), + } + } + + pub fn inner(&self) -> &Url { + self.0 + } +} + const MAGIC_TRAILER: &[u8; 8] = b"d3n0l4nd"; #[derive(Deserialize, Serialize)] @@ -120,70 +179,45 @@ pub struct Metadata { pub unstable_config: UnstableConfig, } -pub fn load_npm_vfs(root_dir_path: PathBuf) -> Result { - let data = libsui::find_section("d3n0l4nd").unwrap(); - - // We do the first part sync so it can complete quickly - let trailer: [u8; TRAILER_SIZE] = data[0..TRAILER_SIZE].try_into().unwrap(); - let trailer = match Trailer::parse(&trailer)? { - None => panic!("Could not find trailer"), - Some(trailer) => trailer, - }; - let data = &data[TRAILER_SIZE..]; - - let vfs_data = - &data[trailer.npm_vfs_pos as usize..trailer.npm_files_pos as usize]; - let mut dir: VirtualDirectory = serde_json::from_slice(vfs_data)?; - - // align the name of the directory with the root dir - dir.name = root_dir_path - .file_name() - .unwrap() - .to_string_lossy() - .to_string(); - - let fs_root = VfsRoot { - dir, - root_path: root_dir_path, - start_file_offset: trailer.npm_files_pos, - }; - Ok(FileBackedVfs::new(data.to_vec(), fs_root)) -} - fn write_binary_bytes( mut file_writer: File, original_bin: Vec, metadata: &Metadata, - eszip: eszip::EszipV2, - npm_vfs: Option<&VirtualDirectory>, - npm_files: &Vec>, + npm_snapshot: Option, + remote_modules_store: &RemoteModulesStoreBuilder, + vfs: VfsBuilder, compile_flags: &CompileFlags, ) -> Result<(), AnyError> { let metadata = serde_json::to_string(metadata)?.as_bytes().to_vec(); - let npm_vfs = serde_json::to_string(&npm_vfs)?.as_bytes().to_vec(); - let eszip_archive = eszip.into_bytes(); + let npm_snapshot = + npm_snapshot.map(serialize_npm_snapshot).unwrap_or_default(); + let (vfs, vfs_files) = vfs.into_dir_and_files(); + let vfs = serde_json::to_string(&vfs)?.as_bytes().to_vec(); let mut writer = Vec::new(); // write the trailer, which includes the positions // of the data blocks in the file writer.write_all(&{ - let metadata_pos = eszip_archive.len() as u64; - let npm_vfs_pos = metadata_pos + (metadata.len() as u64); - let npm_files_pos = npm_vfs_pos + (npm_vfs.len() as u64); + let npm_snapshot_pos = metadata.len() as u64; + let remote_modules_pos = npm_snapshot_pos + (npm_snapshot.len() as u64); + let vfs_pos = remote_modules_pos + remote_modules_store.total_len(); + let files_pos = vfs_pos + (vfs.len() as u64); Trailer { - eszip_pos: 0, - metadata_pos, - npm_vfs_pos, - npm_files_pos, + metadata_pos: 0, + npm_snapshot_pos, + remote_modules_pos, + vfs_pos, + files_pos, } .as_bytes() })?; - writer.write_all(&eszip_archive)?; writer.write_all(&metadata)?; - writer.write_all(&npm_vfs)?; - for file in npm_files { + writer.write_all(&npm_snapshot)?; + remote_modules_store.write(&mut writer)?; + writer.write_all(&vfs)?; + for file in &vfs_files { writer.write_all(file)?; } @@ -221,6 +255,64 @@ pub fn is_standalone_binary(exe_path: &Path) -> bool { || libsui::utils::is_macho(&data) } +pub struct StandaloneData { + pub fs: Arc, + pub metadata: Metadata, + pub modules: StandaloneModules, + pub npm_snapshot: Option, + pub root_path: PathBuf, + pub vfs: Arc, +} + +pub struct RemoteModuleData<'a> { + pub specifier: &'a ModuleSpecifier, + pub media_type: MediaType, + pub data: Cow<'static, [u8]>, +} + +pub struct StandaloneModules { + remote_modules: RemoteModulesStore, + vfs: Arc, +} + +impl StandaloneModules { + pub fn resolve_specifier<'a>( + &'a self, + specifier: &'a ModuleSpecifier, + ) -> Result, AnyError> { + if specifier.scheme() == "file" { + return Ok(Some(specifier)); + } else { + self.remote_modules.resolve_specifier(specifier) + } + } + + // todo(THIS PR): don't return Option? + pub fn read<'a>( + &'a self, + specifier: &'a ModuleSpecifier, + ) -> Result>, AnyError> { + if specifier.scheme() == "file" { + let path = deno_path_util::url_to_file_path(specifier)?; + let bytes = match self.vfs.file_entry(&path) { + Ok(entry) => self.vfs.read_file_all(entry)?, + Err(err) if err.kind() == ErrorKind::NotFound => { + let bytes = RealFs.read_file_sync(&path, None)?; + Cow::Owned(bytes) + } + Err(err) => return Err(err.into()), + }; + Ok(Some(RemoteModuleData { + media_type: MediaType::from_specifier(specifier), + specifier, + data: bytes, + })) + } else { + self.remote_modules.read(specifier) + } + } +} + /// This function will try to run this binary as a standalone binary /// produced by `deno compile`. It determines if this is a standalone /// binary by skipping over the trailer width at the end of the file, @@ -228,10 +320,7 @@ pub fn is_standalone_binary(exe_path: &Path) -> bool { /// the bundle is executed. If not, this function exits with `Ok(None)`. pub fn extract_standalone( cli_args: Cow>, -) -> Result< - Option>>, - AnyError, -> { +) -> Result, AnyError> { let Some(data) = libsui::find_section("d3n0l4nd") else { return Ok(None); }; @@ -241,44 +330,68 @@ pub fn extract_standalone( None => return Ok(None), Some(trailer) => trailer, }; + let data = &data[TRAILER_SIZE..]; + let root_path = { + let current_exe_path = std::env::current_exe().unwrap(); + let current_exe_name = + current_exe_path.file_name().unwrap().to_string_lossy(); + std::env::temp_dir().join(format!("deno-compile-{}", current_exe_name)) + }; let cli_args = cli_args.into_owned(); - // If we have an eszip, read it out - Ok(Some(async move { - let bufreader = - deno_core::futures::io::BufReader::new(&data[TRAILER_SIZE..]); - - let (eszip, loader) = eszip::EszipV2::parse(bufreader) - .await - .context("Failed to parse eszip header")?; - - let bufreader = loader.await.context("Failed to parse eszip archive")?; - - let mut metadata = String::new(); - - bufreader - .take(trailer.metadata_len()) - .read_to_string(&mut metadata) - .await - .context("Failed to read metadata from the current executable")?; - - let mut metadata: Metadata = serde_json::from_str(&metadata).unwrap(); - metadata.argv.reserve(cli_args.len() - 1); - for arg in cli_args.into_iter().skip(1) { - metadata.argv.push(arg.into_string().unwrap()); - } - - Ok((metadata, eszip)) + let mut metadata: Metadata = + serde_json::from_slice(&data[trailer.metadata_range()]) + .context("failed reading metadata")?; + metadata.argv.reserve(cli_args.len() - 1); + for arg in cli_args.into_iter().skip(1) { + metadata.argv.push(arg.into_string().unwrap()); + } + let remote_modules = + RemoteModulesStore::build(&data[trailer.remote_modules_range()])?; + let npm_snapshot_bytes = &data[trailer.npm_snapshot_range()]; + let npm_snapshot = if npm_snapshot_bytes.is_empty() { + None + } else { + Some(deserialize_npm_snapshot(npm_snapshot_bytes)?) + }; + let vfs = { + let vfs_data = &data[trailer.vfs_range()]; + let mut dir: VirtualDirectory = + serde_json::from_slice(vfs_data).context("failed reading vfs data")?; + + // align the name of the directory with the root dir + dir.name = root_path.file_name().unwrap().to_string_lossy().to_string(); + + let fs_root = VfsRoot { + dir, + root_path: root_path.clone(), + start_file_offset: trailer.files_pos, + }; + Arc::new(FileBackedVfs::new(Cow::Borrowed(data), fs_root)) + }; + let fs: Arc = + Arc::new(DenoCompileFileSystem::new(vfs.clone())); + Ok(Some(StandaloneData { + fs, + metadata, + modules: StandaloneModules { + remote_modules, + vfs: vfs.clone(), + }, + npm_snapshot, + root_path, + vfs, })) } const TRAILER_SIZE: usize = std::mem::size_of::() + 8; // 8 bytes for the magic trailer string struct Trailer { - eszip_pos: u64, metadata_pos: u64, - npm_vfs_pos: u64, - npm_files_pos: u64, + npm_snapshot_pos: u64, + remote_modules_pos: u64, + vfs_pos: u64, + files_pos: u64, } impl Trailer { @@ -288,38 +401,51 @@ impl Trailer { return Ok(None); } - let (eszip_archive_pos, rest) = rest.split_at(8); let (metadata_pos, rest) = rest.split_at(8); - let (npm_vfs_pos, npm_files_pos) = rest.split_at(8); - let eszip_archive_pos = u64_from_bytes(eszip_archive_pos)?; - let metadata_pos = u64_from_bytes(metadata_pos)?; - let npm_vfs_pos = u64_from_bytes(npm_vfs_pos)?; - let npm_files_pos = u64_from_bytes(npm_files_pos)?; + let (npm_snapshot_pos, rest) = rest.split_at(8); + let (remote_modules_pos, rest) = rest.split_at(8); + let (vfs_pos, files_pos) = rest.split_at(8); Ok(Some(Trailer { - eszip_pos: eszip_archive_pos, - metadata_pos, - npm_vfs_pos, - npm_files_pos, + metadata_pos: u64_from_bytes(metadata_pos)?, + npm_snapshot_pos: u64_from_bytes(npm_snapshot_pos)?, + remote_modules_pos: u64_from_bytes(remote_modules_pos)?, + vfs_pos: u64_from_bytes(vfs_pos)?, + files_pos: u64_from_bytes(files_pos)?, })) } pub fn metadata_len(&self) -> u64 { - self.npm_vfs_pos - self.metadata_pos + self.npm_snapshot_pos - self.metadata_pos + } + + pub fn metadata_range(&self) -> Range { + self.metadata_pos as usize..self.npm_snapshot_pos as usize + } + + pub fn npm_snapshot_range(&self) -> Range { + self.npm_snapshot_pos as usize..self.remote_modules_pos as usize + } + + pub fn remote_modules_range(&self) -> Range { + self.remote_modules_pos as usize..self.vfs_pos as usize } - pub fn npm_vfs_len(&self) -> u64 { - self.npm_files_pos - self.npm_vfs_pos + pub fn vfs_range(&self) -> Range { + self.vfs_pos as usize..self.files_pos as usize } pub fn as_bytes(&self) -> Vec { let mut trailer = MAGIC_TRAILER.to_vec(); - trailer.write_all(&self.eszip_pos.to_be_bytes()).unwrap(); trailer.write_all(&self.metadata_pos.to_be_bytes()).unwrap(); - trailer.write_all(&self.npm_vfs_pos.to_be_bytes()).unwrap(); trailer - .write_all(&self.npm_files_pos.to_be_bytes()) + .write_all(&self.npm_snapshot_pos.to_be_bytes()) .unwrap(); trailer + .write_all(&self.remote_modules_pos.to_be_bytes()) + .unwrap(); + trailer.write_all(&self.vfs_pos.to_be_bytes()).unwrap(); + trailer.write_all(&self.files_pos.to_be_bytes()).unwrap(); + trailer } } @@ -362,8 +488,8 @@ impl<'a> DenoCompileBinaryWriter<'a> { pub async fn write_bin( &self, writer: File, - eszip: eszip::EszipV2, - root_dir_url: EszipRelativeFileBaseUrl<'_>, + graph: &ModuleGraph, + root_dir_url: StandaloneRelativeFileBaseUrl<'_>, entrypoint: &ModuleSpecifier, compile_flags: &CompileFlags, cli_options: &CliOptions, @@ -393,7 +519,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { self.write_standalone_binary( writer, original_binary, - eszip, + graph, root_dir_url, entrypoint, cli_options, @@ -497,8 +623,8 @@ impl<'a> DenoCompileBinaryWriter<'a> { &self, writer: File, original_bin: Vec, - mut eszip: eszip::EszipV2, - root_dir_url: EszipRelativeFileBaseUrl<'_>, + graph: &ModuleGraph, + root_dir_url: StandaloneRelativeFileBaseUrl<'_>, entrypoint: &ModuleSpecifier, cli_options: &CliOptions, compile_flags: &CompileFlags, @@ -512,19 +638,17 @@ impl<'a> DenoCompileBinaryWriter<'a> { None => None, }; let root_path = root_dir_url.inner().to_file_path().unwrap(); - let (npm_vfs, npm_files, node_modules) = match self.npm_resolver.as_inner() + let (maybe_npm_vfs, node_modules, npm_snapshot) = match self + .npm_resolver + .as_inner() { InnerCliNpmResolverRef::Managed(managed) => { let snapshot = managed.serialized_valid_snapshot_for_system(&self.npm_system_info); if !snapshot.as_serialized().packages.is_empty() { - let (root_dir, files) = self - .build_vfs(&root_path, cli_options)? - .into_dir_and_files(); - eszip.add_npm_snapshot(snapshot); + let npm_vfs_builder = self.build_npm_vfs(&root_path, cli_options)?; ( - Some(root_dir), - files, + Some(npm_vfs_builder), Some(NodeModules::Managed { node_modules_dir: self.npm_resolver.root_node_modules_path().map( |path| { @@ -536,18 +660,16 @@ impl<'a> DenoCompileBinaryWriter<'a> { }, ), }), + Some(snapshot), ) } else { - (None, Vec::new(), None) + (None, None, None) } } InnerCliNpmResolverRef::Byonm(resolver) => { - let (root_dir, files) = self - .build_vfs(&root_path, cli_options)? - .into_dir_and_files(); + let npm_vfs_builder = VfsBuilder::new(root_path.clone())?; ( - Some(root_dir), - files, + Some(npm_vfs_builder), Some(NodeModules::Byonm { root_node_modules_dir: resolver.root_node_modules_path().map( |node_modules_dir| { @@ -560,9 +682,43 @@ impl<'a> DenoCompileBinaryWriter<'a> { }, ), }), + None, ) } }; + let mut vfs = if let Some(npm_vfs) = maybe_npm_vfs { + // todo: probably need to modify this a bit + npm_vfs + } else { + VfsBuilder::new(root_path.clone())? + }; + let mut remote_modules_store = RemoteModulesStoreBuilder::default(); + for module in graph.modules() { + if module.specifier().scheme() == "file" { + let file_path = deno_path_util::url_to_file_path(module.specifier())?; + vfs.add_file_with_data( + &file_path, + match module.source() { + Some(source) => source.as_bytes().to_vec(), + None => Vec::new(), + }, + )?; + } else if let Some(source) = module.source() { + let media_type = match module { + deno_graph::Module::Js(m) => m.media_type, + deno_graph::Module::Json(m) => m.media_type, + deno_graph::Module::Npm(_) + | deno_graph::Module::Node(_) + | deno_graph::Module::External(_) => MediaType::Unknown, + }; + remote_modules_store.add( + module.specifier(), + media_type, + source.as_bytes().to_vec(), + ); + } + } + remote_modules_store.add_redirects(&graph.redirects); let env_vars_from_env_file = match cli_options.env_file_name() { Some(env_filename) => { @@ -636,14 +792,14 @@ impl<'a> DenoCompileBinaryWriter<'a> { writer, original_bin, &metadata, - eszip, - npm_vfs.as_ref(), - &npm_files, + npm_snapshot.map(|s| s.into_serialized()), + &remote_modules_store, + vfs, compile_flags, ) } - fn build_vfs( + fn build_npm_vfs( &self, root_path: &Path, cli_options: &CliOptions, @@ -754,6 +910,235 @@ impl<'a> DenoCompileBinaryWriter<'a> { } } +enum RemoteModulesStoreSpecifierValue { + Data(usize), + Redirect(ModuleSpecifier), +} + +pub struct RemoteModulesStore { + specifiers: HashMap, + files_data: &'static [u8], +} + +impl RemoteModulesStore { + pub fn build(data: &'static [u8]) -> Result { + fn read_specifier( + input: &[u8], + ) -> Result<(&[u8], (ModuleSpecifier, u64)), AnyError> { + let (input, specifier) = read_string_lossy(input)?; + let specifier = ModuleSpecifier::parse(&specifier)?; + let (input, offset) = read_u64(input)?; + Ok((input, (specifier, offset))) + } + + fn read_redirect( + input: &[u8], + ) -> Result<(&[u8], (ModuleSpecifier, ModuleSpecifier)), AnyError> { + let (input, from) = read_string_lossy(input)?; + let from = ModuleSpecifier::parse(&from)?; + let (input, to) = read_string_lossy(input)?; + let to = ModuleSpecifier::parse(&to)?; + Ok((input, (from, to))) + } + + fn read_headers( + input: &[u8], + ) -> Result< + ( + &[u8], + HashMap, + ), + AnyError, + > { + let (input, specifiers_len) = read_u32_as_usize(input)?; + let (mut input, redirects_len) = read_u32_as_usize(input)?; + let mut specifiers = + HashMap::with_capacity(specifiers_len + redirects_len); + for _ in 0..specifiers_len { + let (current_input, (specifier, offset)) = read_specifier(input)?; + input = current_input; + specifiers.insert( + specifier, + RemoteModulesStoreSpecifierValue::Data(offset as usize), + ); + } + + for _ in 0..redirects_len { + let (current_input, (from, to)) = read_redirect(input)?; + input = current_input; + specifiers.insert(from, RemoteModulesStoreSpecifierValue::Redirect(to)); + } + + Ok((input, specifiers)) + } + + let (files_data, specifiers) = read_headers(data)?; + + Ok(Self { + specifiers, + files_data, + }) + } + + pub fn resolve_specifier<'a>( + &'a self, + specifier: &'a ModuleSpecifier, + ) -> Result, AnyError> { + let mut count = 0; + let mut current = specifier; + loop { + if count > 10 { + bail!("Too many redirects resolving '{}'", specifier); + } + match self.specifiers.get(current) { + Some(RemoteModulesStoreSpecifierValue::Redirect(to)) => { + current = to; + count += 1; + } + Some(RemoteModulesStoreSpecifierValue::Data(_)) => { + return Ok(Some(current)); + } + None => { + return Ok(None); + } + } + } + } + + pub fn read<'a>( + &'a self, + specifier: &'a ModuleSpecifier, + ) -> Result>, AnyError> { + let mut count = 0; + let mut current = specifier; + loop { + if count > 10 { + bail!("Too many redirects resolving '{}'", specifier); + } + match self.specifiers.get(current) { + Some(RemoteModulesStoreSpecifierValue::Redirect(to)) => { + current = to; + count += 1; + } + Some(RemoteModulesStoreSpecifierValue::Data(offset)) => { + let files_data = &self.files_data[*offset..]; + let media_type = deserialize_media_type(files_data[0])?; + let (input, len) = read_u64(&files_data[1..])?; + let data = &input[..len as usize]; + return Ok(Some(RemoteModuleData { + specifier, + media_type, + data: Cow::Borrowed(data), + })); + } + None => { + return Ok(None); + } + } + } + } +} + +// todo(THIS PR): make this better +fn serialize_media_type(media_type: MediaType) -> u8 { + match media_type { + MediaType::JavaScript => 0, + MediaType::Jsx => 1, + MediaType::Mjs => 2, + MediaType::Cjs => 3, + MediaType::TypeScript => 4, + MediaType::Mts => 5, + MediaType::Cts => 6, + MediaType::Dts => 7, + MediaType::Dmts => 8, + MediaType::Dcts => 9, + MediaType::Tsx => 10, + MediaType::Json => 11, + MediaType::Wasm => 12, + MediaType::TsBuildInfo => 13, + MediaType::SourceMap => 14, + MediaType::Unknown => 15, + } +} + +fn deserialize_media_type(value: u8) -> Result { + match value { + 0 => Ok(MediaType::JavaScript), + 1 => Ok(MediaType::Jsx), + 2 => Ok(MediaType::Mjs), + 3 => Ok(MediaType::Cjs), + 4 => Ok(MediaType::TypeScript), + 5 => Ok(MediaType::Mts), + 6 => Ok(MediaType::Cts), + 7 => Ok(MediaType::Dts), + 8 => Ok(MediaType::Dmts), + 9 => Ok(MediaType::Dcts), + 10 => Ok(MediaType::Tsx), + 11 => Ok(MediaType::Json), + 12 => Ok(MediaType::Wasm), + 13 => Ok(MediaType::TsBuildInfo), + 14 => Ok(MediaType::SourceMap), + 15 => Ok(MediaType::Unknown), + _ => bail!("Unknown media type value: {}", value), + } +} + +#[derive(Default)] +struct RemoteModulesStoreBuilder { + specifiers: Vec<(String, u64)>, + data: Vec<(MediaType, Vec)>, + specifiers_byte_len: u64, + data_byte_len: u64, + redirects: Vec<(String, String)>, + redirects_len: u64, +} + +impl RemoteModulesStoreBuilder { + pub fn add(&mut self, specifier: &Url, media_type: MediaType, data: Vec) { + let specifier = specifier.to_string(); + self.specifiers_byte_len += 4 + specifier.len() as u64 + 8; + self.specifiers.push((specifier, self.data_byte_len)); + self.data_byte_len += 1 + 8 + data.len() as u64; + self.data.push((media_type, data)); + } + + pub fn add_redirects(&mut self, redirects: &BTreeMap) { + self.redirects.reserve(redirects.len()); + for (from, to) in redirects { + let from = from.to_string(); + let to = to.to_string(); + self.redirects_len += (4 + from.len() + 4 + to.len()) as u64; + self.redirects.push((from, to)); + } + } + + pub fn total_len(&self) -> u64 { + 4 + 4 + self.specifiers_byte_len + self.redirects_len + self.data_byte_len + } + + pub fn write(&self, writer: &mut dyn Write) -> Result<(), AnyError> { + writer.write_all(&(self.specifiers.len() as u32).to_be_bytes())?; + writer.write_all(&(self.redirects.len() as u32).to_be_bytes())?; + for (specifier, offset) in &self.specifiers { + writer.write_all(&(specifier.len() as u32).to_be_bytes())?; + writer.write_all(specifier.as_bytes())?; + writer.write_all(&offset.to_be_bytes())?; + } + for (from, to) in &self.redirects { + writer.write_all(&(from.len() as u32).to_be_bytes())?; + writer.write_all(from.as_bytes())?; + writer.write_all(&(to.len() as u32).to_be_bytes())?; + writer.write_all(to.as_bytes())?; + } + for (media_type, data) in &self.data { + writer.write_all(&[serialize_media_type(*media_type)])?; + writer.write_all(&(data.len() as u32).to_be_bytes())?; + writer.write_all(data)?; + } + Ok(()) + } +} + /// This function returns the environment variables specified /// in the passed environment file. fn get_file_env_vars( @@ -807,3 +1192,153 @@ fn set_windows_binary_to_gui(bin: &mut [u8]) -> Result<(), AnyError> { .copy_from_slice(&subsystem.to_le_bytes()); Ok(()) } + +fn serialize_npm_snapshot( + mut snapshot: SerializedNpmResolutionSnapshot, +) -> Vec { + fn append_string(bytes: &mut Vec, string: &str) { + let len = string.len() as u32; + bytes.extend_from_slice(&len.to_be_bytes()); + bytes.extend_from_slice(string.as_bytes()); + } + + snapshot.packages.sort_by(|a, b| a.id.cmp(&b.id)); // determinism + let ids_to_stored_ids = snapshot + .packages + .iter() + .enumerate() + .map(|(i, pkg)| (&pkg.id, i as u32)) + .collect::>(); + + let mut root_packages: Vec<_> = snapshot.root_packages.iter().collect(); + root_packages.sort(); + let mut bytes = Vec::new(); + + bytes.extend(&(snapshot.packages.len() as u32).to_be_bytes()); + for pkg in &snapshot.packages { + append_string(&mut bytes, &pkg.id.as_serialized()); + } + + bytes.extend(&(root_packages.len() as u32).to_be_bytes()); + for (req, id) in root_packages { + append_string(&mut bytes, &req.to_string()); + let id = ids_to_stored_ids.get(&id).unwrap(); + bytes.extend_from_slice(&id.to_be_bytes()); + } + + for pkg in &snapshot.packages { + let deps_len = pkg.dependencies.len() as u32; + bytes.extend_from_slice(&deps_len.to_be_bytes()); + let mut deps: Vec<_> = pkg.dependencies.iter().collect(); + deps.sort(); + for (req, id) in deps { + append_string(&mut bytes, req); + let id = ids_to_stored_ids.get(&id).unwrap(); + bytes.extend_from_slice(&id.to_be_bytes()); + } + } + + bytes +} + +fn deserialize_npm_snapshot( + data: &[u8], +) -> Result { + fn read_root_package( + data: &[u8], + ) -> Result<(&[u8], (PackageReq, usize)), AnyError> { + let (data, req) = read_string_lossy(data)?; + let req = PackageReq::from_str(&req)?; + let (data, id) = read_u32_as_usize(data)?; + Ok((data, (req, id))) + } + + let (mut data, packages_len) = read_u32_as_usize(data)?; + + // get a hashmap of all the npm package ids to their serialized ids + let mut data_ids_to_npm_ids = Vec::with_capacity(packages_len); + for _ in 0..packages_len { + let (current_data, id) = read_string_lossy(data)?; + data = current_data; + let id = NpmPackageId::from_serialized(&id)?; + data_ids_to_npm_ids.push(id); + } + + let (mut data, root_packages_len) = read_u32_as_usize(data)?; + let mut root_packages = HashMap::with_capacity(root_packages_len); + for _ in 0..root_packages_len { + let (current_data, (req, id)) = read_root_package(data)?; + data = current_data; + root_packages.insert(req, data_ids_to_npm_ids[id].clone()); + } + + let mut packages = Vec::with_capacity(packages_len); + for _ in 0..packages_len { + let (current_data, id) = read_u32_as_usize(data)?; + data = current_data; + let id = data_ids_to_npm_ids[id].clone(); + let (current_data, deps_len) = read_u32_as_usize(data)?; + data = current_data; + let mut dependencies = HashMap::with_capacity(deps_len); + for _ in 0..deps_len { + let (current_data, req) = read_string_lossy(data)?; + data = current_data; + let (current_data, id) = read_u32_as_usize(data)?; + data = current_data; + // todo(THIS PR): handle when id >= data_ids_to_npm_ids.len() + dependencies.insert(req.into_owned(), data_ids_to_npm_ids[id].clone()); + } + + packages.push(SerializedNpmResolutionSnapshotPackage { + id: id, + system: Default::default(), + dist: Default::default(), + dependencies, + optional_dependencies: Default::default(), + bin: None, + scripts: Default::default(), + deprecated: Default::default(), + }); + } + + if !data.is_empty() { + bail!("Unexpected data left over"); + } + + Ok( + SerializedNpmResolutionSnapshot { + packages, + root_packages, + } + // this is ok because we have already verified that all the + // identifiers found in the snapshot are valid via the + // npm package id -> npm package id mapping + .into_valid_unsafe(), + ) +} + +fn read_string_lossy(data: &[u8]) -> Result<(&[u8], Cow), AnyError> { + let (data, str_len) = read_u32_as_usize(data)?; + if data.len() < str_len { + bail!("Unexpected end of data"); + } + Ok((data, String::from_utf8_lossy(&data[..str_len]))) +} + +fn read_u32_as_usize(data: &[u8]) -> Result<(&[u8], usize), AnyError> { + if data.len() < 4 { + bail!("Unexpected end of data"); + } + let (len_bytes, rest) = data.split_at(4); + let len = u32::from_be_bytes(len_bytes.try_into()?); + Ok((rest, len as usize)) +} + +fn read_u64(data: &[u8]) -> Result<(&[u8], u64), AnyError> { + if data.len() < 8 { + bail!("Unexpected end of data"); + } + let (len_bytes, rest) = data.split_at(8); + let len = u64::from_be_bytes(len_bytes.try_into()?); + Ok((rest, len)) +} diff --git a/cli/standalone/file_system.rs b/cli/standalone/file_system.rs index 314444630bc2d3..712c6ee91878e3 100644 --- a/cli/standalone/file_system.rs +++ b/cli/standalone/file_system.rs @@ -22,8 +22,8 @@ use super::virtual_fs::FileBackedVfs; pub struct DenoCompileFileSystem(Arc); impl DenoCompileFileSystem { - pub fn new(vfs: FileBackedVfs) -> Self { - Self(Arc::new(vfs)) + pub fn new(vfs: Arc) -> Self { + Self(vfs) } fn error_if_in_vfs(&self, path: &Path) -> FsResult<()> { diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index 60018228b7cb72..8604feef03ff4d 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -5,6 +5,8 @@ #![allow(dead_code)] #![allow(unused_imports)] +use binary::StandaloneData; +use binary::StandaloneModules; use deno_ast::MediaType; use deno_cache_dir::npm::NpmCacheDir; use deno_config::workspace::MappedResolution; @@ -38,7 +40,6 @@ use deno_runtime::permissions::RuntimePermissionDescriptorParser; use deno_runtime::WorkerExecutionMode; use deno_runtime::WorkerLogLevel; use deno_semver::npm::NpmPackageReqReference; -use eszip::EszipRelativeFileBaseUrl; use import_map::parse_from_json; use node_resolver::analyze::NodeCodeTranslator; use node_resolver::NodeResolutionMode; @@ -84,46 +85,11 @@ pub use binary::extract_standalone; pub use binary::is_standalone_binary; pub use binary::DenoCompileBinaryWriter; -use self::binary::load_npm_vfs; use self::binary::Metadata; use self::file_system::DenoCompileFileSystem; -struct WorkspaceEszipModule { - specifier: ModuleSpecifier, - inner: eszip::Module, -} - -struct WorkspaceEszip { - eszip: eszip::EszipV2, - root_dir_url: Arc, -} - -impl WorkspaceEszip { - pub fn get_module( - &self, - specifier: &ModuleSpecifier, - ) -> Option { - if specifier.scheme() == "file" { - let specifier_key = EszipRelativeFileBaseUrl::new(&self.root_dir_url) - .specifier_key(specifier); - let module = self.eszip.get_module(&specifier_key)?; - let specifier = self.root_dir_url.join(&module.specifier).unwrap(); - Some(WorkspaceEszipModule { - specifier, - inner: module, - }) - } else { - let module = self.eszip.get_module(specifier.as_str())?; - Some(WorkspaceEszipModule { - specifier: ModuleSpecifier::parse(&module.specifier).unwrap(), - inner: module, - }) - } - } -} - struct SharedModuleLoaderState { - eszip: WorkspaceEszip, + modules: StandaloneModules, workspace_resolver: WorkspaceResolver, node_resolver: Arc, npm_module_loader: Arc, @@ -249,8 +215,10 @@ impl ModuleLoader for EmbeddedModuleLoader { } if specifier.scheme() == "jsr" { - if let Some(module) = self.shared.eszip.get_module(&specifier) { - return Ok(module.specifier); + if let Some(specifier) = + self.shared.modules.resolve_specifier(&specifier)? + { + return Ok(specifier.clone()); } } @@ -345,56 +313,56 @@ impl ModuleLoader for EmbeddedModuleLoader { ); } - let Some(module) = self.shared.eszip.get_module(original_specifier) else { - return deno_core::ModuleLoadResponse::Sync(Err(type_error(format!( - "{MODULE_NOT_FOUND}: {}", - original_specifier - )))); - }; - let original_specifier = original_specifier.clone(); - - deno_core::ModuleLoadResponse::Async( - async move { - let code = module.inner.source().await.ok_or_else(|| { - type_error(format!("Module not found: {}", original_specifier)) - })?; - let code = arc_u8_to_arc_str(code) - .map_err(|_| type_error("Module source is not utf-8"))?; - Ok(deno_core::ModuleSource::new_with_redirect( - match module.inner.kind { - eszip::ModuleKind::JavaScript => ModuleType::JavaScript, - eszip::ModuleKind::Json => ModuleType::Json, - eszip::ModuleKind::Jsonc => { - return Err(type_error("jsonc modules not supported")) - } - eszip::ModuleKind::OpaqueData => { - unreachable!(); - } - }, - ModuleSourceCode::String(code.into()), - &original_specifier, - &module.specifier, - None, + match self.shared.modules.read(original_specifier) { + Ok(Some(module)) => { + return deno_core::ModuleLoadResponse::Sync(Ok( + deno_core::ModuleSource::new_with_redirect( + match module.media_type { + MediaType::JavaScript + | MediaType::Jsx + | MediaType::Mjs + | MediaType::Cjs + | MediaType::TypeScript + | MediaType::Mts + | MediaType::Cts + | MediaType::Dts + | MediaType::Dmts + | MediaType::Dcts + | MediaType::Tsx => ModuleType::JavaScript, + MediaType::Json => ModuleType::Json, + MediaType::Wasm => ModuleType::Wasm, + MediaType::TsBuildInfo + | MediaType::SourceMap + | MediaType::Unknown => { + unreachable!(); + } + }, + ModuleSourceCode::Bytes(match module.data { + Cow::Borrowed(d) => d.into(), + Cow::Owned(d) => d.into_boxed_slice().into(), + }), + &original_specifier, + &module.specifier, + None, + ), )) } - .boxed_local(), - ) + Ok(None) => { + return deno_core::ModuleLoadResponse::Sync(Err(type_error(format!( + "{MODULE_NOT_FOUND}: {}", + original_specifier + )))); + } + Err(err) => { + return deno_core::ModuleLoadResponse::Sync(Err(type_error(format!( + "{:?}", + err + )))); + } + } } } -fn arc_u8_to_arc_str( - arc_u8: Arc<[u8]>, -) -> Result, std::str::Utf8Error> { - // Check that the string is valid UTF-8. - std::str::from_utf8(&arc_u8)?; - // SAFETY: the string is valid UTF-8, and the layout Arc<[u8]> is the same as - // Arc. This is proven by the From> impl for Arc<[u8]> from the - // standard library. - Ok(unsafe { - std::mem::transmute::, std::sync::Arc>(arc_u8) - }) -} - struct StandaloneModuleLoaderFactory { shared: Arc, } @@ -439,13 +407,15 @@ impl RootCertStoreProvider for StandaloneRootCertStoreProvider { } } -pub async fn run( - mut eszip: eszip::EszipV2, - metadata: Metadata, -) -> Result { - let current_exe_path = std::env::current_exe().unwrap(); - let current_exe_name = - current_exe_path.file_name().unwrap().to_string_lossy(); +pub async fn run(data: StandaloneData) -> Result { + let StandaloneData { + fs, + metadata, + modules, + npm_snapshot, + root_path, + vfs, + } = data; let deno_dir_provider = Arc::new(DenoDirProvider::new(None)); let root_cert_store_provider = Arc::new(StandaloneRootCertStoreProvider { ca_stores: metadata.ca_stores, @@ -459,8 +429,6 @@ pub async fn run( )); // use a dummy npm registry url let npm_registry_url = ModuleSpecifier::parse("https://localhost/").unwrap(); - let root_path = - std::env::temp_dir().join(format!("deno-compile-{}", current_exe_name)); let root_dir_url = Arc::new(ModuleSpecifier::from_directory_path(&root_path).unwrap()); let main_module = root_dir_url.join(&metadata.entrypoint_key).unwrap(); @@ -472,21 +440,11 @@ pub async fn run( ); let npm_global_cache_dir = npm_cache_dir.get_cache_location(); let cache_setting = CacheSetting::Only; - let (fs, npm_resolver, maybe_vfs_root) = match metadata.node_modules { + let (fs, npm_resolver) = match metadata.node_modules { Some(binary::NodeModules::Managed { node_modules_dir }) => { - // this will always have a snapshot - let snapshot = eszip.take_npm_snapshot().unwrap(); - let vfs_root_dir_path = if node_modules_dir.is_some() { - root_path.clone() - } else { - npm_cache_dir.root_dir().to_owned() - }; - let vfs = load_npm_vfs(vfs_root_dir_path.clone()) - .context("Failed to load npm vfs.")?; + let snapshot = npm_snapshot.unwrap(); let maybe_node_modules_path = node_modules_dir - .map(|node_modules_dir| vfs_root_dir_path.join(node_modules_dir)); - let fs = Arc::new(DenoCompileFileSystem::new(vfs)) - as Arc; + .map(|node_modules_dir| root_path.join(node_modules_dir)); let npm_resolver = create_cli_npm_resolver(CliNpmResolverCreateOptions::Managed( CliNpmResolverManagedCreateOptions { @@ -518,18 +476,13 @@ pub async fn run( }, )) .await?; - (fs, npm_resolver, Some(vfs_root_dir_path)) + (fs, npm_resolver) } Some(binary::NodeModules::Byonm { root_node_modules_dir, }) => { - let vfs_root_dir_path = root_path.clone(); - let vfs = load_npm_vfs(vfs_root_dir_path.clone()) - .context("Failed to load vfs.")?; let root_node_modules_dir = root_node_modules_dir.map(|p| vfs.root().join(p)); - let fs = Arc::new(DenoCompileFileSystem::new(vfs)) - as Arc; let npm_resolver = create_cli_npm_resolver( CliNpmResolverCreateOptions::Byonm(CliByonmNpmResolverCreateOptions { fs: CliDenoResolverFs(fs.clone()), @@ -537,7 +490,7 @@ pub async fn run( }), ) .await?; - (fs, npm_resolver, Some(vfs_root_dir_path)) + (fs, npm_resolver) } None => { let fs = Arc::new(deno_fs::RealFs) as Arc; @@ -564,7 +517,7 @@ pub async fn run( }, )) .await?; - (fs, npm_resolver, None) + (fs, npm_resolver) } }; @@ -645,10 +598,7 @@ pub async fn run( }; let module_loader_factory = StandaloneModuleLoaderFactory { shared: Arc::new(SharedModuleLoaderState { - eszip: WorkspaceEszip { - eszip, - root_dir_url, - }, + modules, workspace_resolver, node_resolver: cli_node_resolver.clone(), npm_module_loader: Arc::new(NpmModuleLoader::new( @@ -663,19 +613,17 @@ pub async fn run( let permissions = { let mut permissions = metadata.permissions.to_options(/* cli_arg_urls */ &[]); - // if running with an npm vfs, grant read access to it - if let Some(vfs_root) = maybe_vfs_root { - match &mut permissions.allow_read { - Some(vec) if vec.is_empty() => { - // do nothing, already granted - } - Some(vec) => { - vec.push(vfs_root.to_string_lossy().to_string()); - } - None => { - permissions.allow_read = - Some(vec![vfs_root.to_string_lossy().to_string()]); - } + // grant read access to the vfs + match &mut permissions.allow_read { + Some(vec) if vec.is_empty() => { + // do nothing, already granted + } + Some(vec) => { + vec.push(root_path.to_string_lossy().to_string()); + } + None => { + permissions.allow_read = + Some(vec![root_path.to_string_lossy().to_string()]); } } diff --git a/cli/standalone/virtual_fs.rs b/cli/standalone/virtual_fs.rs index 53d045b6257614..b774b88f0628ec 100644 --- a/cli/standalone/virtual_fs.rs +++ b/cli/standalone/virtual_fs.rs @@ -7,6 +7,7 @@ use std::fs::File; use std::io::Read; use std::io::Seek; use std::io::SeekFrom; +use std::ops::Range; use std::path::Path; use std::path::PathBuf; use std::rc::Rc; @@ -119,7 +120,7 @@ impl VfsBuilder { // inline the symlink and make the target file let file_bytes = std::fs::read(&target) .with_context(|| format!("Reading {}", path.display()))?; - self.add_file(&path, file_bytes)?; + self.add_file_with_data(&path, file_bytes)?; } else { log::warn!( "{} Symlink target is outside '{}'. Excluding symlink at '{}' with target '{}'.", @@ -197,10 +198,14 @@ impl VfsBuilder { ) -> Result<(), AnyError> { let file_bytes = std::fs::read(path) .with_context(|| format!("Reading {}", path.display()))?; - self.add_file(path, file_bytes) + self.add_file_with_data(path, file_bytes) } - fn add_file(&mut self, path: &Path, data: Vec) -> Result<(), AnyError> { + pub fn add_file_with_data( + &mut self, + path: &Path, + data: Vec, + ) -> Result<(), AnyError> { log::debug!("Adding file '{}'", path.display()); let checksum = util::checksum::gen(&[&data]); let offset = if let Some(offset) = self.file_offsets.get(&checksum) { @@ -751,14 +756,14 @@ impl deno_io::fs::File for FileBackedVfsFile { #[derive(Debug)] pub struct FileBackedVfs { - file: Mutex>, + vfs_data: Cow<'static, [u8]>, fs_root: VfsRoot, } impl FileBackedVfs { - pub fn new(file: Vec, fs_root: VfsRoot) -> Self { + pub fn new(data: Cow<'static, [u8]>, fs_root: VfsRoot) -> Self { Self { - file: Mutex::new(file), + vfs_data: data, fs_root, } } @@ -827,10 +832,15 @@ impl FileBackedVfs { Ok(path) } - pub fn read_file_all(&self, file: &VirtualFile) -> std::io::Result> { - let mut buf = vec![0; file.len as usize]; - self.read_file(file, 0, &mut buf)?; - Ok(buf) + pub fn read_file_all( + &self, + file: &VirtualFile, + ) -> std::io::Result> { + let read_range = self.get_read_range(file, 0, file.len)?; + match &self.vfs_data { + Cow::Borrowed(data) => Ok(Cow::Borrowed(&data[read_range])), + Cow::Owned(data) => Ok(Cow::Owned(data[read_range].to_vec())), + } } pub fn read_file( @@ -839,18 +849,27 @@ impl FileBackedVfs { pos: u64, buf: &mut [u8], ) -> std::io::Result { - let data = self.file.lock(); + let read_range = self.get_read_range(file, pos, buf.len() as u64)?; + buf.copy_from_slice(&self.vfs_data[read_range]); + Ok(buf.len()) + } + + fn get_read_range( + &self, + file: &VirtualFile, + pos: u64, + len: u64, + ) -> std::io::Result> { + let data = &self.vfs_data; let start = self.fs_root.start_file_offset + file.offset + pos; - let end = start + buf.len() as u64; + let end = start + len; if end > data.len() as u64 { return Err(std::io::Error::new( std::io::ErrorKind::UnexpectedEof, "unexpected EOF", )); } - - buf.copy_from_slice(&data[start as usize..end as usize]); - Ok(buf.len()) + Ok(start as usize..end as usize) } pub fn dir_entry(&self, path: &Path) -> std::io::Result<&VirtualDirectory> { @@ -888,7 +907,7 @@ mod test { #[track_caller] fn read_file(vfs: &FileBackedVfs, path: &Path) -> String { let file = vfs.file_entry(path).unwrap(); - String::from_utf8(vfs.read_file_all(file).unwrap()).unwrap() + String::from_utf8(vfs.read_file_all(file).unwrap().into_owned()).unwrap() } #[test] @@ -901,20 +920,20 @@ mod test { let src_path = src_path.to_path_buf(); let mut builder = VfsBuilder::new(src_path.clone()).unwrap(); builder - .add_file(&src_path.join("a.txt"), "data".into()) + .add_file_with_data(&src_path.join("a.txt"), "data".into()) .unwrap(); builder - .add_file(&src_path.join("b.txt"), "data".into()) + .add_file_with_data(&src_path.join("b.txt"), "data".into()) .unwrap(); assert_eq!(builder.files.len(), 1); // because duplicate data builder - .add_file(&src_path.join("c.txt"), "c".into()) + .add_file_with_data(&src_path.join("c.txt"), "c".into()) .unwrap(); builder - .add_file(&src_path.join("sub_dir").join("d.txt"), "d".into()) + .add_file_with_data(&src_path.join("sub_dir").join("d.txt"), "d".into()) .unwrap(); builder - .add_file(&src_path.join("e.txt"), "e".into()) + .add_file_with_data(&src_path.join("e.txt"), "e".into()) .unwrap(); builder .add_symlink( @@ -1031,7 +1050,7 @@ mod test { ( dest_path.to_path_buf(), FileBackedVfs::new( - data, + Cow::Owned(data), VfsRoot { dir: root_dir, root_path: dest_path.to_path_buf(), @@ -1082,7 +1101,7 @@ mod test { let temp_path = temp_dir.path().canonicalize(); let mut builder = VfsBuilder::new(temp_path.to_path_buf()).unwrap(); builder - .add_file( + .add_file_with_data( temp_path.join("a.txt").as_path(), "0123456789".to_string().into_bytes(), ) diff --git a/cli/tools/compile.rs b/cli/tools/compile.rs index 3cc4414fcb170f..bca280ffc7e680 100644 --- a/cli/tools/compile.rs +++ b/cli/tools/compile.rs @@ -5,6 +5,7 @@ use crate::args::CompileFlags; use crate::args::Flags; use crate::factory::CliFactory; use crate::http_util::HttpClientProvider; +use crate::standalone::binary::StandaloneRelativeFileBaseUrl; use crate::standalone::is_standalone_binary; use deno_ast::ModuleSpecifier; use deno_core::anyhow::bail; @@ -14,7 +15,6 @@ use deno_core::error::AnyError; use deno_core::resolve_url_or_path; use deno_graph::GraphKind; use deno_terminal::colors; -use eszip::EszipRelativeFileBaseUrl; use rand::Rng; use std::path::Path; use std::path::PathBuf; @@ -29,7 +29,6 @@ pub async fn compile( let factory = CliFactory::from_flags(flags); let cli_options = factory.cli_options()?; let module_graph_creator = factory.module_graph_creator().await?; - let parsed_source_cache = factory.parsed_source_cache(); let binary_writer = factory.create_compile_binary_writer().await?; let http_client = factory.http_client_provider(); let module_specifier = cli_options.resolve_main_module()?; @@ -80,7 +79,7 @@ pub async fn compile( let graph = if cli_options.type_check_mode().is_true() { // In this case, the previous graph creation did type checking, which will // create a module graph with types information in it. We don't want to - // store that in the eszip so create a code only module graph from scratch. + // store that in the binary so create a code only module graph from scratch. module_graph_creator .create_graph(GraphKind::CodeOnly, module_roots) .await? @@ -91,11 +90,6 @@ pub async fn compile( let ts_config_for_emit = cli_options .resolve_ts_config_for_emit(deno_config::deno_json::TsConfigType::Emit)?; check_warn_tsconfig(&ts_config_for_emit); - let (transpile_options, emit_options) = - crate::args::ts_config_to_transpile_and_emit_options( - ts_config_for_emit.ts_config, - )?; - let parser = parsed_source_cache.as_capturing_parser(); let root_dir_url = resolve_root_dir_from_specifiers( cli_options.workspace().root_dir(), graph.specifiers().map(|(s, _)| s).chain( @@ -106,17 +100,6 @@ pub async fn compile( ), ); log::debug!("Binary root dir: {}", root_dir_url); - let root_dir_url = EszipRelativeFileBaseUrl::new(&root_dir_url); - let eszip = eszip::EszipV2::from_graph(eszip::FromGraphOptions { - graph, - parser, - transpile_options, - emit_options, - // make all the modules relative to the root folder - relative_file_base: Some(root_dir_url), - npm_packages: None, - })?; - log::info!( "{} {} to {}", colors::green("Compile"), @@ -143,8 +126,8 @@ pub async fn compile( let write_result = binary_writer .write_bin( file, - eszip, - root_dir_url, + &graph, + StandaloneRelativeFileBaseUrl::from(&root_dir_url), module_specifier, &compile_flags, cli_options, diff --git a/cli/util/text_encoding.rs b/cli/util/text_encoding.rs index 0b7601cb9c1608..0739fcf25c396e 100644 --- a/cli/util/text_encoding.rs +++ b/cli/util/text_encoding.rs @@ -103,6 +103,19 @@ pub fn arc_str_to_bytes(arc_str: Arc) -> Arc<[u8]> { unsafe { Arc::from_raw(raw as *const [u8]) } } +pub fn arc_u8_to_arc_str( + arc_u8: Arc<[u8]>, +) -> Result, std::str::Utf8Error> { + // Check that the string is valid UTF-8. + std::str::from_utf8(&arc_u8)?; + // SAFETY: the string is valid UTF-8, and the layout Arc<[u8]> is the same as + // Arc. This is proven by the From> impl for Arc<[u8]> from the + // standard library. + Ok(unsafe { + std::mem::transmute::, std::sync::Arc>(arc_u8) + }) +} + #[cfg(test)] mod tests { use std::sync::Arc; From 1928daf0bda50325a0c298bf20ffd84fa5c8a5b7 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Wed, 23 Oct 2024 14:53:50 -0400 Subject: [PATCH 02/20] working now with typescript --- cli/emit.rs | 1 + cli/factory.rs | 1 + cli/standalone/binary.rs | 623 +++++--------------------------- cli/standalone/mod.rs | 82 ++--- cli/standalone/serialization.rs | 590 ++++++++++++++++++++++++++++++ cli/util/text_encoding.rs | 2 + 6 files changed, 714 insertions(+), 585 deletions(-) create mode 100644 cli/standalone/serialization.rs diff --git a/cli/emit.rs b/cli/emit.rs index b3f4a4477aed94..ad200af0504222 100644 --- a/cli/emit.rs +++ b/cli/emit.rs @@ -60,6 +60,7 @@ impl Emitter { continue; }; + // todo(https://github.com/denoland/deno_media_type/pull/12): use is_emittable() let is_emittable = matches!( module.media_type, MediaType::TypeScript diff --git a/cli/factory.rs b/cli/factory.rs index 25f3551102253f..d5ef4fd8b38714 100644 --- a/cli/factory.rs +++ b/cli/factory.rs @@ -762,6 +762,7 @@ impl CliFactory { let cli_options = self.cli_options()?; Ok(DenoCompileBinaryWriter::new( self.deno_dir()?, + self.emitter()?, self.file_fetcher()?, self.http_client_provider(), self.npm_resolver().await?.as_ref(), diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 394b1a7938609d..7060368ce1baa4 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -44,6 +44,7 @@ use deno_npm::NpmSystemInfo; use deno_runtime::deno_fs; use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_fs::RealFs; +use deno_runtime::deno_io::fs::FsError; use deno_runtime::deno_node::PackageJson; use deno_semver::npm::NpmVersionReqParseError; use deno_semver::package::PackageReq; @@ -61,6 +62,7 @@ use crate::args::NpmInstallDepsProvider; use crate::args::PermissionFlags; use crate::args::UnstableConfig; use crate::cache::DenoDir; +use crate::emit::Emitter; use crate::file_fetcher::FileFetcher; use crate::http_util::HttpClientProvider; use crate::npm::CliNpmResolver; @@ -73,6 +75,12 @@ use crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::ProgressBarStyle; use super::file_system::DenoCompileFileSystem; +use super::serialization::deserialize_binary_data_section; +use super::serialization::serialize_binary_data_section; +use super::serialization::DeserializedDataSection; +use super::serialization::RemoteModuleData; +use super::serialization::RemoteModulesStore; +use super::serialization::RemoteModulesStoreBuilder; use super::virtual_fs::FileBackedVfs; use super::virtual_fs::VfsBuilder; use super::virtual_fs::VfsRoot; @@ -124,8 +132,6 @@ impl<'a> StandaloneRelativeFileBaseUrl<'a> { } } -const MAGIC_TRAILER: &[u8; 8] = b"d3n0l4nd"; - #[derive(Deserialize, Serialize)] pub enum NodeModules { Managed { @@ -184,48 +190,18 @@ fn write_binary_bytes( original_bin: Vec, metadata: &Metadata, npm_snapshot: Option, - remote_modules_store: &RemoteModulesStoreBuilder, + remote_modules: &RemoteModulesStoreBuilder, vfs: VfsBuilder, compile_flags: &CompileFlags, ) -> Result<(), AnyError> { - let metadata = serde_json::to_string(metadata)?.as_bytes().to_vec(); - let npm_snapshot = - npm_snapshot.map(serialize_npm_snapshot).unwrap_or_default(); - let (vfs, vfs_files) = vfs.into_dir_and_files(); - let vfs = serde_json::to_string(&vfs)?.as_bytes().to_vec(); - - let mut writer = Vec::new(); - - // write the trailer, which includes the positions - // of the data blocks in the file - writer.write_all(&{ - let npm_snapshot_pos = metadata.len() as u64; - let remote_modules_pos = npm_snapshot_pos + (npm_snapshot.len() as u64); - let vfs_pos = remote_modules_pos + remote_modules_store.total_len(); - let files_pos = vfs_pos + (vfs.len() as u64); - Trailer { - metadata_pos: 0, - npm_snapshot_pos, - remote_modules_pos, - vfs_pos, - files_pos, - } - .as_bytes() - })?; - - writer.write_all(&metadata)?; - writer.write_all(&npm_snapshot)?; - remote_modules_store.write(&mut writer)?; - writer.write_all(&vfs)?; - for file in &vfs_files { - writer.write_all(file)?; - } + let data_section_bytes = + serialize_binary_data_section(metadata, npm_snapshot, remote_modules, vfs)?; let target = compile_flags.resolve_target(); if target.contains("linux") { libsui::Elf::new(&original_bin).append( "d3n0l4nd", - &writer, + &data_section_bytes, &mut file_writer, )?; } else if target.contains("windows") { @@ -235,11 +211,11 @@ fn write_binary_bytes( pe = pe.set_icon(&icon)?; } - pe.write_resource("d3n0l4nd", writer)? + pe.write_resource("d3n0l4nd", data_section_bytes)? .build(&mut file_writer)?; } else if target.contains("darwin") { libsui::Macho::from(original_bin)? - .write_section("d3n0l4nd", writer)? + .write_section("d3n0l4nd", data_section_bytes)? .build_and_sign(&mut file_writer)?; } Ok(()) @@ -264,12 +240,6 @@ pub struct StandaloneData { pub vfs: Arc, } -pub struct RemoteModuleData<'a> { - pub specifier: &'a ModuleSpecifier, - pub media_type: MediaType, - pub data: Cow<'static, [u8]>, -} - pub struct StandaloneModules { remote_modules: RemoteModulesStore, vfs: Arc, @@ -281,13 +251,12 @@ impl StandaloneModules { specifier: &'a ModuleSpecifier, ) -> Result, AnyError> { if specifier.scheme() == "file" { - return Ok(Some(specifier)); + Ok(Some(specifier)) } else { self.remote_modules.resolve_specifier(specifier) } } - // todo(THIS PR): don't return Option? pub fn read<'a>( &'a self, specifier: &'a ModuleSpecifier, @@ -297,7 +266,13 @@ impl StandaloneModules { let bytes = match self.vfs.file_entry(&path) { Ok(entry) => self.vfs.read_file_all(entry)?, Err(err) if err.kind() == ErrorKind::NotFound => { - let bytes = RealFs.read_file_sync(&path, None)?; + let bytes = match RealFs.read_file_sync(&path, None) { + Ok(bytes) => bytes, + Err(FsError::Io(err)) if err.kind() == ErrorKind::NotFound => { + return Ok(None) + } + Err(err) => return Err(err.into()), + }; Cow::Owned(bytes) } Err(err) => return Err(err.into()), @@ -325,12 +300,16 @@ pub fn extract_standalone( return Ok(None); }; - // We do the first part sync so it can complete quickly - let trailer = match Trailer::parse(&data[0..TRAILER_SIZE])? { + let DeserializedDataSection { + mut metadata, + npm_snapshot, + remote_modules, + mut vfs_dir, + vfs_files_data, + } = match deserialize_binary_data_section(data)? { + Some(data_section) => data_section, None => return Ok(None), - Some(trailer) => trailer, }; - let data = &data[TRAILER_SIZE..]; let root_path = { let current_exe_path = std::env::current_exe().unwrap(); @@ -339,35 +318,20 @@ pub fn extract_standalone( std::env::temp_dir().join(format!("deno-compile-{}", current_exe_name)) }; let cli_args = cli_args.into_owned(); - let mut metadata: Metadata = - serde_json::from_slice(&data[trailer.metadata_range()]) - .context("failed reading metadata")?; metadata.argv.reserve(cli_args.len() - 1); for arg in cli_args.into_iter().skip(1) { metadata.argv.push(arg.into_string().unwrap()); } - let remote_modules = - RemoteModulesStore::build(&data[trailer.remote_modules_range()])?; - let npm_snapshot_bytes = &data[trailer.npm_snapshot_range()]; - let npm_snapshot = if npm_snapshot_bytes.is_empty() { - None - } else { - Some(deserialize_npm_snapshot(npm_snapshot_bytes)?) - }; let vfs = { - let vfs_data = &data[trailer.vfs_range()]; - let mut dir: VirtualDirectory = - serde_json::from_slice(vfs_data).context("failed reading vfs data")?; - // align the name of the directory with the root dir - dir.name = root_path.file_name().unwrap().to_string_lossy().to_string(); + vfs_dir.name = root_path.file_name().unwrap().to_string_lossy().to_string(); let fs_root = VfsRoot { - dir, + dir: vfs_dir, root_path: root_path.clone(), - start_file_offset: trailer.files_pos, + start_file_offset: 0, }; - Arc::new(FileBackedVfs::new(Cow::Borrowed(data), fs_root)) + Arc::new(FileBackedVfs::new(Cow::Borrowed(vfs_files_data), fs_root)) }; let fs: Arc = Arc::new(DenoCompileFileSystem::new(vfs.clone())); @@ -384,71 +348,6 @@ pub fn extract_standalone( })) } -const TRAILER_SIZE: usize = std::mem::size_of::() + 8; // 8 bytes for the magic trailer string - -struct Trailer { - metadata_pos: u64, - npm_snapshot_pos: u64, - remote_modules_pos: u64, - vfs_pos: u64, - files_pos: u64, -} - -impl Trailer { - pub fn parse(trailer: &[u8]) -> Result, AnyError> { - let (magic_trailer, rest) = trailer.split_at(8); - if magic_trailer != MAGIC_TRAILER { - return Ok(None); - } - - let (metadata_pos, rest) = rest.split_at(8); - let (npm_snapshot_pos, rest) = rest.split_at(8); - let (remote_modules_pos, rest) = rest.split_at(8); - let (vfs_pos, files_pos) = rest.split_at(8); - Ok(Some(Trailer { - metadata_pos: u64_from_bytes(metadata_pos)?, - npm_snapshot_pos: u64_from_bytes(npm_snapshot_pos)?, - remote_modules_pos: u64_from_bytes(remote_modules_pos)?, - vfs_pos: u64_from_bytes(vfs_pos)?, - files_pos: u64_from_bytes(files_pos)?, - })) - } - - pub fn metadata_len(&self) -> u64 { - self.npm_snapshot_pos - self.metadata_pos - } - - pub fn metadata_range(&self) -> Range { - self.metadata_pos as usize..self.npm_snapshot_pos as usize - } - - pub fn npm_snapshot_range(&self) -> Range { - self.npm_snapshot_pos as usize..self.remote_modules_pos as usize - } - - pub fn remote_modules_range(&self) -> Range { - self.remote_modules_pos as usize..self.vfs_pos as usize - } - - pub fn vfs_range(&self) -> Range { - self.vfs_pos as usize..self.files_pos as usize - } - - pub fn as_bytes(&self) -> Vec { - let mut trailer = MAGIC_TRAILER.to_vec(); - trailer.write_all(&self.metadata_pos.to_be_bytes()).unwrap(); - trailer - .write_all(&self.npm_snapshot_pos.to_be_bytes()) - .unwrap(); - trailer - .write_all(&self.remote_modules_pos.to_be_bytes()) - .unwrap(); - trailer.write_all(&self.vfs_pos.to_be_bytes()).unwrap(); - trailer.write_all(&self.files_pos.to_be_bytes()).unwrap(); - trailer - } -} - fn u64_from_bytes(arr: &[u8]) -> Result { let fixed_arr: &[u8; 8] = arr .try_into() @@ -458,6 +357,7 @@ fn u64_from_bytes(arr: &[u8]) -> Result { pub struct DenoCompileBinaryWriter<'a> { deno_dir: &'a DenoDir, + emitter: &'a Emitter, file_fetcher: &'a FileFetcher, http_client_provider: &'a HttpClientProvider, npm_resolver: &'a dyn CliNpmResolver, @@ -469,6 +369,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { #[allow(clippy::too_many_arguments)] pub fn new( deno_dir: &'a DenoDir, + emitter: &'a Emitter, file_fetcher: &'a FileFetcher, http_client_provider: &'a HttpClientProvider, npm_resolver: &'a dyn CliNpmResolver, @@ -477,6 +378,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { ) -> Self { Self { deno_dir, + emitter, file_fetcher, http_client_provider, npm_resolver, @@ -516,15 +418,17 @@ impl<'a> DenoCompileBinaryWriter<'a> { ) } } - self.write_standalone_binary( - writer, - original_binary, - graph, - root_dir_url, - entrypoint, - cli_options, - compile_flags, - ) + self + .write_standalone_binary( + writer, + original_binary, + graph, + root_dir_url, + entrypoint, + cli_options, + compile_flags, + ) + .await } async fn get_base_binary( @@ -619,7 +523,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { /// This functions creates a standalone deno binary by appending a bundle /// and magic trailer to the currently executing binary. #[allow(clippy::too_many_arguments)] - fn write_standalone_binary( + async fn write_standalone_binary( &self, writer: File, original_bin: Vec, @@ -694,28 +598,46 @@ impl<'a> DenoCompileBinaryWriter<'a> { }; let mut remote_modules_store = RemoteModulesStoreBuilder::default(); for module in graph.modules() { + let (maybe_source, media_type) = match module { + deno_graph::Module::Js(m) => { + // todo(https://github.com/denoland/deno_media_type/pull/12): use is_emittable() + let is_emittable = matches!( + m.media_type, + MediaType::TypeScript + | MediaType::Mts + | MediaType::Cts + | MediaType::Jsx + | MediaType::Tsx + ); + let source = if is_emittable { + let source = self + .emitter + .emit_parsed_source(&m.specifier, m.media_type, &m.source) + .await?; + source.to_vec() + } else { + m.source.as_bytes().to_vec() + }; + (Some(source), m.media_type) + } + deno_graph::Module::Json(m) => { + (Some(m.source.as_bytes().to_vec()), m.media_type) + } + deno_graph::Module::Npm(_) + | deno_graph::Module::Node(_) + | deno_graph::Module::External(_) => (None, MediaType::Unknown), + }; if module.specifier().scheme() == "file" { let file_path = deno_path_util::url_to_file_path(module.specifier())?; vfs.add_file_with_data( &file_path, - match module.source() { - Some(source) => source.as_bytes().to_vec(), - None => Vec::new(), + match maybe_source { + Some(source) => source, + None => RealFs.read_file_sync(&file_path, None)?, }, )?; - } else if let Some(source) = module.source() { - let media_type = match module { - deno_graph::Module::Js(m) => m.media_type, - deno_graph::Module::Json(m) => m.media_type, - deno_graph::Module::Npm(_) - | deno_graph::Module::Node(_) - | deno_graph::Module::External(_) => MediaType::Unknown, - }; - remote_modules_store.add( - module.specifier(), - media_type, - source.as_bytes().to_vec(), - ); + } else if let Some(source) = maybe_source { + remote_modules_store.add(module.specifier(), media_type, source); } } remote_modules_store.add_redirects(&graph.redirects); @@ -910,235 +832,6 @@ impl<'a> DenoCompileBinaryWriter<'a> { } } -enum RemoteModulesStoreSpecifierValue { - Data(usize), - Redirect(ModuleSpecifier), -} - -pub struct RemoteModulesStore { - specifiers: HashMap, - files_data: &'static [u8], -} - -impl RemoteModulesStore { - pub fn build(data: &'static [u8]) -> Result { - fn read_specifier( - input: &[u8], - ) -> Result<(&[u8], (ModuleSpecifier, u64)), AnyError> { - let (input, specifier) = read_string_lossy(input)?; - let specifier = ModuleSpecifier::parse(&specifier)?; - let (input, offset) = read_u64(input)?; - Ok((input, (specifier, offset))) - } - - fn read_redirect( - input: &[u8], - ) -> Result<(&[u8], (ModuleSpecifier, ModuleSpecifier)), AnyError> { - let (input, from) = read_string_lossy(input)?; - let from = ModuleSpecifier::parse(&from)?; - let (input, to) = read_string_lossy(input)?; - let to = ModuleSpecifier::parse(&to)?; - Ok((input, (from, to))) - } - - fn read_headers( - input: &[u8], - ) -> Result< - ( - &[u8], - HashMap, - ), - AnyError, - > { - let (input, specifiers_len) = read_u32_as_usize(input)?; - let (mut input, redirects_len) = read_u32_as_usize(input)?; - let mut specifiers = - HashMap::with_capacity(specifiers_len + redirects_len); - for _ in 0..specifiers_len { - let (current_input, (specifier, offset)) = read_specifier(input)?; - input = current_input; - specifiers.insert( - specifier, - RemoteModulesStoreSpecifierValue::Data(offset as usize), - ); - } - - for _ in 0..redirects_len { - let (current_input, (from, to)) = read_redirect(input)?; - input = current_input; - specifiers.insert(from, RemoteModulesStoreSpecifierValue::Redirect(to)); - } - - Ok((input, specifiers)) - } - - let (files_data, specifiers) = read_headers(data)?; - - Ok(Self { - specifiers, - files_data, - }) - } - - pub fn resolve_specifier<'a>( - &'a self, - specifier: &'a ModuleSpecifier, - ) -> Result, AnyError> { - let mut count = 0; - let mut current = specifier; - loop { - if count > 10 { - bail!("Too many redirects resolving '{}'", specifier); - } - match self.specifiers.get(current) { - Some(RemoteModulesStoreSpecifierValue::Redirect(to)) => { - current = to; - count += 1; - } - Some(RemoteModulesStoreSpecifierValue::Data(_)) => { - return Ok(Some(current)); - } - None => { - return Ok(None); - } - } - } - } - - pub fn read<'a>( - &'a self, - specifier: &'a ModuleSpecifier, - ) -> Result>, AnyError> { - let mut count = 0; - let mut current = specifier; - loop { - if count > 10 { - bail!("Too many redirects resolving '{}'", specifier); - } - match self.specifiers.get(current) { - Some(RemoteModulesStoreSpecifierValue::Redirect(to)) => { - current = to; - count += 1; - } - Some(RemoteModulesStoreSpecifierValue::Data(offset)) => { - let files_data = &self.files_data[*offset..]; - let media_type = deserialize_media_type(files_data[0])?; - let (input, len) = read_u64(&files_data[1..])?; - let data = &input[..len as usize]; - return Ok(Some(RemoteModuleData { - specifier, - media_type, - data: Cow::Borrowed(data), - })); - } - None => { - return Ok(None); - } - } - } - } -} - -// todo(THIS PR): make this better -fn serialize_media_type(media_type: MediaType) -> u8 { - match media_type { - MediaType::JavaScript => 0, - MediaType::Jsx => 1, - MediaType::Mjs => 2, - MediaType::Cjs => 3, - MediaType::TypeScript => 4, - MediaType::Mts => 5, - MediaType::Cts => 6, - MediaType::Dts => 7, - MediaType::Dmts => 8, - MediaType::Dcts => 9, - MediaType::Tsx => 10, - MediaType::Json => 11, - MediaType::Wasm => 12, - MediaType::TsBuildInfo => 13, - MediaType::SourceMap => 14, - MediaType::Unknown => 15, - } -} - -fn deserialize_media_type(value: u8) -> Result { - match value { - 0 => Ok(MediaType::JavaScript), - 1 => Ok(MediaType::Jsx), - 2 => Ok(MediaType::Mjs), - 3 => Ok(MediaType::Cjs), - 4 => Ok(MediaType::TypeScript), - 5 => Ok(MediaType::Mts), - 6 => Ok(MediaType::Cts), - 7 => Ok(MediaType::Dts), - 8 => Ok(MediaType::Dmts), - 9 => Ok(MediaType::Dcts), - 10 => Ok(MediaType::Tsx), - 11 => Ok(MediaType::Json), - 12 => Ok(MediaType::Wasm), - 13 => Ok(MediaType::TsBuildInfo), - 14 => Ok(MediaType::SourceMap), - 15 => Ok(MediaType::Unknown), - _ => bail!("Unknown media type value: {}", value), - } -} - -#[derive(Default)] -struct RemoteModulesStoreBuilder { - specifiers: Vec<(String, u64)>, - data: Vec<(MediaType, Vec)>, - specifiers_byte_len: u64, - data_byte_len: u64, - redirects: Vec<(String, String)>, - redirects_len: u64, -} - -impl RemoteModulesStoreBuilder { - pub fn add(&mut self, specifier: &Url, media_type: MediaType, data: Vec) { - let specifier = specifier.to_string(); - self.specifiers_byte_len += 4 + specifier.len() as u64 + 8; - self.specifiers.push((specifier, self.data_byte_len)); - self.data_byte_len += 1 + 8 + data.len() as u64; - self.data.push((media_type, data)); - } - - pub fn add_redirects(&mut self, redirects: &BTreeMap) { - self.redirects.reserve(redirects.len()); - for (from, to) in redirects { - let from = from.to_string(); - let to = to.to_string(); - self.redirects_len += (4 + from.len() + 4 + to.len()) as u64; - self.redirects.push((from, to)); - } - } - - pub fn total_len(&self) -> u64 { - 4 + 4 + self.specifiers_byte_len + self.redirects_len + self.data_byte_len - } - - pub fn write(&self, writer: &mut dyn Write) -> Result<(), AnyError> { - writer.write_all(&(self.specifiers.len() as u32).to_be_bytes())?; - writer.write_all(&(self.redirects.len() as u32).to_be_bytes())?; - for (specifier, offset) in &self.specifiers { - writer.write_all(&(specifier.len() as u32).to_be_bytes())?; - writer.write_all(specifier.as_bytes())?; - writer.write_all(&offset.to_be_bytes())?; - } - for (from, to) in &self.redirects { - writer.write_all(&(from.len() as u32).to_be_bytes())?; - writer.write_all(from.as_bytes())?; - writer.write_all(&(to.len() as u32).to_be_bytes())?; - writer.write_all(to.as_bytes())?; - } - for (media_type, data) in &self.data { - writer.write_all(&[serialize_media_type(*media_type)])?; - writer.write_all(&(data.len() as u32).to_be_bytes())?; - writer.write_all(data)?; - } - Ok(()) - } -} - /// This function returns the environment variables specified /// in the passed environment file. fn get_file_env_vars( @@ -1192,153 +885,3 @@ fn set_windows_binary_to_gui(bin: &mut [u8]) -> Result<(), AnyError> { .copy_from_slice(&subsystem.to_le_bytes()); Ok(()) } - -fn serialize_npm_snapshot( - mut snapshot: SerializedNpmResolutionSnapshot, -) -> Vec { - fn append_string(bytes: &mut Vec, string: &str) { - let len = string.len() as u32; - bytes.extend_from_slice(&len.to_be_bytes()); - bytes.extend_from_slice(string.as_bytes()); - } - - snapshot.packages.sort_by(|a, b| a.id.cmp(&b.id)); // determinism - let ids_to_stored_ids = snapshot - .packages - .iter() - .enumerate() - .map(|(i, pkg)| (&pkg.id, i as u32)) - .collect::>(); - - let mut root_packages: Vec<_> = snapshot.root_packages.iter().collect(); - root_packages.sort(); - let mut bytes = Vec::new(); - - bytes.extend(&(snapshot.packages.len() as u32).to_be_bytes()); - for pkg in &snapshot.packages { - append_string(&mut bytes, &pkg.id.as_serialized()); - } - - bytes.extend(&(root_packages.len() as u32).to_be_bytes()); - for (req, id) in root_packages { - append_string(&mut bytes, &req.to_string()); - let id = ids_to_stored_ids.get(&id).unwrap(); - bytes.extend_from_slice(&id.to_be_bytes()); - } - - for pkg in &snapshot.packages { - let deps_len = pkg.dependencies.len() as u32; - bytes.extend_from_slice(&deps_len.to_be_bytes()); - let mut deps: Vec<_> = pkg.dependencies.iter().collect(); - deps.sort(); - for (req, id) in deps { - append_string(&mut bytes, req); - let id = ids_to_stored_ids.get(&id).unwrap(); - bytes.extend_from_slice(&id.to_be_bytes()); - } - } - - bytes -} - -fn deserialize_npm_snapshot( - data: &[u8], -) -> Result { - fn read_root_package( - data: &[u8], - ) -> Result<(&[u8], (PackageReq, usize)), AnyError> { - let (data, req) = read_string_lossy(data)?; - let req = PackageReq::from_str(&req)?; - let (data, id) = read_u32_as_usize(data)?; - Ok((data, (req, id))) - } - - let (mut data, packages_len) = read_u32_as_usize(data)?; - - // get a hashmap of all the npm package ids to their serialized ids - let mut data_ids_to_npm_ids = Vec::with_capacity(packages_len); - for _ in 0..packages_len { - let (current_data, id) = read_string_lossy(data)?; - data = current_data; - let id = NpmPackageId::from_serialized(&id)?; - data_ids_to_npm_ids.push(id); - } - - let (mut data, root_packages_len) = read_u32_as_usize(data)?; - let mut root_packages = HashMap::with_capacity(root_packages_len); - for _ in 0..root_packages_len { - let (current_data, (req, id)) = read_root_package(data)?; - data = current_data; - root_packages.insert(req, data_ids_to_npm_ids[id].clone()); - } - - let mut packages = Vec::with_capacity(packages_len); - for _ in 0..packages_len { - let (current_data, id) = read_u32_as_usize(data)?; - data = current_data; - let id = data_ids_to_npm_ids[id].clone(); - let (current_data, deps_len) = read_u32_as_usize(data)?; - data = current_data; - let mut dependencies = HashMap::with_capacity(deps_len); - for _ in 0..deps_len { - let (current_data, req) = read_string_lossy(data)?; - data = current_data; - let (current_data, id) = read_u32_as_usize(data)?; - data = current_data; - // todo(THIS PR): handle when id >= data_ids_to_npm_ids.len() - dependencies.insert(req.into_owned(), data_ids_to_npm_ids[id].clone()); - } - - packages.push(SerializedNpmResolutionSnapshotPackage { - id: id, - system: Default::default(), - dist: Default::default(), - dependencies, - optional_dependencies: Default::default(), - bin: None, - scripts: Default::default(), - deprecated: Default::default(), - }); - } - - if !data.is_empty() { - bail!("Unexpected data left over"); - } - - Ok( - SerializedNpmResolutionSnapshot { - packages, - root_packages, - } - // this is ok because we have already verified that all the - // identifiers found in the snapshot are valid via the - // npm package id -> npm package id mapping - .into_valid_unsafe(), - ) -} - -fn read_string_lossy(data: &[u8]) -> Result<(&[u8], Cow), AnyError> { - let (data, str_len) = read_u32_as_usize(data)?; - if data.len() < str_len { - bail!("Unexpected end of data"); - } - Ok((data, String::from_utf8_lossy(&data[..str_len]))) -} - -fn read_u32_as_usize(data: &[u8]) -> Result<(&[u8], usize), AnyError> { - if data.len() < 4 { - bail!("Unexpected end of data"); - } - let (len_bytes, rest) = data.split_at(4); - let len = u32::from_be_bytes(len_bytes.try_into()?); - Ok((rest, len as usize)) -} - -fn read_u64(data: &[u8]) -> Result<(&[u8], u64), AnyError> { - if data.len() < 8 { - bail!("Unexpected end of data"); - } - let (len_bytes, rest) = data.split_at(8); - let len = u64::from_be_bytes(len_bytes.try_into()?); - Ok((rest, len)) -} diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index 8604feef03ff4d..d416637c9ee955 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -79,6 +79,7 @@ use crate::worker::ModuleLoaderFactory; pub mod binary; mod file_system; +mod serialization; mod virtual_fs; pub use binary::extract_standalone; @@ -314,51 +315,42 @@ impl ModuleLoader for EmbeddedModuleLoader { } match self.shared.modules.read(original_specifier) { - Ok(Some(module)) => { - return deno_core::ModuleLoadResponse::Sync(Ok( - deno_core::ModuleSource::new_with_redirect( - match module.media_type { - MediaType::JavaScript - | MediaType::Jsx - | MediaType::Mjs - | MediaType::Cjs - | MediaType::TypeScript - | MediaType::Mts - | MediaType::Cts - | MediaType::Dts - | MediaType::Dmts - | MediaType::Dcts - | MediaType::Tsx => ModuleType::JavaScript, - MediaType::Json => ModuleType::Json, - MediaType::Wasm => ModuleType::Wasm, - MediaType::TsBuildInfo - | MediaType::SourceMap - | MediaType::Unknown => { - unreachable!(); - } - }, - ModuleSourceCode::Bytes(match module.data { - Cow::Borrowed(d) => d.into(), - Cow::Owned(d) => d.into_boxed_slice().into(), - }), - &original_specifier, - &module.specifier, - None, - ), - )) - } - Ok(None) => { - return deno_core::ModuleLoadResponse::Sync(Err(type_error(format!( - "{MODULE_NOT_FOUND}: {}", - original_specifier - )))); - } - Err(err) => { - return deno_core::ModuleLoadResponse::Sync(Err(type_error(format!( - "{:?}", - err - )))); - } + Ok(Some(module)) => deno_core::ModuleLoadResponse::Sync(Ok( + deno_core::ModuleSource::new_with_redirect( + match module.media_type { + MediaType::JavaScript + | MediaType::Jsx + | MediaType::Mjs + | MediaType::Cjs + | MediaType::TypeScript + | MediaType::Mts + | MediaType::Cts + | MediaType::Dts + | MediaType::Dmts + | MediaType::Dcts + | MediaType::Tsx => ModuleType::JavaScript, + MediaType::Json => ModuleType::Json, + MediaType::Wasm => ModuleType::Wasm, + // just assume javascript if we made it here + MediaType::TsBuildInfo + | MediaType::SourceMap + | MediaType::Unknown => ModuleType::JavaScript, + }, + ModuleSourceCode::Bytes(match module.data { + Cow::Borrowed(d) => d.into(), + Cow::Owned(d) => d.into_boxed_slice().into(), + }), + original_specifier, + module.specifier, + None, + ), + )), + Ok(None) => deno_core::ModuleLoadResponse::Sync(Err(type_error( + format!("{MODULE_NOT_FOUND}: {}", original_specifier), + ))), + Err(err) => deno_core::ModuleLoadResponse::Sync(Err(type_error( + format!("{:?}", err), + ))), } } } diff --git a/cli/standalone/serialization.rs b/cli/standalone/serialization.rs new file mode 100644 index 00000000000000..ce1eb2682f5ab4 --- /dev/null +++ b/cli/standalone/serialization.rs @@ -0,0 +1,590 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. + +use std::borrow::Cow; +use std::collections::BTreeMap; +use std::collections::HashMap; +use std::io::Write; + +use deno_ast::MediaType; +use deno_core::anyhow::bail; +use deno_core::anyhow::Context; +use deno_core::error::AnyError; +use deno_core::serde_json; +use deno_core::url::Url; +use deno_npm::resolution::SerializedNpmResolutionSnapshot; +use deno_npm::resolution::SerializedNpmResolutionSnapshotPackage; +use deno_npm::resolution::ValidSerializedNpmResolutionSnapshot; +use deno_npm::NpmPackageId; +use deno_semver::package::PackageReq; + +use crate::standalone::virtual_fs::VirtualDirectory; + +use super::binary::Metadata; +use super::virtual_fs::VfsBuilder; + +const MAGIC_BYTES: &[u8; 8] = b"d3n0l4nd"; + +/// Binary format: +/// * d3n0l4nd +/// * +/// * +/// * +/// * +/// * +/// * d3n0l4nd +pub fn serialize_binary_data_section( + metadata: &Metadata, + npm_snapshot: Option, + remote_modules: &RemoteModulesStoreBuilder, + vfs: VfsBuilder, +) -> Result, AnyError> { + fn write_bytes_with_len(bytes: &mut Vec, data: &[u8]) { + bytes.extend_from_slice(&(data.len() as u64).to_be_bytes()); + bytes.extend_from_slice(data); + } + + let mut bytes = Vec::new(); + bytes.extend_from_slice(MAGIC_BYTES); + + // 1. Metadata + { + let metadata = serde_json::to_string(metadata)?; + write_bytes_with_len(&mut bytes, metadata.as_bytes()); + } + // 2. Npm snapshot + { + let npm_snapshot = + npm_snapshot.map(serialize_npm_snapshot).unwrap_or_default(); + write_bytes_with_len(&mut bytes, &npm_snapshot); + } + // 3. Remote modules + { + let update_index = bytes.len(); + bytes.extend_from_slice(&(0_u64).to_be_bytes()); + let start_index = bytes.len(); + remote_modules.write(&mut bytes)?; + let length = bytes.len() - start_index; + let length_bytes = (length as u64).to_be_bytes(); + bytes[update_index..update_index + length_bytes.len()] + .copy_from_slice(&length_bytes); + } + // 4. VFS + { + let (vfs, vfs_files) = vfs.into_dir_and_files(); + let vfs = serde_json::to_string(&vfs)?; + write_bytes_with_len(&mut bytes, vfs.as_bytes()); + let vfs_bytes_len = vfs_files.iter().map(|f| f.len() as u64).sum::(); + bytes.extend_from_slice(&vfs_bytes_len.to_be_bytes()); + for file in &vfs_files { + bytes.extend_from_slice(file); + } + } + + // write the magic bytes at the end so we can use it + // to make sure we've deserialized correctly + bytes.extend_from_slice(MAGIC_BYTES); + + Ok(bytes) +} + +pub struct DeserializedDataSection { + pub metadata: Metadata, + pub npm_snapshot: Option, + pub remote_modules: RemoteModulesStore, + pub vfs_dir: VirtualDirectory, + pub vfs_files_data: &'static [u8], +} + +pub fn deserialize_binary_data_section( + data: &'static [u8], +) -> Result, AnyError> { + fn read_bytes_with_len(input: &[u8]) -> Result<(&[u8], &[u8]), AnyError> { + let (input, len) = read_u64(input)?; + let (input, data) = read_bytes(input, len as usize)?; + Ok((input, data)) + } + + fn read_magic_bytes(input: &[u8]) -> Result<(&[u8], bool), AnyError> { + if input.len() < MAGIC_BYTES.len() { + bail!("Unexpected end of data. Could not find magic bytes."); + } + let (magic_bytes, input) = input.split_at(MAGIC_BYTES.len()); + if magic_bytes != MAGIC_BYTES { + return Ok((input, false)); + } + Ok((input, true)) + } + + let (input, found) = read_magic_bytes(data)?; + if !found { + return Ok(None); + } + + // 1. Metadata + let (input, data) = read_bytes_with_len(input).context("reading metadata")?; + let metadata: Metadata = + serde_json::from_slice(data).context("deserializing metadata")?; + // 2. Npm snapshot + let (input, data) = + read_bytes_with_len(input).context("reading npm snapshot")?; + let npm_snapshot = if data.is_empty() { + None + } else { + Some(deserialize_npm_snapshot(data).context("deserializing npm snapshot")?) + }; + // 3. Remote modules + let (input, data) = + read_bytes_with_len(input).context("reading remote modules data")?; + let remote_modules = + RemoteModulesStore::build(data).context("deserializing remote modules")?; + // 4. VFS + let (input, data) = read_bytes_with_len(input).context("vfs")?; + let vfs_dir: VirtualDirectory = + serde_json::from_slice(data).context("deserializing vfs data")?; + let (input, vfs_files_data) = + read_bytes_with_len(input).context("reading vfs files data")?; + + // finally ensure we read the magic bytes at the end + let (_input, found) = read_magic_bytes(input)?; + if !found { + bail!("Could not find magic bytes at the end of the data."); + } + + Ok(Some(DeserializedDataSection { + metadata, + npm_snapshot, + remote_modules, + vfs_dir, + vfs_files_data, + })) +} + +#[derive(Default)] +pub struct RemoteModulesStoreBuilder { + specifiers: Vec<(String, u64)>, + data: Vec<(MediaType, Vec)>, + data_byte_len: u64, + redirects: Vec<(String, String)>, + redirects_len: u64, +} + +impl RemoteModulesStoreBuilder { + pub fn add(&mut self, specifier: &Url, media_type: MediaType, data: Vec) { + let specifier = specifier.to_string(); + self.specifiers.push((specifier, self.data_byte_len)); + self.data_byte_len += 1 + 8 + data.len() as u64; // media type (1 byte), data length (8 bytes), data + self.data.push((media_type, data)); + } + + pub fn add_redirects(&mut self, redirects: &BTreeMap) { + self.redirects.reserve(redirects.len()); + for (from, to) in redirects { + let from = from.to_string(); + let to = to.to_string(); + self.redirects_len += (4 + from.len() + 4 + to.len()) as u64; + self.redirects.push((from, to)); + } + } + + fn write(&self, writer: &mut dyn Write) -> Result<(), AnyError> { + writer.write_all(&(self.specifiers.len() as u32).to_be_bytes())?; + writer.write_all(&(self.redirects.len() as u32).to_be_bytes())?; + for (specifier, offset) in &self.specifiers { + writer.write_all(&(specifier.len() as u32).to_be_bytes())?; + writer.write_all(specifier.as_bytes())?; + writer.write_all(&offset.to_be_bytes())?; + } + for (from, to) in &self.redirects { + writer.write_all(&(from.len() as u32).to_be_bytes())?; + writer.write_all(from.as_bytes())?; + writer.write_all(&(to.len() as u32).to_be_bytes())?; + writer.write_all(to.as_bytes())?; + } + for (media_type, data) in &self.data { + writer.write_all(&[serialize_media_type(*media_type)])?; + writer.write_all(&(data.len() as u64).to_be_bytes())?; + writer.write_all(data)?; + } + Ok(()) + } +} + +pub struct RemoteModuleData<'a> { + pub specifier: &'a Url, + pub media_type: MediaType, + pub data: Cow<'static, [u8]>, +} + +enum RemoteModulesStoreSpecifierValue { + Data(usize), + Redirect(Url), +} + +pub struct RemoteModulesStore { + specifiers: HashMap, + files_data: &'static [u8], +} + +impl RemoteModulesStore { + fn build(data: &'static [u8]) -> Result { + fn read_specifier(input: &[u8]) -> Result<(&[u8], (Url, u64)), AnyError> { + let (input, specifier) = read_string_lossy(input)?; + let specifier = Url::parse(&specifier)?; + let (input, offset) = read_u64(input)?; + Ok((input, (specifier, offset))) + } + + fn read_redirect(input: &[u8]) -> Result<(&[u8], (Url, Url)), AnyError> { + let (input, from) = read_string_lossy(input)?; + let from = Url::parse(&from)?; + let (input, to) = read_string_lossy(input)?; + let to = Url::parse(&to)?; + Ok((input, (from, to))) + } + + fn read_headers( + input: &[u8], + ) -> Result<(&[u8], HashMap), AnyError> + { + let (input, specifiers_len) = read_u32_as_usize(input)?; + let (mut input, redirects_len) = read_u32_as_usize(input)?; + let mut specifiers = + HashMap::with_capacity(specifiers_len + redirects_len); + for _ in 0..specifiers_len { + let (current_input, (specifier, offset)) = + read_specifier(input).context("reading specifier")?; + input = current_input; + specifiers.insert( + specifier, + RemoteModulesStoreSpecifierValue::Data(offset as usize), + ); + } + + for _ in 0..redirects_len { + let (current_input, (from, to)) = read_redirect(input)?; + input = current_input; + specifiers.insert(from, RemoteModulesStoreSpecifierValue::Redirect(to)); + } + + Ok((input, specifiers)) + } + + let (files_data, specifiers) = read_headers(data)?; + + Ok(Self { + specifiers, + files_data, + }) + } + + pub fn resolve_specifier<'a>( + &'a self, + specifier: &'a Url, + ) -> Result, AnyError> { + let mut count = 0; + let mut current = specifier; + loop { + if count > 10 { + bail!("Too many redirects resolving '{}'", specifier); + } + match self.specifiers.get(current) { + Some(RemoteModulesStoreSpecifierValue::Redirect(to)) => { + current = to; + count += 1; + } + Some(RemoteModulesStoreSpecifierValue::Data(_)) => { + return Ok(Some(current)); + } + None => { + return Ok(None); + } + } + } + } + + pub fn read<'a>( + &'a self, + specifier: &'a Url, + ) -> Result>, AnyError> { + let mut count = 0; + let mut current = specifier; + loop { + if count > 10 { + bail!("Too many redirects resolving '{}'", specifier); + } + match self.specifiers.get(current) { + Some(RemoteModulesStoreSpecifierValue::Redirect(to)) => { + current = to; + count += 1; + } + Some(RemoteModulesStoreSpecifierValue::Data(offset)) => { + let input = &self.files_data[*offset..]; + let (input, media_type_byte) = read_bytes(input, 1)?; + let media_type = deserialize_media_type(media_type_byte[0])?; + let (input, len) = read_u64(input)?; + let (_input, data) = read_bytes(input, len as usize)?; + return Ok(Some(RemoteModuleData { + specifier, + media_type, + data: Cow::Borrowed(data), + })); + } + None => { + return Ok(None); + } + } + } + } +} + +fn serialize_npm_snapshot( + mut snapshot: SerializedNpmResolutionSnapshot, +) -> Vec { + fn append_string(bytes: &mut Vec, string: &str) { + let len = string.len() as u32; + bytes.extend_from_slice(&len.to_be_bytes()); + bytes.extend_from_slice(string.as_bytes()); + } + + snapshot.packages.sort_by(|a, b| a.id.cmp(&b.id)); // determinism + let ids_to_stored_ids = snapshot + .packages + .iter() + .enumerate() + .map(|(i, pkg)| (&pkg.id, i as u32)) + .collect::>(); + + let mut root_packages: Vec<_> = snapshot.root_packages.iter().collect(); + root_packages.sort(); + let mut bytes = Vec::new(); + + bytes.extend_from_slice(&(snapshot.packages.len() as u32).to_be_bytes()); + for pkg in &snapshot.packages { + append_string(&mut bytes, &pkg.id.as_serialized()); + } + + bytes.extend_from_slice(&(root_packages.len() as u32).to_be_bytes()); + for (req, id) in root_packages { + append_string(&mut bytes, &req.to_string()); + let id = ids_to_stored_ids.get(&id).unwrap(); + bytes.extend_from_slice(&id.to_be_bytes()); + } + + for pkg in &snapshot.packages { + let deps_len = pkg.dependencies.len() as u32; + bytes.extend_from_slice(&deps_len.to_be_bytes()); + let mut deps: Vec<_> = pkg.dependencies.iter().collect(); + deps.sort(); + for (req, id) in deps { + append_string(&mut bytes, req); + let id = ids_to_stored_ids.get(&id).unwrap(); + bytes.extend_from_slice(&id.to_be_bytes()); + } + } + + bytes +} + +fn deserialize_npm_snapshot( + input: &[u8], +) -> Result { + fn parse_id(input: &[u8]) -> Result<(&[u8], NpmPackageId), AnyError> { + let (input, id) = read_string_lossy(input)?; + let id = NpmPackageId::from_serialized(&id)?; + Ok((input, id)) + } + + fn parse_root_package<'a>( + id_to_npm_id: &'a impl Fn(usize) -> Result, + ) -> impl Fn(&[u8]) -> Result<(&[u8], (PackageReq, NpmPackageId)), AnyError> + 'a + { + |input| { + let (input, req) = read_string_lossy(input)?; + let req = PackageReq::from_str(&req)?; + let (input, id) = read_u32_as_usize(input)?; + Ok((input, (req, id_to_npm_id(id)?))) + } + } + + fn parse_package_dep<'a>( + id_to_npm_id: &'a impl Fn(usize) -> Result, + ) -> impl Fn(&[u8]) -> Result<(&[u8], (String, NpmPackageId)), AnyError> + 'a + { + |input| { + let (input, req) = read_string_lossy(input)?; + let (input, id) = read_u32_as_usize(input)?; + Ok((input, (req.into_owned(), id_to_npm_id(id)?))) + } + } + + fn parse_package<'a>( + input: &'a [u8], + id: NpmPackageId, + id_to_npm_id: &impl Fn(usize) -> Result, + ) -> Result<(&'a [u8], SerializedNpmResolutionSnapshotPackage), AnyError> { + let (input, deps_len) = read_u32_as_usize(input)?; + let (input, dependencies) = + parse_hashmap_n_times(input, deps_len, parse_package_dep(id_to_npm_id))?; + Ok(( + input, + SerializedNpmResolutionSnapshotPackage { + id, + system: Default::default(), + dist: Default::default(), + dependencies, + optional_dependencies: Default::default(), + bin: None, + scripts: Default::default(), + deprecated: Default::default(), + }, + )) + } + + let (input, packages_len) = read_u32_as_usize(input)?; + + // get a hashmap of all the npm package ids to their serialized ids + let (input, data_ids_to_npm_ids) = + parse_vec_n_times(input, packages_len, parse_id) + .context("deserializing id")?; + let data_id_to_npm_id = |id: usize| { + data_ids_to_npm_ids + .get(id) + .cloned() + .ok_or_else(|| deno_core::anyhow::anyhow!("Invalid npm package id")) + }; + + let (input, root_packages_len) = read_u32_as_usize(input)?; + let (input, root_packages) = parse_hashmap_n_times( + input, + root_packages_len, + parse_root_package(&data_id_to_npm_id), + ) + .context("deserializing root package")?; + let (input, packages) = + parse_vec_n_times_with_index(input, packages_len, |input, index| { + parse_package(input, data_id_to_npm_id(index)?, &data_id_to_npm_id) + }) + .context("deserializing package")?; + + if !input.is_empty() { + bail!("Unexpected data left over"); + } + + Ok( + SerializedNpmResolutionSnapshot { + packages, + root_packages, + } + // this is ok because we have already verified that all the + // identifiers found in the snapshot are valid via the + // npm package id -> npm package id mapping + .into_valid_unsafe(), + ) +} + +fn serialize_media_type(media_type: MediaType) -> u8 { + match media_type { + MediaType::JavaScript => 0, + MediaType::Jsx => 1, + MediaType::Mjs => 2, + MediaType::Cjs => 3, + MediaType::TypeScript => 4, + MediaType::Mts => 5, + MediaType::Cts => 6, + MediaType::Dts => 7, + MediaType::Dmts => 8, + MediaType::Dcts => 9, + MediaType::Tsx => 10, + MediaType::Json => 11, + MediaType::Wasm => 12, + MediaType::TsBuildInfo => 13, + MediaType::SourceMap => 14, + MediaType::Unknown => 15, + } +} + +fn deserialize_media_type(value: u8) -> Result { + match value { + 0 => Ok(MediaType::JavaScript), + 1 => Ok(MediaType::Jsx), + 2 => Ok(MediaType::Mjs), + 3 => Ok(MediaType::Cjs), + 4 => Ok(MediaType::TypeScript), + 5 => Ok(MediaType::Mts), + 6 => Ok(MediaType::Cts), + 7 => Ok(MediaType::Dts), + 8 => Ok(MediaType::Dmts), + 9 => Ok(MediaType::Dcts), + 10 => Ok(MediaType::Tsx), + 11 => Ok(MediaType::Json), + 12 => Ok(MediaType::Wasm), + 13 => Ok(MediaType::TsBuildInfo), + 14 => Ok(MediaType::SourceMap), + 15 => Ok(MediaType::Unknown), + _ => bail!("Unknown media type value: {}", value), + } +} + +fn parse_hashmap_n_times( + mut input: &[u8], + times: usize, + parse: impl Fn(&[u8]) -> Result<(&[u8], (TKey, TValue)), AnyError>, +) -> Result<(&[u8], HashMap), AnyError> { + let mut results = HashMap::with_capacity(times); + for _ in 0..times { + let result = parse(input); + let (new_input, (key, value)) = result?; + results.insert(key, value); + input = new_input; + } + Ok((input, results)) +} + +fn parse_vec_n_times( + input: &[u8], + times: usize, + parse: impl Fn(&[u8]) -> Result<(&[u8], TResult), AnyError>, +) -> Result<(&[u8], Vec), AnyError> { + parse_vec_n_times_with_index(input, times, |input, _index| parse(input)) +} + +fn parse_vec_n_times_with_index( + mut input: &[u8], + times: usize, + parse: impl Fn(&[u8], usize) -> Result<(&[u8], TResult), AnyError>, +) -> Result<(&[u8], Vec), AnyError> { + let mut results = Vec::with_capacity(times); + for i in 0..times { + let result = parse(input, i); + let (new_input, result) = result?; + results.push(result); + input = new_input; + } + Ok((input, results)) +} + +fn read_bytes(input: &[u8], len: usize) -> Result<(&[u8], &[u8]), AnyError> { + if input.len() < len { + bail!("Unexpected end of data.",); + } + let (len_bytes, input) = input.split_at(len); + Ok((input, len_bytes)) +} + +fn read_string_lossy(input: &[u8]) -> Result<(&[u8], Cow), AnyError> { + let (input, str_len) = read_u32_as_usize(input)?; + let (input, data_bytes) = read_bytes(input, str_len)?; + Ok((input, String::from_utf8_lossy(data_bytes))) +} + +fn read_u32_as_usize(input: &[u8]) -> Result<(&[u8], usize), AnyError> { + let (input, len_bytes) = read_bytes(input, 4)?; + let len = u32::from_be_bytes(len_bytes.try_into()?); + Ok((input, len as usize)) +} + +fn read_u64(input: &[u8]) -> Result<(&[u8], u64), AnyError> { + let (input, len_bytes) = read_bytes(input, 8)?; + let len = u64::from_be_bytes(len_bytes.try_into()?); + Ok((input, len)) +} diff --git a/cli/util/text_encoding.rs b/cli/util/text_encoding.rs index 0739fcf25c396e..df72cc2be63151 100644 --- a/cli/util/text_encoding.rs +++ b/cli/util/text_encoding.rs @@ -103,6 +103,8 @@ pub fn arc_str_to_bytes(arc_str: Arc) -> Arc<[u8]> { unsafe { Arc::from_raw(raw as *const [u8]) } } +/// Converts an `Arc` to an `Arc` if able. +#[allow(dead_code)] pub fn arc_u8_to_arc_str( arc_u8: Arc<[u8]>, ) -> Result, std::str::Utf8Error> { From 788f4ab31048aee2dde00788a870bff22afa572d Mon Sep 17 00:00:00 2001 From: David Sherret Date: Wed, 23 Oct 2024 17:19:47 -0400 Subject: [PATCH 03/20] Tell v8 that something is a string like before. --- cli/standalone/binary.rs | 6 ++-- cli/standalone/mod.rs | 43 +++++++++------------------ cli/standalone/serialization.rs | 52 +++++++++++++++++++++++++++++++-- 3 files changed, 65 insertions(+), 36 deletions(-) diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 7060368ce1baa4..6569a388316ef4 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -77,8 +77,8 @@ use crate::util::progress_bar::ProgressBarStyle; use super::file_system::DenoCompileFileSystem; use super::serialization::deserialize_binary_data_section; use super::serialization::serialize_binary_data_section; +use super::serialization::DenoCompileModuleData; use super::serialization::DeserializedDataSection; -use super::serialization::RemoteModuleData; use super::serialization::RemoteModulesStore; use super::serialization::RemoteModulesStoreBuilder; use super::virtual_fs::FileBackedVfs; @@ -260,7 +260,7 @@ impl StandaloneModules { pub fn read<'a>( &'a self, specifier: &'a ModuleSpecifier, - ) -> Result>, AnyError> { + ) -> Result>, AnyError> { if specifier.scheme() == "file" { let path = deno_path_util::url_to_file_path(specifier)?; let bytes = match self.vfs.file_entry(&path) { @@ -277,7 +277,7 @@ impl StandaloneModules { } Err(err) => return Err(err.into()), }; - Ok(Some(RemoteModuleData { + Ok(Some(DenoCompileModuleData { media_type: MediaType::from_specifier(specifier), specifier, data: bytes, diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index d416637c9ee955..b483ea94155a87 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -315,36 +315,19 @@ impl ModuleLoader for EmbeddedModuleLoader { } match self.shared.modules.read(original_specifier) { - Ok(Some(module)) => deno_core::ModuleLoadResponse::Sync(Ok( - deno_core::ModuleSource::new_with_redirect( - match module.media_type { - MediaType::JavaScript - | MediaType::Jsx - | MediaType::Mjs - | MediaType::Cjs - | MediaType::TypeScript - | MediaType::Mts - | MediaType::Cts - | MediaType::Dts - | MediaType::Dmts - | MediaType::Dcts - | MediaType::Tsx => ModuleType::JavaScript, - MediaType::Json => ModuleType::Json, - MediaType::Wasm => ModuleType::Wasm, - // just assume javascript if we made it here - MediaType::TsBuildInfo - | MediaType::SourceMap - | MediaType::Unknown => ModuleType::JavaScript, - }, - ModuleSourceCode::Bytes(match module.data { - Cow::Borrowed(d) => d.into(), - Cow::Owned(d) => d.into_boxed_slice().into(), - }), - original_specifier, - module.specifier, - None, - ), - )), + Ok(Some(module)) => { + let (module_specifier, module_type, module_source) = + module.into_for_v8(); + deno_core::ModuleLoadResponse::Sync(Ok( + deno_core::ModuleSource::new_with_redirect( + module_type, + module_source, + original_specifier, + module_specifier, + None, + ), + )) + } Ok(None) => deno_core::ModuleLoadResponse::Sync(Err(type_error( format!("{MODULE_NOT_FOUND}: {}", original_specifier), ))), diff --git a/cli/standalone/serialization.rs b/cli/standalone/serialization.rs index ce1eb2682f5ab4..9480cdb0f3899e 100644 --- a/cli/standalone/serialization.rs +++ b/cli/standalone/serialization.rs @@ -11,6 +11,9 @@ use deno_core::anyhow::Context; use deno_core::error::AnyError; use deno_core::serde_json; use deno_core::url::Url; +use deno_core::FastString; +use deno_core::ModuleSourceCode; +use deno_core::ModuleType; use deno_npm::resolution::SerializedNpmResolutionSnapshot; use deno_npm::resolution::SerializedNpmResolutionSnapshotPackage; use deno_npm::resolution::ValidSerializedNpmResolutionSnapshot; @@ -209,12 +212,55 @@ impl RemoteModulesStoreBuilder { } } -pub struct RemoteModuleData<'a> { +pub struct DenoCompileModuleData<'a> { pub specifier: &'a Url, pub media_type: MediaType, pub data: Cow<'static, [u8]>, } +impl<'a> DenoCompileModuleData<'a> { + pub fn into_for_v8(self) -> (&'a Url, ModuleType, ModuleSourceCode) { + fn into_bytes(data: Cow<'static, [u8]>) -> ModuleSourceCode { + ModuleSourceCode::Bytes(match data { + Cow::Borrowed(d) => d.into(), + Cow::Owned(d) => d.into_boxed_slice().into(), + }) + } + + fn into_string_unsafe(data: Cow<'static, [u8]>) -> ModuleSourceCode { + match data { + Cow::Borrowed(d) => ModuleSourceCode::String(unsafe { + FastString::from_static(std::str::from_utf8_unchecked(d)) + }), + Cow::Owned(d) => ModuleSourceCode::Bytes(d.into_boxed_slice().into()), + } + } + + let (media_type, source) = match self.media_type { + MediaType::JavaScript + | MediaType::Jsx + | MediaType::Mjs + | MediaType::Cjs + | MediaType::TypeScript + | MediaType::Mts + | MediaType::Cts + | MediaType::Dts + | MediaType::Dmts + | MediaType::Dcts + | MediaType::Tsx => { + (ModuleType::JavaScript, into_string_unsafe(self.data)) + } + MediaType::Json => (ModuleType::Json, into_string_unsafe(self.data)), + MediaType::Wasm => (ModuleType::Wasm, into_bytes(self.data)), + // just assume javascript if we made it here + MediaType::TsBuildInfo | MediaType::SourceMap | MediaType::Unknown => { + (ModuleType::JavaScript, into_bytes(self.data)) + } + }; + (self.specifier, media_type, source) + } +} + enum RemoteModulesStoreSpecifierValue { Data(usize), Redirect(Url), @@ -305,7 +351,7 @@ impl RemoteModulesStore { pub fn read<'a>( &'a self, specifier: &'a Url, - ) -> Result>, AnyError> { + ) -> Result>, AnyError> { let mut count = 0; let mut current = specifier; loop { @@ -323,7 +369,7 @@ impl RemoteModulesStore { let media_type = deserialize_media_type(media_type_byte[0])?; let (input, len) = read_u64(input)?; let (_input, data) = read_bytes(input, len as usize)?; - return Ok(Some(RemoteModuleData { + return Ok(Some(DenoCompileModuleData { specifier, media_type, data: Cow::Borrowed(data), From 742ae3f535595fca45fa1f40a98e0c12ab66583f Mon Sep 17 00:00:00 2001 From: David Sherret Date: Wed, 23 Oct 2024 18:31:10 -0400 Subject: [PATCH 04/20] fix byonm issue --- cli/standalone/binary.rs | 13 +++++++------ cli/standalone/mod.rs | 2 +- cli/standalone/serialization.rs | 9 ++++++--- cli/standalone/virtual_fs.rs | 19 +++++++++++++++++++ .../compile/byonm_main_sub_dir/deno.json | 2 +- tests/specs/compile/detect_cjs/output.out | 3 ++- 6 files changed, 36 insertions(+), 12 deletions(-) diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 6569a388316ef4..03c4775058f7bf 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -571,7 +571,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { } } InnerCliNpmResolverRef::Byonm(resolver) => { - let npm_vfs_builder = VfsBuilder::new(root_path.clone())?; + let npm_vfs_builder = self.build_npm_vfs(&root_path, cli_options)?; ( Some(npm_vfs_builder), Some(NodeModules::Byonm { @@ -591,7 +591,6 @@ impl<'a> DenoCompileBinaryWriter<'a> { } }; let mut vfs = if let Some(npm_vfs) = maybe_npm_vfs { - // todo: probably need to modify this a bit npm_vfs } else { VfsBuilder::new(root_path.clone())? @@ -742,8 +741,8 @@ impl<'a> DenoCompileBinaryWriter<'a> { } else { // DO NOT include the user's registry url as it may contain credentials, // but also don't make this dependent on the registry url - let root_path = npm_resolver.global_cache_root_folder(); - let mut builder = VfsBuilder::new(root_path)?; + let global_cache_root_path = npm_resolver.global_cache_root_folder(); + let mut builder = VfsBuilder::new(global_cache_root_path)?; let mut packages = npm_resolver.all_system_packages(&self.npm_system_info); packages.sort_by(|a, b| a.id.cmp(&b.id)); // determinism @@ -753,12 +752,12 @@ impl<'a> DenoCompileBinaryWriter<'a> { builder.add_dir_recursive(&folder)?; } - // Flatten all the registries folders into a single "node_modules/localhost" folder + // Flatten all the registries folders into a single ".deno_compile_node_modules/localhost" folder // that will be used by denort when loading the npm cache. This avoids us exposing // the user's private registry information and means we don't have to bother // serializing all the different registry config into the binary. builder.with_root_dir(|root_dir| { - root_dir.name = "node_modules".to_string(); + root_dir.name = ".deno_compile_node_modules".to_string(); let mut new_entries = Vec::with_capacity(root_dir.entries.len()); let mut localhost_entries = IndexMap::new(); for entry in std::mem::take(&mut root_dir.entries) { @@ -793,6 +792,8 @@ impl<'a> DenoCompileBinaryWriter<'a> { root_dir.entries = new_entries; }); + builder.set_new_root_path(root_path.to_path_buf())?; + Ok(builder) } } diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index b483ea94155a87..0298695627431d 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -407,7 +407,7 @@ pub async fn run(data: StandaloneData) -> Result { let root_dir_url = Arc::new(ModuleSpecifier::from_directory_path(&root_path).unwrap()); let main_module = root_dir_url.join(&metadata.entrypoint_key).unwrap(); - let root_node_modules_path = root_path.join("node_modules"); + let root_node_modules_path = root_path.join(".deno_compile_node_modules"); let npm_cache_dir = NpmCacheDir::new( &RealDenoCacheEnv, root_node_modules_path.clone(), diff --git a/cli/standalone/serialization.rs b/cli/standalone/serialization.rs index 9480cdb0f3899e..e008d6bcf63590 100644 --- a/cli/standalone/serialization.rs +++ b/cli/standalone/serialization.rs @@ -229,9 +229,10 @@ impl<'a> DenoCompileModuleData<'a> { fn into_string_unsafe(data: Cow<'static, [u8]>) -> ModuleSourceCode { match data { - Cow::Borrowed(d) => ModuleSourceCode::String(unsafe { - FastString::from_static(std::str::from_utf8_unchecked(d)) - }), + Cow::Borrowed(d) => ModuleSourceCode::String( + // SAFETY: we know this is a valid utf8 string + unsafe { FastString::from_static(std::str::from_utf8_unchecked(d)) }, + ), Cow::Owned(d) => ModuleSourceCode::Bytes(d.into_boxed_slice().into()), } } @@ -440,6 +441,7 @@ fn deserialize_npm_snapshot( Ok((input, id)) } + #[allow(clippy::needless_lifetimes)] // clippy bug fn parse_root_package<'a>( id_to_npm_id: &'a impl Fn(usize) -> Result, ) -> impl Fn(&[u8]) -> Result<(&[u8], (PackageReq, NpmPackageId)), AnyError> + 'a @@ -452,6 +454,7 @@ fn deserialize_npm_snapshot( } } + #[allow(clippy::needless_lifetimes)] // clippy bug fn parse_package_dep<'a>( id_to_npm_id: &'a impl Fn(usize) -> Result, ) -> impl Fn(&[u8]) -> Result<(&[u8], (String, NpmPackageId)), AnyError> + 'a diff --git a/cli/standalone/virtual_fs.rs b/cli/standalone/virtual_fs.rs index b774b88f0628ec..330dc8b9cd2fca 100644 --- a/cli/standalone/virtual_fs.rs +++ b/cli/standalone/virtual_fs.rs @@ -68,6 +68,25 @@ impl VfsBuilder { }) } + pub fn set_new_root_path( + &mut self, + root_path: PathBuf, + ) -> Result<(), AnyError> { + self.root_path = canonicalize_path(&root_path)?; + self.root_dir = VirtualDirectory { + name: self + .root_path + .file_stem() + .map(|s| s.to_string_lossy().into_owned()) + .unwrap_or("root".to_string()), + entries: vec![VfsEntry::Dir(VirtualDirectory { + name: std::mem::take(&mut self.root_dir.name), + entries: std::mem::take(&mut self.root_dir.entries), + })], + }; + Ok(()) + } + pub fn with_root_dir( &mut self, with_root: impl FnOnce(&mut VirtualDirectory) -> R, diff --git a/tests/specs/compile/byonm_main_sub_dir/deno.json b/tests/specs/compile/byonm_main_sub_dir/deno.json index 6134d86d1c1156..fde86a1efb656b 100644 --- a/tests/specs/compile/byonm_main_sub_dir/deno.json +++ b/tests/specs/compile/byonm_main_sub_dir/deno.json @@ -1,3 +1,3 @@ { - "unstable": ["byonm"] + "nodeModulesDir": "manual" } diff --git a/tests/specs/compile/detect_cjs/output.out b/tests/specs/compile/detect_cjs/output.out index b53c443698094d..e1c27b8dcf4d2c 100644 --- a/tests/specs/compile/detect_cjs/output.out +++ b/tests/specs/compile/detect_cjs/output.out @@ -1 +1,2 @@ -error: Module not found: file:///[WILDLINE]/add.js +error: Uncaught SyntaxError: The requested module './add.js' does not provide an export named 'add' + at (file:///[WILDLINE]) From fd1333974be458c1c217261b4cc9924786a89c5b Mon Sep 17 00:00:00 2001 From: David Sherret Date: Wed, 23 Oct 2024 20:06:08 -0400 Subject: [PATCH 05/20] maybe fix ci --- cli/standalone/binary.rs | 18 +++-- cli/standalone/mod.rs | 131 ++++++++++++++++------------------- cli/standalone/virtual_fs.rs | 49 +++++++++---- cli/tools/compile.rs | 5 +- 4 files changed, 111 insertions(+), 92 deletions(-) diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 03c4775058f7bf..3e62b4393ada41 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -628,13 +628,17 @@ impl<'a> DenoCompileBinaryWriter<'a> { }; if module.specifier().scheme() == "file" { let file_path = deno_path_util::url_to_file_path(module.specifier())?; - vfs.add_file_with_data( - &file_path, - match maybe_source { - Some(source) => source, - None => RealFs.read_file_sync(&file_path, None)?, - }, - )?; + vfs + .add_file_with_data( + &file_path, + match maybe_source { + Some(source) => source, + None => RealFs.read_file_sync(&file_path, None)?, + }, + ) + .with_context(|| { + format!("Failed adding '{}'", file_path.display()) + })?; } else if let Some(source) = maybe_source { remote_modules_store.add(module.specifier(), media_type, source); } diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index 0298695627431d..d82c8dd09b9276 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -55,6 +55,7 @@ use crate::args::CacheSetting; use crate::args::NpmInstallDepsProvider; use crate::args::StorageKeyResolver; use crate::cache::Caches; +use crate::cache::DenoCacheEnvFsAdapter; use crate::cache::DenoDirProvider; use crate::cache::NodeAnalysisCache; use crate::cache::RealDenoCacheEnv; @@ -407,92 +408,80 @@ pub async fn run(data: StandaloneData) -> Result { let root_dir_url = Arc::new(ModuleSpecifier::from_directory_path(&root_path).unwrap()); let main_module = root_dir_url.join(&metadata.entrypoint_key).unwrap(); - let root_node_modules_path = root_path.join(".deno_compile_node_modules"); - let npm_cache_dir = NpmCacheDir::new( - &RealDenoCacheEnv, - root_node_modules_path.clone(), - vec![npm_registry_url.clone()], - ); - let npm_global_cache_dir = npm_cache_dir.get_cache_location(); + let npm_global_cache_dir = root_path.join(".deno_compile_node_modules"); let cache_setting = CacheSetting::Only; - let (fs, npm_resolver) = match metadata.node_modules { + let npm_resolver = match metadata.node_modules { Some(binary::NodeModules::Managed { node_modules_dir }) => { let snapshot = npm_snapshot.unwrap(); let maybe_node_modules_path = node_modules_dir .map(|node_modules_dir| root_path.join(node_modules_dir)); - let npm_resolver = - create_cli_npm_resolver(CliNpmResolverCreateOptions::Managed( - CliNpmResolverManagedCreateOptions { - snapshot: CliNpmResolverManagedSnapshotOption::Specified(Some( - snapshot, - )), - maybe_lockfile: None, - fs: fs.clone(), - http_client_provider: http_client_provider.clone(), - npm_global_cache_dir, - cache_setting, - text_only_progress_bar: progress_bar, - maybe_node_modules_path, - npm_system_info: Default::default(), - npm_install_deps_provider: Arc::new( - // this is only used for installing packages, which isn't necessary with deno compile - NpmInstallDepsProvider::empty(), - ), - // create an npmrc that uses the fake npm_registry_url to resolve packages - npmrc: Arc::new(ResolvedNpmRc { - default_config: deno_npm::npm_rc::RegistryConfigWithUrl { - registry_url: npm_registry_url.clone(), - config: Default::default(), - }, - scopes: Default::default(), - registry_configs: Default::default(), - }), - lifecycle_scripts: Default::default(), - }, - )) - .await?; - (fs, npm_resolver) + create_cli_npm_resolver(CliNpmResolverCreateOptions::Managed( + CliNpmResolverManagedCreateOptions { + snapshot: CliNpmResolverManagedSnapshotOption::Specified(Some( + snapshot, + )), + maybe_lockfile: None, + fs: fs.clone(), + http_client_provider: http_client_provider.clone(), + npm_global_cache_dir, + cache_setting, + text_only_progress_bar: progress_bar, + maybe_node_modules_path, + npm_system_info: Default::default(), + npm_install_deps_provider: Arc::new( + // this is only used for installing packages, which isn't necessary with deno compile + NpmInstallDepsProvider::empty(), + ), + // create an npmrc that uses the fake npm_registry_url to resolve packages + npmrc: Arc::new(ResolvedNpmRc { + default_config: deno_npm::npm_rc::RegistryConfigWithUrl { + registry_url: npm_registry_url.clone(), + config: Default::default(), + }, + scopes: Default::default(), + registry_configs: Default::default(), + }), + lifecycle_scripts: Default::default(), + }, + )) + .await? } Some(binary::NodeModules::Byonm { root_node_modules_dir, }) => { let root_node_modules_dir = root_node_modules_dir.map(|p| vfs.root().join(p)); - let npm_resolver = create_cli_npm_resolver( - CliNpmResolverCreateOptions::Byonm(CliByonmNpmResolverCreateOptions { + create_cli_npm_resolver(CliNpmResolverCreateOptions::Byonm( + CliByonmNpmResolverCreateOptions { fs: CliDenoResolverFs(fs.clone()), root_node_modules_dir, - }), - ) - .await?; - (fs, npm_resolver) + }, + )) + .await? } None => { - let fs = Arc::new(deno_fs::RealFs) as Arc; - let npm_resolver = - create_cli_npm_resolver(CliNpmResolverCreateOptions::Managed( - CliNpmResolverManagedCreateOptions { - snapshot: CliNpmResolverManagedSnapshotOption::Specified(None), - maybe_lockfile: None, - fs: fs.clone(), - http_client_provider: http_client_provider.clone(), - npm_global_cache_dir, - cache_setting, - text_only_progress_bar: progress_bar, - maybe_node_modules_path: None, - npm_system_info: Default::default(), - npm_install_deps_provider: Arc::new( - // this is only used for installing packages, which isn't necessary with deno compile - NpmInstallDepsProvider::empty(), - ), - // Packages from different registries are already inlined in the ESZip, - // so no need to create actual `.npmrc` configuration. - npmrc: create_default_npmrc(), - lifecycle_scripts: Default::default(), - }, - )) - .await?; - (fs, npm_resolver) + create_cli_npm_resolver(CliNpmResolverCreateOptions::Managed( + CliNpmResolverManagedCreateOptions { + snapshot: CliNpmResolverManagedSnapshotOption::Specified(None), + maybe_lockfile: None, + fs: fs.clone(), + http_client_provider: http_client_provider.clone(), + npm_global_cache_dir, + cache_setting, + text_only_progress_bar: progress_bar, + maybe_node_modules_path: None, + npm_system_info: Default::default(), + npm_install_deps_provider: Arc::new( + // this is only used for installing packages, which isn't necessary with deno compile + NpmInstallDepsProvider::empty(), + ), + // Packages from different registries are already inlined in the ESZip, + // so no need to create actual `.npmrc` configuration. + npmrc: create_default_npmrc(), + lifecycle_scripts: Default::default(), + }, + )) + .await? } }; diff --git a/cli/standalone/virtual_fs.rs b/cli/standalone/virtual_fs.rs index 330dc8b9cd2fca..0ae00accbf5ffd 100644 --- a/cli/standalone/virtual_fs.rs +++ b/cli/standalone/virtual_fs.rs @@ -72,7 +72,8 @@ impl VfsBuilder { &mut self, root_path: PathBuf, ) -> Result<(), AnyError> { - self.root_path = canonicalize_path(&root_path)?; + let root_path = canonicalize_path(&root_path)?; + self.root_path = root_path; self.root_dir = VirtualDirectory { name: self .root_path @@ -139,7 +140,7 @@ impl VfsBuilder { // inline the symlink and make the target file let file_bytes = std::fs::read(&target) .with_context(|| format!("Reading {}", path.display()))?; - self.add_file_with_data(&path, file_bytes)?; + self.add_file_with_data_inner(&path, file_bytes)?; } else { log::warn!( "{} Symlink target is outside '{}'. Excluding symlink at '{}' with target '{}'.", @@ -211,19 +212,31 @@ impl VfsBuilder { self.add_file_at_path_not_symlink(&target_path) } - pub fn add_file_at_path_not_symlink( + fn add_file_at_path_not_symlink( &mut self, path: &Path, ) -> Result<(), AnyError> { let file_bytes = std::fs::read(path) .with_context(|| format!("Reading {}", path.display()))?; - self.add_file_with_data(path, file_bytes) + self.add_file_with_data_inner(path, file_bytes) } pub fn add_file_with_data( &mut self, path: &Path, data: Vec, + ) -> Result<(), AnyError> { + let target_path = canonicalize_path(path)?; + if target_path != path { + self.add_symlink(path, &target_path)?; + } + self.add_file_with_data_inner(&target_path, data) + } + + fn add_file_with_data_inner( + &mut self, + path: &Path, + data: Vec, ) -> Result<(), AnyError> { log::debug!("Adding file '{}'", path.display()); let checksum = util::checksum::gen(&[&data]); @@ -273,8 +286,15 @@ impl VfsBuilder { path.display(), target.display() ); - let dest = self.path_relative_root(target)?; - if dest == self.path_relative_root(path)? { + let relative_target = self.path_relative_root(target)?; + let relative_path = match self.path_relative_root(path) { + Ok(path) => path, + Err(StripRootError { .. }) => { + // ignore if the original path is outside the root directory + return Ok(()); + } + }; + if relative_target == relative_path { // it's the same, ignore return Ok(()); } @@ -287,7 +307,7 @@ impl VfsBuilder { insert_index, VfsEntry::Symlink(VirtualSymlink { name: name.to_string(), - dest_parts: dest + dest_parts: relative_target .components() .map(|c| c.as_os_str().to_string_lossy().to_string()) .collect::>(), @@ -939,20 +959,23 @@ mod test { let src_path = src_path.to_path_buf(); let mut builder = VfsBuilder::new(src_path.clone()).unwrap(); builder - .add_file_with_data(&src_path.join("a.txt"), "data".into()) + .add_file_with_data_inner(&src_path.join("a.txt"), "data".into()) .unwrap(); builder - .add_file_with_data(&src_path.join("b.txt"), "data".into()) + .add_file_with_data_inner(&src_path.join("b.txt"), "data".into()) .unwrap(); assert_eq!(builder.files.len(), 1); // because duplicate data builder - .add_file_with_data(&src_path.join("c.txt"), "c".into()) + .add_file_with_data_inner(&src_path.join("c.txt"), "c".into()) .unwrap(); builder - .add_file_with_data(&src_path.join("sub_dir").join("d.txt"), "d".into()) + .add_file_with_data_inner( + &src_path.join("sub_dir").join("d.txt"), + "d".into(), + ) .unwrap(); builder - .add_file_with_data(&src_path.join("e.txt"), "e".into()) + .add_file_with_data_inner(&src_path.join("e.txt"), "e".into()) .unwrap(); builder .add_symlink( @@ -1120,7 +1143,7 @@ mod test { let temp_path = temp_dir.path().canonicalize(); let mut builder = VfsBuilder::new(temp_path.to_path_buf()).unwrap(); builder - .add_file_with_data( + .add_file_with_data_inner( temp_path.join("a.txt").as_path(), "0123456789".to_string().into_bytes(), ) diff --git a/cli/tools/compile.rs b/cli/tools/compile.rs index bca280ffc7e680..5a4a938bb90870 100644 --- a/cli/tools/compile.rs +++ b/cli/tools/compile.rs @@ -134,7 +134,10 @@ pub async fn compile( ) .await .with_context(|| { - format!("Writing temporary file '{}'", temp_path.display()) + format!( + "Writing deno compile executable to temporary file '{}'", + temp_path.display() + ) }); // set it as executable From 1e0dabd4573411440d5065f011fd86a14ccdfc3c Mon Sep 17 00:00:00 2001 From: David Sherret Date: Wed, 23 Oct 2024 21:01:53 -0400 Subject: [PATCH 06/20] do not store data urls in the binary --- cli/standalone/binary.rs | 3 +++ cli/standalone/serialization.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 3e62b4393ada41..68182c37128cb7 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -597,6 +597,9 @@ impl<'a> DenoCompileBinaryWriter<'a> { }; let mut remote_modules_store = RemoteModulesStoreBuilder::default(); for module in graph.modules() { + if module.specifier().scheme() == "data" { + continue; // don't store data urls as an entry as they're in the code + } let (maybe_source, media_type) = match module { deno_graph::Module::Js(m) => { // todo(https://github.com/denoland/deno_media_type/pull/12): use is_emittable() diff --git a/cli/standalone/serialization.rs b/cli/standalone/serialization.rs index e008d6bcf63590..9494e18dfac104 100644 --- a/cli/standalone/serialization.rs +++ b/cli/standalone/serialization.rs @@ -228,6 +228,9 @@ impl<'a> DenoCompileModuleData<'a> { } fn into_string_unsafe(data: Cow<'static, [u8]>) -> ModuleSourceCode { + // todo(https://github.com/denoland/deno_core/pull/943): store whether + // the string is ascii or not ahead of time so we can avoid the is_ascii() + // check in FastString::from_static match data { Cow::Borrowed(d) => ModuleSourceCode::String( // SAFETY: we know this is a valid utf8 string From d7cd10b91a7d43fa0cb5ee39739b165ac46e88e3 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 09:55:44 -0400 Subject: [PATCH 07/20] switch to le because this is not network --- cli/standalone/binary.rs | 7 ------ cli/standalone/serialization.rs | 38 ++++++++++++++++----------------- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 68182c37128cb7..dba0a364d65b96 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -348,13 +348,6 @@ pub fn extract_standalone( })) } -fn u64_from_bytes(arr: &[u8]) -> Result { - let fixed_arr: &[u8; 8] = arr - .try_into() - .context("Failed to convert the buffer into a fixed-size array")?; - Ok(u64::from_be_bytes(*fixed_arr)) -} - pub struct DenoCompileBinaryWriter<'a> { deno_dir: &'a DenoDir, emitter: &'a Emitter, diff --git a/cli/standalone/serialization.rs b/cli/standalone/serialization.rs index 9494e18dfac104..12927845bc3e46 100644 --- a/cli/standalone/serialization.rs +++ b/cli/standalone/serialization.rs @@ -42,7 +42,7 @@ pub fn serialize_binary_data_section( vfs: VfsBuilder, ) -> Result, AnyError> { fn write_bytes_with_len(bytes: &mut Vec, data: &[u8]) { - bytes.extend_from_slice(&(data.len() as u64).to_be_bytes()); + bytes.extend_from_slice(&(data.len() as u64).to_le_bytes()); bytes.extend_from_slice(data); } @@ -63,11 +63,11 @@ pub fn serialize_binary_data_section( // 3. Remote modules { let update_index = bytes.len(); - bytes.extend_from_slice(&(0_u64).to_be_bytes()); + bytes.extend_from_slice(&(0_u64).to_le_bytes()); let start_index = bytes.len(); remote_modules.write(&mut bytes)?; let length = bytes.len() - start_index; - let length_bytes = (length as u64).to_be_bytes(); + let length_bytes = (length as u64).to_le_bytes(); bytes[update_index..update_index + length_bytes.len()] .copy_from_slice(&length_bytes); } @@ -77,7 +77,7 @@ pub fn serialize_binary_data_section( let vfs = serde_json::to_string(&vfs)?; write_bytes_with_len(&mut bytes, vfs.as_bytes()); let vfs_bytes_len = vfs_files.iter().map(|f| f.len() as u64).sum::(); - bytes.extend_from_slice(&vfs_bytes_len.to_be_bytes()); + bytes.extend_from_slice(&vfs_bytes_len.to_le_bytes()); for file in &vfs_files { bytes.extend_from_slice(file); } @@ -190,22 +190,22 @@ impl RemoteModulesStoreBuilder { } fn write(&self, writer: &mut dyn Write) -> Result<(), AnyError> { - writer.write_all(&(self.specifiers.len() as u32).to_be_bytes())?; - writer.write_all(&(self.redirects.len() as u32).to_be_bytes())?; + writer.write_all(&(self.specifiers.len() as u32).to_le_bytes())?; + writer.write_all(&(self.redirects.len() as u32).to_le_bytes())?; for (specifier, offset) in &self.specifiers { - writer.write_all(&(specifier.len() as u32).to_be_bytes())?; + writer.write_all(&(specifier.len() as u32).to_le_bytes())?; writer.write_all(specifier.as_bytes())?; - writer.write_all(&offset.to_be_bytes())?; + writer.write_all(&offset.to_le_bytes())?; } for (from, to) in &self.redirects { - writer.write_all(&(from.len() as u32).to_be_bytes())?; + writer.write_all(&(from.len() as u32).to_le_bytes())?; writer.write_all(from.as_bytes())?; - writer.write_all(&(to.len() as u32).to_be_bytes())?; + writer.write_all(&(to.len() as u32).to_le_bytes())?; writer.write_all(to.as_bytes())?; } for (media_type, data) in &self.data { writer.write_all(&[serialize_media_type(*media_type)])?; - writer.write_all(&(data.len() as u64).to_be_bytes())?; + writer.write_all(&(data.len() as u64).to_le_bytes())?; writer.write_all(data)?; } Ok(()) @@ -392,7 +392,7 @@ fn serialize_npm_snapshot( ) -> Vec { fn append_string(bytes: &mut Vec, string: &str) { let len = string.len() as u32; - bytes.extend_from_slice(&len.to_be_bytes()); + bytes.extend_from_slice(&len.to_le_bytes()); bytes.extend_from_slice(string.as_bytes()); } @@ -408,27 +408,27 @@ fn serialize_npm_snapshot( root_packages.sort(); let mut bytes = Vec::new(); - bytes.extend_from_slice(&(snapshot.packages.len() as u32).to_be_bytes()); + bytes.extend_from_slice(&(snapshot.packages.len() as u32).to_le_bytes()); for pkg in &snapshot.packages { append_string(&mut bytes, &pkg.id.as_serialized()); } - bytes.extend_from_slice(&(root_packages.len() as u32).to_be_bytes()); + bytes.extend_from_slice(&(root_packages.len() as u32).to_le_bytes()); for (req, id) in root_packages { append_string(&mut bytes, &req.to_string()); let id = ids_to_stored_ids.get(&id).unwrap(); - bytes.extend_from_slice(&id.to_be_bytes()); + bytes.extend_from_slice(&id.to_le_bytes()); } for pkg in &snapshot.packages { let deps_len = pkg.dependencies.len() as u32; - bytes.extend_from_slice(&deps_len.to_be_bytes()); + bytes.extend_from_slice(&deps_len.to_le_bytes()); let mut deps: Vec<_> = pkg.dependencies.iter().collect(); deps.sort(); for (req, id) in deps { append_string(&mut bytes, req); let id = ids_to_stored_ids.get(&id).unwrap(); - bytes.extend_from_slice(&id.to_be_bytes()); + bytes.extend_from_slice(&id.to_le_bytes()); } } @@ -631,12 +631,12 @@ fn read_string_lossy(input: &[u8]) -> Result<(&[u8], Cow), AnyError> { fn read_u32_as_usize(input: &[u8]) -> Result<(&[u8], usize), AnyError> { let (input, len_bytes) = read_bytes(input, 4)?; - let len = u32::from_be_bytes(len_bytes.try_into()?); + let len = u32::from_le_bytes(len_bytes.try_into()?); Ok((input, len as usize)) } fn read_u64(input: &[u8]) -> Result<(&[u8], u64), AnyError> { let (input, len_bytes) = read_bytes(input, 8)?; - let len = u64::from_be_bytes(len_bytes.try_into()?); + let len = u64::from_le_bytes(len_bytes.try_into()?); Ok((input, len)) } From c1569449e0b1e21d6ef05f6dd38f3a79f09ab888 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 15:13:33 -0400 Subject: [PATCH 08/20] review --- cli/standalone/binary.rs | 10 +++++++--- cli/standalone/mod.rs | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index dba0a364d65b96..0f8b0b49d4a408 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -312,9 +312,13 @@ pub fn extract_standalone( }; let root_path = { - let current_exe_path = std::env::current_exe().unwrap(); - let current_exe_name = - current_exe_path.file_name().unwrap().to_string_lossy(); + let maybe_current_exe = std::env::current_exe().ok(); + let current_exe_name = maybe_current_exe + .as_ref() + .and_then(|p| p.file_name()) + .map(|p| p.to_string_lossy()) + // should never happen + .unwrap_or_else(|| Cow::Borrowed("binary")); std::env::temp_dir().join(format!("deno-compile-{}", current_exe_name)) }; let cli_args = cli_args.into_owned(); diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index d82c8dd09b9276..3a62b6ff96d63b 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -475,7 +475,7 @@ pub async fn run(data: StandaloneData) -> Result { // this is only used for installing packages, which isn't necessary with deno compile NpmInstallDepsProvider::empty(), ), - // Packages from different registries are already inlined in the ESZip, + // Packages from different registries are already inlined in the binary, // so no need to create actual `.npmrc` configuration. npmrc: create_default_npmrc(), lifecycle_scripts: Default::default(), From 938c3e00735fc313988bcd58f3ebddb48908255e Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 13:30:15 -0400 Subject: [PATCH 09/20] perf(compile): code cache for initial load --- cli/standalone/binary.rs | 9 ++ cli/standalone/code_cache.rs | 288 +++++++++++++++++++++++++++++++++++ cli/standalone/mod.rs | 73 ++++++++- runtime/code_cache.rs | 14 +- 4 files changed, 369 insertions(+), 15 deletions(-) create mode 100644 cli/standalone/code_cache.rs diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 0f8b0b49d4a408..5a961cc6f10346 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -54,6 +54,7 @@ use indexmap::IndexMap; use log::Level; use serde::Deserialize; use serde::Serialize; +use windows_sys::Wdk::System; use crate::args::CaData; use crate::args::CliOptions; @@ -171,6 +172,9 @@ pub struct SerializedWorkspaceResolver { pub struct Metadata { pub argv: Vec, pub seed: Option, + /// A randomly generated value that is used as the cache key for this + /// compilation. + pub cache_key: String, pub permissions: PermissionFlags, pub location: Option, pub v8_flags: Vec, @@ -656,6 +660,11 @@ impl<'a> DenoCompileBinaryWriter<'a> { let metadata = Metadata { argv: compile_flags.args.clone(), seed: cli_options.seed(), + cache_key: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + .to_string(), location: cli_options.location_flag().clone(), permissions: cli_options.permission_flags().clone(), v8_flags: cli_options.v8_flags().clone(), diff --git a/cli/standalone/code_cache.rs b/cli/standalone/code_cache.rs new file mode 100644 index 00000000000000..aac1563b62f1e2 --- /dev/null +++ b/cli/standalone/code_cache.rs @@ -0,0 +1,288 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. + +use std::collections::BTreeMap; +use std::collections::HashMap; +use std::io::BufReader; +use std::io::BufWriter; +use std::io::Read; +use std::io::Write; +use std::path::Path; +use std::path::PathBuf; + +use deno_ast::ModuleSpecifier; +use deno_core::anyhow::bail; +use deno_core::error::AnyError; +use deno_core::parking_lot::Mutex; +use deno_core::unsync::sync::AtomicFlag; +use deno_runtime::code_cache::CodeCache; +use deno_runtime::code_cache::CodeCacheType; + +use crate::cache::FastInsecureHasher; +use crate::util::path::get_atomic_file_path; + +struct MutableData { + cache: HashMap, + modified: bool, + add_count: usize, +} + +impl MutableData { + fn take_from_cache( + &mut self, + specifier: &ModuleSpecifier, + source_hash: u64, + ) -> Option> { + let entry = self.cache.remove(specifier.as_str())?; + if entry.source_hash != source_hash { + return None; + } + Some(entry.data) + } + + fn take_cache_data( + &mut self, + ) -> Option> { + // always purge this from memory + let cache_data = std::mem::take(&mut self.cache); + + if !self.modified { + return None; + } + Some(cache_data) + } +} + +#[derive(Debug, Clone)] +pub struct DenoCompileCodeCacheEntry { + pub source_hash: u64, + pub data: Vec, +} + +pub struct DenoCompileCodeCache { + cache_key: String, + file_path: PathBuf, + finished: AtomicFlag, + data: Mutex, +} + +impl DenoCompileCodeCache { + pub fn new(file_path: PathBuf, cache_key: String) -> Self { + // attempt to deserialize the cache data + let cache = match deserialize(&file_path, &cache_key) { + Ok(cache) => cache, + Err(err) => { + log::debug!("Failed to deserialize code cache: {}", err); + HashMap::new() + } + }; + + Self { + cache_key, + file_path, + finished: AtomicFlag::lowered(), + data: Mutex::new(MutableData { + cache, + modified: false, + add_count: 0, + }), + } + } + + fn write_cache_data( + &self, + cache_data: &HashMap, + ) { + let temp_file = get_atomic_file_path(&self.file_path); + match serialize(&temp_file, &self.cache_key, cache_data) { + Ok(()) => { + if let Err(err) = std::fs::rename(&temp_file, &self.file_path) { + log::debug!("Failed to rename code cache: {}", err); + } + } + Err(err) => { + let _ = std::fs::remove_file(&temp_file); + log::debug!("Failed to serialize code cache: {}", err); + } + } + } +} + +impl CodeCache for DenoCompileCodeCache { + fn get_sync( + &self, + specifier: &ModuleSpecifier, + code_cache_type: CodeCacheType, + source_hash: u64, + ) -> Option> { + if self.finished.is_raised() { + return None; + } + let mut data = self.data.lock(); + match data.take_from_cache(specifier, source_hash) { + Some(data) => Some(data), + None => { + data.add_count += 1; + None + } + } + } + + fn set_sync( + &self, + specifier: ModuleSpecifier, + code_cache_type: CodeCacheType, + source_hash: u64, + bytes: &[u8], + ) { + if self.finished.is_raised() { + return; + } + let data_to_serialize = { + let mut data = self.data.lock(); + data.cache.insert( + specifier.to_string(), + DenoCompileCodeCacheEntry { + source_hash, + data: bytes.to_vec(), + }, + ); + data.modified = true; + if data.add_count != 0 { + data.add_count -= 1; + } + if data.add_count == 0 { + // don't allow using the cache anymore + self.finished.raise(); + data.take_cache_data() + } else { + None + } + }; + if let Some(cache_data) = &data_to_serialize { + self.write_cache_data(&cache_data); + } + } + + fn enabled(&self) -> bool { + !self.finished.is_raised() + } +} + +/// File format: +/// -
+/// - +/// - +/// - <[entry length]> - u64 * number of entries +/// - <[entry]> +/// - <[u8]: entry data> +/// - +/// - +/// - +/// - +fn serialize( + file_path: &Path, + cache_key: &str, + cache: &HashMap, +) -> Result<(), AnyError> { + let cache_file = std::fs::OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(file_path)?; + let mut writer = BufWriter::new(cache_file); + // header + writer.write_all(cache_key.as_bytes())?; + writer.write_all(&(cache.len() as u32).to_le_bytes())?; + // lengths of each entry + for (specifier, entry) in cache { + let len: u64 = entry.data.len() as u64 + specifier.len() as u64 + 4 + 8 + 8; + writer.write_all(&len.to_le_bytes())?; + } + // entries + for (specifier, entry) in cache { + writer.write_all(&entry.data)?; + writer.write_all(specifier.as_bytes())?; + writer.write_all(&(specifier.len() as u32).to_le_bytes())?; + writer.write_all(&entry.source_hash.to_le_bytes())?; + let hash: u64 = FastInsecureHasher::new_without_deno_version() + .write(&entry.data) + .finish(); + writer.write_all(&hash.to_le_bytes())?; + } + + writer.flush()?; + + Ok(()) +} + +fn deserialize( + file_path: &Path, + cache_key: &str, +) -> Result, AnyError> { + let cache_file = std::fs::File::open(file_path)?; + let mut reader = BufReader::new(cache_file); + let mut header_bytes = vec![0; cache_key.len() + 4]; + reader.read_exact(&mut header_bytes)?; + if &header_bytes[..cache_key.len()] != cache_key.as_bytes() { + // cache bust + bail!("Cache key mismatch"); + } + let len = + u32::from_le_bytes(header_bytes[cache_key.len()..].try_into()?) as usize; + // read the lengths for each entry found in the file + let entry_len_bytes_capacity = len * 8; + let mut entry_len_bytes = Vec::new(); + entry_len_bytes.try_reserve(entry_len_bytes_capacity)?; + entry_len_bytes.resize(entry_len_bytes_capacity, 0); + reader.read_exact(&mut entry_len_bytes)?; + let mut lengths = Vec::new(); + lengths.try_reserve(len)?; + for i in 0..len { + let pos = i * 8; + lengths.push( + u64::from_le_bytes(entry_len_bytes[pos..pos + 8].try_into()?) as usize, + ); + } + + let mut map = HashMap::new(); + map.try_reserve(len)?; + for len in lengths { + let mut buffer = Vec::new(); + buffer.try_reserve(len)?; + buffer.resize(len, 0); + + reader.read_exact(&mut buffer)?; + let entry_data_hash_start_pos = buffer.len() - 8; + let expected_entry_data_hash = + u64::from_le_bytes(buffer[entry_data_hash_start_pos..].try_into()?); + let source_hash_start_pos = entry_data_hash_start_pos - 8; + let source_hash = u64::from_le_bytes( + buffer[source_hash_start_pos..entry_data_hash_start_pos].try_into()?, + ); + let specifier_end_pos = source_hash_start_pos - 4; + let specifier_len = u32::from_le_bytes( + buffer[specifier_end_pos..source_hash_start_pos].try_into()?, + ) as usize; + let specifier_start_pos = specifier_end_pos - specifier_len; + let specifier = String::from_utf8( + buffer[specifier_start_pos..specifier_end_pos].to_vec(), + )?; + buffer.truncate(specifier_start_pos); + let actual_entry_data_hash: u64 = + FastInsecureHasher::new_without_deno_version() + .write(&buffer) + .finish(); + if expected_entry_data_hash != actual_entry_data_hash { + bail!("Hash mismatch.") + } + map.insert( + specifier, + DenoCompileCodeCacheEntry { + source_hash, + data: buffer, + }, + ); + } + + Ok(map) +} diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index 3a62b6ff96d63b..98584ddd99e7aa 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -7,6 +7,7 @@ use binary::StandaloneData; use binary::StandaloneModules; +use code_cache::DenoCompileCodeCache; use deno_ast::MediaType; use deno_cache_dir::npm::NpmCacheDir; use deno_config::workspace::MappedResolution; @@ -17,6 +18,7 @@ use deno_core::anyhow::Context; use deno_core::error::generic_error; use deno_core::error::type_error; use deno_core::error::AnyError; +use deno_core::futures::future::LocalBoxFuture; use deno_core::futures::FutureExt; use deno_core::v8_set_flags; use deno_core::FeatureChecker; @@ -26,6 +28,7 @@ use deno_core::ModuleSpecifier; use deno_core::ModuleType; use deno_core::RequestedModuleType; use deno_core::ResolutionKind; +use deno_core::SourceCodeCacheInfo; use deno_npm::npm_rc::ResolvedNpmRc; use deno_package_json::PackageJsonDepValue; use deno_runtime::deno_fs; @@ -57,6 +60,7 @@ use crate::args::StorageKeyResolver; use crate::cache::Caches; use crate::cache::DenoCacheEnvFsAdapter; use crate::cache::DenoDirProvider; +use crate::cache::FastInsecureHasher; use crate::cache::NodeAnalysisCache; use crate::cache::RealDenoCacheEnv; use crate::http_util::HttpClientProvider; @@ -79,6 +83,7 @@ use crate::worker::ModuleLoaderAndSourceMapGetter; use crate::worker::ModuleLoaderFactory; pub mod binary; +mod code_cache; mod file_system; mod serialization; mod virtual_fs; @@ -95,6 +100,32 @@ struct SharedModuleLoaderState { workspace_resolver: WorkspaceResolver, node_resolver: Arc, npm_module_loader: Arc, + code_cache: Arc, +} + +impl SharedModuleLoaderState { + fn get_code_cache( + &self, + specifier: &ModuleSpecifier, + source: &[u8], + ) -> Option { + if !self.code_cache.enabled() { + return None; + } + // deno version is already included in the root cache key + let hash = FastInsecureHasher::new_without_deno_version() + .write_hashable(source) + .finish(); + let data = self.code_cache.get_sync( + specifier, + deno_runtime::code_cache::CodeCacheType::EsModule, + hash, + ); + Some(SourceCodeCacheInfo { + hash, + data: data.map(Cow::Owned), + }) + } } #[derive(Clone)] @@ -292,14 +323,19 @@ impl ModuleLoader for EmbeddedModuleLoader { } if self.shared.node_resolver.in_npm_package(original_specifier) { - let npm_module_loader = self.shared.npm_module_loader.clone(); + let shared = self.shared.clone(); let original_specifier = original_specifier.clone(); let maybe_referrer = maybe_referrer.cloned(); return deno_core::ModuleLoadResponse::Async( async move { - let code_source = npm_module_loader + let code_source = shared + .npm_module_loader .load(&original_specifier, maybe_referrer.as_ref()) .await?; + let code_cache_entry = shared.get_code_cache( + &code_source.found_url, + code_source.code.as_bytes(), + ); Ok(deno_core::ModuleSource::new_with_redirect( match code_source.media_type { MediaType::Json => ModuleType::Json, @@ -308,7 +344,7 @@ impl ModuleLoader for EmbeddedModuleLoader { code_source.code, &original_specifier, &code_source.found_url, - None, + code_cache_entry, )) } .boxed_local(), @@ -319,13 +355,16 @@ impl ModuleLoader for EmbeddedModuleLoader { Ok(Some(module)) => { let (module_specifier, module_type, module_source) = module.into_for_v8(); + let code_cache_entry = self + .shared + .get_code_cache(&module_specifier, module_source.as_bytes()); deno_core::ModuleLoadResponse::Sync(Ok( deno_core::ModuleSource::new_with_redirect( module_type, module_source, original_specifier, module_specifier, - None, + code_cache_entry, ), )) } @@ -337,6 +376,21 @@ impl ModuleLoader for EmbeddedModuleLoader { ))), } } + + fn code_cache_ready( + &self, + specifier: ModuleSpecifier, + source_hash: u64, + code_cache: &[u8], + ) -> LocalBoxFuture<'static, ()> { + self.shared.code_cache.set_sync( + specifier, + deno_runtime::code_cache::CodeCacheType::EsModule, + source_hash, + code_cache, + ); + std::future::ready(()).boxed_local() + } } struct StandaloneModuleLoaderFactory { @@ -560,6 +614,13 @@ pub async fn run(data: StandaloneData) -> Result { metadata.workspace_resolver.pkg_json_resolution, ) }; + let code_cache = Arc::new(DenoCompileCodeCache::new( + root_path.with_file_name(format!( + "{}.cache", + root_path.file_name().unwrap().to_string_lossy() + )), + metadata.cache_key, + )); let module_loader_factory = StandaloneModuleLoaderFactory { shared: Arc::new(SharedModuleLoaderState { modules, @@ -571,6 +632,7 @@ pub async fn run(data: StandaloneData) -> Result { fs.clone(), cli_node_resolver, )), + code_cache: code_cache.clone(), }), }; @@ -610,8 +672,7 @@ pub async fn run(data: StandaloneData) -> Result { let worker_factory = CliMainWorkerFactory::new( Arc::new(BlobStore::default()), cjs_resolutions, - // Code cache is not supported for standalone binary yet. - None, + Some(code_cache), feature_checker, fs, None, diff --git a/runtime/code_cache.rs b/runtime/code_cache.rs index 2a56543a4127f8..f1066408287c9c 100644 --- a/runtime/code_cache.rs +++ b/runtime/code_cache.rs @@ -7,15 +7,6 @@ pub enum CodeCacheType { Script, } -impl CodeCacheType { - pub fn as_str(&self) -> &str { - match self { - Self::EsModule => "esmodule", - Self::Script => "script", - } - } -} - pub trait CodeCache: Send + Sync { fn get_sync( &self, @@ -30,4 +21,9 @@ pub trait CodeCache: Send + Sync { source_hash: u64, data: &[u8], ); + + /// Gets if the code cache is still enabled. + fn enabled(&self) -> bool { + true + } } From 0a7c050bbc7c90c36d56da8f559ca1445f93bde1 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 14:45:32 -0400 Subject: [PATCH 10/20] use distinct strategies for compile --- cli/cache/code_cache.rs | 10 ++ cli/standalone/code_cache.rs | 245 +++++++++++++++++++++-------------- cli/standalone/mod.rs | 3 +- cli/worker.rs | 15 ++- runtime/code_cache.rs | 6 +- 5 files changed, 170 insertions(+), 109 deletions(-) diff --git a/cli/cache/code_cache.rs b/cli/cache/code_cache.rs index abcd0d46ac1cff..b1d9ae757b9038 100644 --- a/cli/cache/code_cache.rs +++ b/cli/cache/code_cache.rs @@ -1,10 +1,14 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. +use std::sync::Arc; + use deno_ast::ModuleSpecifier; use deno_core::error::AnyError; use deno_runtime::code_cache; use deno_runtime::deno_webstorage::rusqlite::params; +use crate::worker::CliCodeCache; + use super::cache_db::CacheDB; use super::cache_db::CacheDBConfiguration; use super::cache_db::CacheDBHash; @@ -82,6 +86,12 @@ impl CodeCache { } } +impl CliCodeCache for CodeCache { + fn as_code_cache(self: Arc) -> Arc { + self + } +} + impl code_cache::CodeCache for CodeCache { fn get_sync( &self, diff --git a/cli/standalone/code_cache.rs b/cli/standalone/code_cache.rs index aac1563b62f1e2..4050e49ea3789b 100644 --- a/cli/standalone/code_cache.rs +++ b/cli/standalone/code_cache.rs @@ -8,6 +8,7 @@ use std::io::Read; use std::io::Write; use std::path::Path; use std::path::PathBuf; +use std::sync::Arc; use deno_ast::ModuleSpecifier; use deno_core::anyhow::bail; @@ -19,37 +20,11 @@ use deno_runtime::code_cache::CodeCacheType; use crate::cache::FastInsecureHasher; use crate::util::path::get_atomic_file_path; +use crate::worker::CliCodeCache; -struct MutableData { - cache: HashMap, - modified: bool, - add_count: usize, -} - -impl MutableData { - fn take_from_cache( - &mut self, - specifier: &ModuleSpecifier, - source_hash: u64, - ) -> Option> { - let entry = self.cache.remove(specifier.as_str())?; - if entry.source_hash != source_hash { - return None; - } - Some(entry.data) - } - - fn take_cache_data( - &mut self, - ) -> Option> { - // always purge this from memory - let cache_data = std::mem::take(&mut self.cache); - - if !self.modified { - return None; - } - Some(cache_data) - } +enum CodeCacheStrategy { + FirstRun(FirstRunCodeCacheStrategy), + SubsequentRun(SubsequentRunCodeCacheStrategy), } #[derive(Debug, Clone)] @@ -59,49 +34,37 @@ pub struct DenoCompileCodeCacheEntry { } pub struct DenoCompileCodeCache { - cache_key: String, - file_path: PathBuf, - finished: AtomicFlag, - data: Mutex, + strategy: CodeCacheStrategy, } impl DenoCompileCodeCache { pub fn new(file_path: PathBuf, cache_key: String) -> Self { // attempt to deserialize the cache data - let cache = match deserialize(&file_path, &cache_key) { - Ok(cache) => cache, - Err(err) => { - log::debug!("Failed to deserialize code cache: {}", err); - HashMap::new() - } - }; - - Self { - cache_key, - file_path, - finished: AtomicFlag::lowered(), - data: Mutex::new(MutableData { - cache, - modified: false, - add_count: 0, - }), - } - } - - fn write_cache_data( - &self, - cache_data: &HashMap, - ) { - let temp_file = get_atomic_file_path(&self.file_path); - match serialize(&temp_file, &self.cache_key, cache_data) { - Ok(()) => { - if let Err(err) = std::fs::rename(&temp_file, &self.file_path) { - log::debug!("Failed to rename code cache: {}", err); + match deserialize(&file_path, &cache_key) { + Ok(data) => { + log::debug!("Loaded {} code cache entries", data.len()); + Self { + strategy: CodeCacheStrategy::SubsequentRun( + SubsequentRunCodeCacheStrategy { + is_finished: AtomicFlag::lowered(), + data: Mutex::new(data), + }, + ), } } Err(err) => { - let _ = std::fs::remove_file(&temp_file); - log::debug!("Failed to serialize code cache: {}", err); + log::debug!("Failed to deserialize code cache: {:#}", err); + Self { + strategy: CodeCacheStrategy::FirstRun(FirstRunCodeCacheStrategy { + cache_key, + file_path, + is_finished: AtomicFlag::lowered(), + data: Mutex::new(FirstRunCodeCacheData { + cache: HashMap::new(), + add_count: 0, + }), + }), + } } } } @@ -111,60 +74,142 @@ impl CodeCache for DenoCompileCodeCache { fn get_sync( &self, specifier: &ModuleSpecifier, - code_cache_type: CodeCacheType, + _code_cache_type: CodeCacheType, source_hash: u64, ) -> Option> { - if self.finished.is_raised() { - return None; - } - let mut data = self.data.lock(); - match data.take_from_cache(specifier, source_hash) { - Some(data) => Some(data), - None => { - data.add_count += 1; + match &self.strategy { + CodeCacheStrategy::FirstRun(strategy) => { + if !strategy.is_finished.is_raised() { + strategy.data.lock().add_count += 1; + } None } + CodeCacheStrategy::SubsequentRun(strategy) => { + if strategy.is_finished.is_raised() { + return None; + } + strategy.take_from_cache(specifier, source_hash) + } } } fn set_sync( &self, specifier: ModuleSpecifier, - code_cache_type: CodeCacheType, + _code_cache_type: CodeCacheType, source_hash: u64, bytes: &[u8], ) { - if self.finished.is_raised() { - return; - } - let data_to_serialize = { - let mut data = self.data.lock(); - data.cache.insert( - specifier.to_string(), - DenoCompileCodeCacheEntry { - source_hash, - data: bytes.to_vec(), - }, - ); - data.modified = true; - if data.add_count != 0 { - data.add_count -= 1; + match &self.strategy { + CodeCacheStrategy::FirstRun(strategy) => { + if strategy.is_finished.is_raised() { + return; + } + + let data_to_serialize = { + let mut data = strategy.data.lock(); + data.cache.insert( + specifier.to_string(), + DenoCompileCodeCacheEntry { + source_hash, + data: bytes.to_vec(), + }, + ); + if data.add_count != 0 { + data.add_count -= 1; + } + if data.add_count == 0 { + // don't allow using the cache anymore + strategy.is_finished.raise(); + if data.cache.is_empty() { + None + } else { + Some(std::mem::take(&mut data.cache)) + } + } else { + None + } + }; + if let Some(cache_data) = &data_to_serialize { + strategy.write_cache_data(&cache_data); + } } - if data.add_count == 0 { - // don't allow using the cache anymore - self.finished.raise(); - data.take_cache_data() - } else { - None + CodeCacheStrategy::SubsequentRun(_) => { + // do nothing } - }; - if let Some(cache_data) = &data_to_serialize { - self.write_cache_data(&cache_data); } } +} +impl CliCodeCache for DenoCompileCodeCache { fn enabled(&self) -> bool { - !self.finished.is_raised() + match &self.strategy { + CodeCacheStrategy::FirstRun(strategy) => { + !strategy.is_finished.is_raised() + } + CodeCacheStrategy::SubsequentRun(strategy) => { + !strategy.is_finished.is_raised() + } + } + } + + fn as_code_cache(self: Arc) -> Arc { + self + } +} + +struct FirstRunCodeCacheData { + cache: HashMap, + add_count: usize, +} + +struct FirstRunCodeCacheStrategy { + cache_key: String, + file_path: PathBuf, + is_finished: AtomicFlag, + data: Mutex, +} + +impl FirstRunCodeCacheStrategy { + fn write_cache_data( + &self, + cache_data: &HashMap, + ) { + let count = cache_data.len(); + let temp_file = get_atomic_file_path(&self.file_path); + match serialize(&temp_file, &self.cache_key, cache_data) { + Ok(()) => { + if let Err(err) = std::fs::rename(&temp_file, &self.file_path) { + log::debug!("Failed to rename code cache: {}", err); + } else { + log::debug!("Serialized {} code cache entries", count); + } + } + Err(err) => { + let _ = std::fs::remove_file(&temp_file); + log::debug!("Failed to serialize code cache: {}", err); + } + } + } +} + +struct SubsequentRunCodeCacheStrategy { + is_finished: AtomicFlag, + data: Mutex>, +} + +impl SubsequentRunCodeCacheStrategy { + fn take_from_cache( + &self, + specifier: &ModuleSpecifier, + source_hash: u64, + ) -> Option> { + let mut data = self.data.lock(); + let entry = data.remove(specifier.as_str())?; + if entry.source_hash != source_hash { + return None; + } + Some(entry.data) } } diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index 98584ddd99e7aa..89b9393ef5abcb 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -77,6 +77,7 @@ use crate::resolver::NpmModuleLoader; use crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::ProgressBarStyle; use crate::util::v8::construct_v8_flags; +use crate::worker::CliCodeCache; use crate::worker::CliMainWorkerFactory; use crate::worker::CliMainWorkerOptions; use crate::worker::ModuleLoaderAndSourceMapGetter; @@ -100,7 +101,7 @@ struct SharedModuleLoaderState { workspace_resolver: WorkspaceResolver, node_resolver: Arc, npm_module_loader: Arc, - code_cache: Arc, + code_cache: Arc, } impl SharedModuleLoaderState { diff --git a/cli/worker.rs b/cli/worker.rs index e230197d2b56b5..d606ce18736ccd 100644 --- a/cli/worker.rs +++ b/cli/worker.rs @@ -82,6 +82,15 @@ pub trait HmrRunner: Send + Sync { async fn run(&mut self) -> Result<(), AnyError>; } +pub trait CliCodeCache: code_cache::CodeCache { + /// Gets if the code cache is still enabled. + fn enabled(&self) -> bool { + true + } + + fn as_code_cache(self: Arc) -> Arc; +} + #[async_trait::async_trait(?Send)] pub trait CoverageCollector: Send + Sync { async fn start_collecting(&mut self) -> Result<(), AnyError>; @@ -129,7 +138,7 @@ struct SharedWorkerState { blob_store: Arc, broadcast_channel: InMemoryBroadcastChannel, cjs_resolution_store: Arc, - code_cache: Option>, + code_cache: Option>, compiled_wasm_module_store: CompiledWasmModuleStore, feature_checker: Arc, fs: Arc, @@ -427,7 +436,7 @@ impl CliMainWorkerFactory { pub fn new( blob_store: Arc, cjs_resolution_store: Arc, - code_cache: Option>, + code_cache: Option>, feature_checker: Arc, fs: Arc, maybe_file_watcher_communicator: Option>, @@ -607,7 +616,7 @@ impl CliMainWorkerFactory { ), feature_checker, permissions, - v8_code_cache: shared.code_cache.clone(), + v8_code_cache: shared.code_cache.clone().map(|c| c.as_code_cache()), }; let options = WorkerOptions { bootstrap: BootstrapOptions { diff --git a/runtime/code_cache.rs b/runtime/code_cache.rs index f1066408287c9c..ff656a01883c11 100644 --- a/runtime/code_cache.rs +++ b/runtime/code_cache.rs @@ -14,6 +14,7 @@ pub trait CodeCache: Send + Sync { code_cache_type: CodeCacheType, source_hash: u64, ) -> Option>; + fn set_sync( &self, specifier: ModuleSpecifier, @@ -21,9 +22,4 @@ pub trait CodeCache: Send + Sync { source_hash: u64, data: &[u8], ); - - /// Gets if the code cache is still enabled. - fn enabled(&self) -> bool { - true - } } From 0957e0910bc50b6a97f0a62efaf3fcfa3a40082e Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 15:08:19 -0400 Subject: [PATCH 11/20] use distinct strategies for compile --- cli/standalone/code_cache.rs | 82 ++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 8 deletions(-) diff --git a/cli/standalone/code_cache.rs b/cli/standalone/code_cache.rs index 4050e49ea3789b..dfbc794bbffba5 100644 --- a/cli/standalone/code_cache.rs +++ b/cli/standalone/code_cache.rs @@ -27,7 +27,7 @@ enum CodeCacheStrategy { SubsequentRun(SubsequentRunCodeCacheStrategy), } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct DenoCompileCodeCacheEntry { pub source_hash: u64, pub data: Vec, @@ -80,6 +80,9 @@ impl CodeCache for DenoCompileCodeCache { match &self.strategy { CodeCacheStrategy::FirstRun(strategy) => { if !strategy.is_finished.is_raised() { + // we keep track of how many times the cache is requested + // then serialize the cache when we get that number of + // "set" calls strategy.data.lock().add_count += 1; } None @@ -264,6 +267,18 @@ fn deserialize( file_path: &Path, cache_key: &str, ) -> Result, AnyError> { + // it's very important to use this below so that a corrupt cache file + // doesn't cause a memory allocation error + fn new_vec_sized( + capacity: usize, + default_value: T, + ) -> Result, AnyError> { + let mut vec = Vec::new(); + vec.try_reserve(capacity)?; + vec.resize(capacity, default_value); + Ok(vec) + } + let cache_file = std::fs::File::open(file_path)?; let mut reader = BufReader::new(cache_file); let mut header_bytes = vec![0; cache_key.len() + 4]; @@ -276,9 +291,7 @@ fn deserialize( u32::from_le_bytes(header_bytes[cache_key.len()..].try_into()?) as usize; // read the lengths for each entry found in the file let entry_len_bytes_capacity = len * 8; - let mut entry_len_bytes = Vec::new(); - entry_len_bytes.try_reserve(entry_len_bytes_capacity)?; - entry_len_bytes.resize(entry_len_bytes_capacity, 0); + let mut entry_len_bytes = new_vec_sized(entry_len_bytes_capacity, 0)?; reader.read_exact(&mut entry_len_bytes)?; let mut lengths = Vec::new(); lengths.try_reserve(len)?; @@ -292,10 +305,7 @@ fn deserialize( let mut map = HashMap::new(); map.try_reserve(len)?; for len in lengths { - let mut buffer = Vec::new(); - buffer.try_reserve(len)?; - buffer.resize(len, 0); - + let mut buffer = new_vec_sized(len, 0)?; reader.read_exact(&mut buffer)?; let entry_data_hash_start_pos = buffer.len() - 8; let expected_entry_data_hash = @@ -331,3 +341,59 @@ fn deserialize( Ok(map) } + +#[cfg(test)] +mod test { + use test_util::TempDir; + + use super::*; + use std::fs::File; + + #[test] + fn serialize_deserialize() { + let temp_dir = TempDir::new(); + let cache_key = "cache_key"; + let cache = { + let mut cache = HashMap::new(); + cache.insert( + "specifier1".to_string(), + DenoCompileCodeCacheEntry { + source_hash: 1, + data: vec![1, 2, 3], + }, + ); + cache.insert( + "specifier2".to_string(), + DenoCompileCodeCacheEntry { + source_hash: 2, + data: vec![4, 5, 6], + }, + ); + cache + }; + let file_path = temp_dir.path().join("cache.bin").to_path_buf(); + serialize(&file_path, cache_key, &cache).unwrap(); + let deserialized = deserialize(&file_path, cache_key).unwrap(); + assert_eq!(cache, deserialized); + } + + #[test] + fn serialize_deserialize_empty() { + let temp_dir = TempDir::new(); + let cache_key = "cache_key"; + let cache = HashMap::new(); + let file_path = temp_dir.path().join("cache.bin").to_path_buf(); + serialize(&file_path, cache_key, &cache).unwrap(); + let deserialized = deserialize(&file_path, cache_key).unwrap(); + assert_eq!(cache, deserialized); + } + + #[test] + fn serialize_deserialize_corrupt() { + let temp_dir = TempDir::new(); + let file_path = temp_dir.path().join("cache.bin").to_path_buf(); + std::fs::write(&file_path, b"corrupttestingtestingtesting").unwrap(); + let err = deserialize(&file_path, "cache-key").unwrap_err(); + assert_eq!(err.to_string(), "Cache key mismatch"); + } +} From bad329a848dd3fcd317d0195c5406de7107fa0d9 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 15:46:27 -0400 Subject: [PATCH 12/20] tests --- cli/standalone/binary.rs | 16 ++-- cli/standalone/code_cache.rs | 86 ++++++++++++++++--- cli/standalone/mod.rs | 2 +- .../compile/compile_cache/__test__.jsonc | 32 +++++++ .../specs/compile/compile_cache/first_run.out | 1 + tests/specs/compile/compile_cache/main.ts | 3 + .../compile/compile_cache/second_run.out | 1 + 7 files changed, 118 insertions(+), 23 deletions(-) create mode 100644 tests/specs/compile/compile_cache/__test__.jsonc create mode 100644 tests/specs/compile/compile_cache/first_run.out create mode 100644 tests/specs/compile/compile_cache/main.ts create mode 100644 tests/specs/compile/compile_cache/second_run.out diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 5a961cc6f10346..120015fb1cec8b 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -63,6 +63,7 @@ use crate::args::NpmInstallDepsProvider; use crate::args::PermissionFlags; use crate::args::UnstableConfig; use crate::cache::DenoDir; +use crate::cache::FastInsecureHasher; use crate::emit::Emitter; use crate::file_fetcher::FileFetcher; use crate::http_util::HttpClientProvider; @@ -172,9 +173,7 @@ pub struct SerializedWorkspaceResolver { pub struct Metadata { pub argv: Vec, pub seed: Option, - /// A randomly generated value that is used as the cache key for this - /// compilation. - pub cache_key: String, + pub code_cache_key: u64, pub permissions: PermissionFlags, pub location: Option, pub v8_flags: Vec, @@ -597,10 +596,15 @@ impl<'a> DenoCompileBinaryWriter<'a> { VfsBuilder::new(root_path.clone())? }; let mut remote_modules_store = RemoteModulesStoreBuilder::default(); + let mut code_cache_key_hasher = FastInsecureHasher::new_deno_versioned(); for module in graph.modules() { if module.specifier().scheme() == "data" { continue; // don't store data urls as an entry as they're in the code } + if let Some(source) = module.source() { + code_cache_key_hasher.write(module.specifier().as_str().as_bytes()); + code_cache_key_hasher.write(source.as_bytes()); + } let (maybe_source, media_type) = match module { deno_graph::Module::Js(m) => { // todo(https://github.com/denoland/deno_media_type/pull/12): use is_emittable() @@ -660,11 +664,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { let metadata = Metadata { argv: compile_flags.args.clone(), seed: cli_options.seed(), - cache_key: std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_nanos() - .to_string(), + code_cache_key: code_cache_key_hasher.finish(), location: cli_options.location_flag().clone(), permissions: cli_options.permission_flags().clone(), v8_flags: cli_options.v8_flags().clone(), diff --git a/cli/standalone/code_cache.rs b/cli/standalone/code_cache.rs index dfbc794bbffba5..4ff90beec19aed 100644 --- a/cli/standalone/code_cache.rs +++ b/cli/standalone/code_cache.rs @@ -38,9 +38,9 @@ pub struct DenoCompileCodeCache { } impl DenoCompileCodeCache { - pub fn new(file_path: PathBuf, cache_key: String) -> Self { + pub fn new(file_path: PathBuf, cache_key: u64) -> Self { // attempt to deserialize the cache data - match deserialize(&file_path, &cache_key) { + match deserialize(&file_path, cache_key) { Ok(data) => { log::debug!("Loaded {} code cache entries", data.len()); Self { @@ -167,7 +167,7 @@ struct FirstRunCodeCacheData { } struct FirstRunCodeCacheStrategy { - cache_key: String, + cache_key: u64, file_path: PathBuf, is_finished: AtomicFlag, data: Mutex, @@ -180,7 +180,7 @@ impl FirstRunCodeCacheStrategy { ) { let count = cache_data.len(); let temp_file = get_atomic_file_path(&self.file_path); - match serialize(&temp_file, &self.cache_key, cache_data) { + match serialize(&temp_file, self.cache_key, cache_data) { Ok(()) => { if let Err(err) = std::fs::rename(&temp_file, &self.file_path) { log::debug!("Failed to rename code cache: {}", err); @@ -229,7 +229,7 @@ impl SubsequentRunCodeCacheStrategy { /// - fn serialize( file_path: &Path, - cache_key: &str, + cache_key: u64, cache: &HashMap, ) -> Result<(), AnyError> { let cache_file = std::fs::OpenOptions::new() @@ -239,7 +239,7 @@ fn serialize( .open(file_path)?; let mut writer = BufWriter::new(cache_file); // header - writer.write_all(cache_key.as_bytes())?; + writer.write_all(&cache_key.to_le_bytes())?; writer.write_all(&(cache.len() as u32).to_le_bytes())?; // lengths of each entry for (specifier, entry) in cache { @@ -265,7 +265,7 @@ fn serialize( fn deserialize( file_path: &Path, - cache_key: &str, + expected_cache_key: u64, ) -> Result, AnyError> { // it's very important to use this below so that a corrupt cache file // doesn't cause a memory allocation error @@ -281,14 +281,14 @@ fn deserialize( let cache_file = std::fs::File::open(file_path)?; let mut reader = BufReader::new(cache_file); - let mut header_bytes = vec![0; cache_key.len() + 4]; + let mut header_bytes = vec![0; 8 + 4]; reader.read_exact(&mut header_bytes)?; - if &header_bytes[..cache_key.len()] != cache_key.as_bytes() { + let actual_cache_key = u64::from_le_bytes(header_bytes[..8].try_into()?); + if actual_cache_key != expected_cache_key { // cache bust bail!("Cache key mismatch"); } - let len = - u32::from_le_bytes(header_bytes[cache_key.len()..].try_into()?) as usize; + let len = u32::from_le_bytes(header_bytes[8..].try_into()?) as usize; // read the lengths for each entry found in the file let entry_len_bytes_capacity = len * 8; let mut entry_len_bytes = new_vec_sized(entry_len_bytes_capacity, 0)?; @@ -352,7 +352,7 @@ mod test { #[test] fn serialize_deserialize() { let temp_dir = TempDir::new(); - let cache_key = "cache_key"; + let cache_key = 123456; let cache = { let mut cache = HashMap::new(); cache.insert( @@ -380,7 +380,7 @@ mod test { #[test] fn serialize_deserialize_empty() { let temp_dir = TempDir::new(); - let cache_key = "cache_key"; + let cache_key = 1234; let cache = HashMap::new(); let file_path = temp_dir.path().join("cache.bin").to_path_buf(); serialize(&file_path, cache_key, &cache).unwrap(); @@ -393,7 +393,65 @@ mod test { let temp_dir = TempDir::new(); let file_path = temp_dir.path().join("cache.bin").to_path_buf(); std::fs::write(&file_path, b"corrupttestingtestingtesting").unwrap(); - let err = deserialize(&file_path, "cache-key").unwrap_err(); + let err = deserialize(&file_path, 1234).unwrap_err(); assert_eq!(err.to_string(), "Cache key mismatch"); } + + #[test] + fn code_cache() { + let temp_dir = TempDir::new(); + let file_path = temp_dir.path().join("cache.bin").to_path_buf(); + let url1 = ModuleSpecifier::parse("https://deno.land/example1.js").unwrap(); + let url2 = ModuleSpecifier::parse("https://deno.land/example2.js").unwrap(); + // first run + { + let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234); + assert!(code_cache + .get_sync(&url1, CodeCacheType::EsModule, 0) + .is_none()); + assert!(code_cache + .get_sync(&url2, CodeCacheType::EsModule, 1) + .is_none()); + code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[1, 2, 3]); + assert!(!file_path.exists()); + code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[2, 1, 3]); + assert!(file_path.exists()); // now the new code cache exists + } + // second run + { + let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234); + let result1 = code_cache + .get_sync(&url1, CodeCacheType::EsModule, 0) + .unwrap(); + let result2 = code_cache + .get_sync(&url2, CodeCacheType::EsModule, 1) + .unwrap(); + assert_eq!(result1, vec![1, 2, 3]); + assert_eq!(result2, vec![2, 1, 3]); + } + + // new cache key first run + { + let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321); + assert!(code_cache + .get_sync(&url1, CodeCacheType::EsModule, 0) + .is_none()); + assert!(code_cache + .get_sync(&url2, CodeCacheType::EsModule, 1) + .is_none()); + code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[2, 2, 3]); + code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[3, 2, 3]); + } + // new cache key second run + { + let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321); + let result1 = code_cache + .get_sync(&url1, CodeCacheType::EsModule, 0) + .unwrap(); + assert_eq!(result1, vec![2, 2, 3]); + assert!(code_cache + .get_sync(&url2, CodeCacheType::EsModule, 5) // different hash will cause none + .is_none()); + } + } } diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index 89b9393ef5abcb..14e2b93b835f64 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -620,7 +620,7 @@ pub async fn run(data: StandaloneData) -> Result { "{}.cache", root_path.file_name().unwrap().to_string_lossy() )), - metadata.cache_key, + metadata.code_cache_key, )); let module_loader_factory = StandaloneModuleLoaderFactory { shared: Arc::new(SharedModuleLoaderState { diff --git a/tests/specs/compile/compile_cache/__test__.jsonc b/tests/specs/compile/compile_cache/__test__.jsonc new file mode 100644 index 00000000000000..90f009930762fe --- /dev/null +++ b/tests/specs/compile/compile_cache/__test__.jsonc @@ -0,0 +1,32 @@ +{ + "tempDir": true, + "steps": [{ + "if": "unix", + "args": "compile --output main --log-level=debug main.ts", + "output": "[WILDCARD]" + }, { + "if": "unix", + "commandName": "./main", + "args": [], + "output": "first_run.out" + }, { + "if": "unix", + "commandName": "./main", + "args": [], + "output": "second_run.out" + }, { + "if": "windows", + "args": "compile --output main.exe --log-level=debug main.ts", + "output": "[WILDCARD]" + }, { + "if": "windows", + "commandName": "./main.exe", + "args": [], + "output": "first_run.out" + }, { + "if": "windows", + "commandName": "./main.exe", + "args": [], + "output": "second_run.out" + }] +} diff --git a/tests/specs/compile/compile_cache/first_run.out b/tests/specs/compile/compile_cache/first_run.out new file mode 100644 index 00000000000000..ca13e088f04428 --- /dev/null +++ b/tests/specs/compile/compile_cache/first_run.out @@ -0,0 +1 @@ +[WILDCARD]Serialized 9 code cache entries[WILDCARD] \ No newline at end of file diff --git a/tests/specs/compile/compile_cache/main.ts b/tests/specs/compile/compile_cache/main.ts new file mode 100644 index 00000000000000..b1d552d30a9e03 --- /dev/null +++ b/tests/specs/compile/compile_cache/main.ts @@ -0,0 +1,3 @@ +import { join } from "jsr:@std/url@0.220/join"; + +console.log(join); diff --git a/tests/specs/compile/compile_cache/second_run.out b/tests/specs/compile/compile_cache/second_run.out new file mode 100644 index 00000000000000..5f6afcf7e6f75e --- /dev/null +++ b/tests/specs/compile/compile_cache/second_run.out @@ -0,0 +1 @@ +[WILDCARD]Loaded 9 code cache entries[WILDCARD][Function: join][WILDCARD] \ No newline at end of file From e9e4ad2bbac6ca467b2ab29e8eaa54250dea2dc7 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 16:20:59 -0400 Subject: [PATCH 13/20] support --no-code-cache --- cli/args/flags.rs | 3 + cli/standalone/binary.rs | 18 +++-- cli/standalone/code_cache.rs | 66 ++++++++++++++----- cli/standalone/mod.rs | 47 ++++++++----- runtime/code_cache.rs | 1 + .../__test__.jsonc | 0 .../first_run.out | 0 .../{compile_cache => code_cache}/main.ts | 0 .../second_run.out | 0 .../compile/no_code_cache/__test__.jsonc | 22 +++++++ tests/specs/compile/no_code_cache/main.out | 1 + tests/specs/compile/no_code_cache/main.ts | 3 + 12 files changed, 120 insertions(+), 41 deletions(-) rename tests/specs/compile/{compile_cache => code_cache}/__test__.jsonc (100%) rename tests/specs/compile/{compile_cache => code_cache}/first_run.out (100%) rename tests/specs/compile/{compile_cache => code_cache}/main.ts (100%) rename tests/specs/compile/{compile_cache => code_cache}/second_run.out (100%) create mode 100644 tests/specs/compile/no_code_cache/__test__.jsonc create mode 100644 tests/specs/compile/no_code_cache/main.out create mode 100644 tests/specs/compile/no_code_cache/main.ts diff --git a/cli/args/flags.rs b/cli/args/flags.rs index 5c2f83561f72e4..364a6c5c551704 100644 --- a/cli/args/flags.rs +++ b/cli/args/flags.rs @@ -1921,6 +1921,7 @@ On the first invocation with deno will download the proper binary and cache it i ]) .help_heading(COMPILE_HEADING), ) + .arg(no_code_cache_arg()) .arg( Arg::new("no-terminal") .long("no-terminal") @@ -4412,6 +4413,8 @@ fn compile_parse( }; ext_arg_parse(flags, matches); + flags.code_cache_enabled = !matches.get_flag("no-code-cache"); + flags.subcommand = DenoSubcommand::Compile(CompileFlags { source_file, output, diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 120015fb1cec8b..2e5531f9586b1c 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -173,7 +173,7 @@ pub struct SerializedWorkspaceResolver { pub struct Metadata { pub argv: Vec, pub seed: Option, - pub code_cache_key: u64, + pub code_cache_key: Option, pub permissions: PermissionFlags, pub location: Option, pub v8_flags: Vec, @@ -596,14 +596,20 @@ impl<'a> DenoCompileBinaryWriter<'a> { VfsBuilder::new(root_path.clone())? }; let mut remote_modules_store = RemoteModulesStoreBuilder::default(); - let mut code_cache_key_hasher = FastInsecureHasher::new_deno_versioned(); + let mut code_cache_key_hasher = if cli_options.code_cache_enabled() { + Some(FastInsecureHasher::new_deno_versioned()) + } else { + None + }; for module in graph.modules() { if module.specifier().scheme() == "data" { continue; // don't store data urls as an entry as they're in the code } - if let Some(source) = module.source() { - code_cache_key_hasher.write(module.specifier().as_str().as_bytes()); - code_cache_key_hasher.write(source.as_bytes()); + if let Some(hasher) = &mut code_cache_key_hasher { + if let Some(source) = module.source() { + hasher.write(module.specifier().as_str().as_bytes()); + hasher.write(source.as_bytes()); + } } let (maybe_source, media_type) = match module { deno_graph::Module::Js(m) => { @@ -664,7 +670,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { let metadata = Metadata { argv: compile_flags.args.clone(), seed: cli_options.seed(), - code_cache_key: code_cache_key_hasher.finish(), + code_cache_key: code_cache_key_hasher.map(|h| h.finish()), location: cli_options.location_flag().clone(), permissions: cli_options.permission_flags().clone(), v8_flags: cli_options.v8_flags().clone(), diff --git a/cli/standalone/code_cache.rs b/cli/standalone/code_cache.rs index 4ff90beec19aed..fc256a9ec0761f 100644 --- a/cli/standalone/code_cache.rs +++ b/cli/standalone/code_cache.rs @@ -74,7 +74,7 @@ impl CodeCache for DenoCompileCodeCache { fn get_sync( &self, specifier: &ModuleSpecifier, - _code_cache_type: CodeCacheType, + code_cache_type: CodeCacheType, source_hash: u64, ) -> Option> { match &self.strategy { @@ -91,7 +91,7 @@ impl CodeCache for DenoCompileCodeCache { if strategy.is_finished.is_raised() { return None; } - strategy.take_from_cache(specifier, source_hash) + strategy.take_from_cache(specifier, code_cache_type, source_hash) } } } @@ -99,7 +99,7 @@ impl CodeCache for DenoCompileCodeCache { fn set_sync( &self, specifier: ModuleSpecifier, - _code_cache_type: CodeCacheType, + code_cache_type: CodeCacheType, source_hash: u64, bytes: &[u8], ) { @@ -112,7 +112,7 @@ impl CodeCache for DenoCompileCodeCache { let data_to_serialize = { let mut data = strategy.data.lock(); data.cache.insert( - specifier.to_string(), + (specifier.to_string(), code_cache_type), DenoCompileCodeCacheEntry { source_hash, data: bytes.to_vec(), @@ -161,8 +161,10 @@ impl CliCodeCache for DenoCompileCodeCache { } } +type CodeCacheKey = (String, CodeCacheType); + struct FirstRunCodeCacheData { - cache: HashMap, + cache: HashMap, add_count: usize, } @@ -176,7 +178,7 @@ struct FirstRunCodeCacheStrategy { impl FirstRunCodeCacheStrategy { fn write_cache_data( &self, - cache_data: &HashMap, + cache_data: &HashMap, ) { let count = cache_data.len(); let temp_file = get_atomic_file_path(&self.file_path); @@ -198,20 +200,25 @@ impl FirstRunCodeCacheStrategy { struct SubsequentRunCodeCacheStrategy { is_finished: AtomicFlag, - data: Mutex>, + data: Mutex>, } impl SubsequentRunCodeCacheStrategy { fn take_from_cache( &self, specifier: &ModuleSpecifier, + code_cache_type: CodeCacheType, source_hash: u64, ) -> Option> { let mut data = self.data.lock(); - let entry = data.remove(specifier.as_str())?; + // todo(dsherret): how to avoid the clone here? + let entry = data.remove(&(specifier.to_string(), code_cache_type))?; if entry.source_hash != source_hash { return None; } + if data.is_empty() { + self.is_finished.raise(); + } Some(entry.data) } } @@ -224,13 +231,14 @@ impl SubsequentRunCodeCacheStrategy { /// - <[entry]> /// - <[u8]: entry data> /// - +/// - : code cache type /// - /// - /// - fn serialize( file_path: &Path, cache_key: u64, - cache: &HashMap, + cache: &HashMap, ) -> Result<(), AnyError> { let cache_file = std::fs::OpenOptions::new() .create(true) @@ -242,13 +250,18 @@ fn serialize( writer.write_all(&cache_key.to_le_bytes())?; writer.write_all(&(cache.len() as u32).to_le_bytes())?; // lengths of each entry - for (specifier, entry) in cache { - let len: u64 = entry.data.len() as u64 + specifier.len() as u64 + 4 + 8 + 8; + for ((specifier, _), entry) in cache { + let len: u64 = + entry.data.len() as u64 + specifier.len() as u64 + 1 + 4 + 8 + 8; writer.write_all(&len.to_le_bytes())?; } // entries - for (specifier, entry) in cache { + for ((specifier, code_cache_type), entry) in cache { writer.write_all(&entry.data)?; + writer.write_all(&[match code_cache_type { + CodeCacheType::EsModule => 0, + CodeCacheType::Script => 1, + }])?; writer.write_all(specifier.as_bytes())?; writer.write_all(&(specifier.len() as u32).to_le_bytes())?; writer.write_all(&entry.source_hash.to_le_bytes())?; @@ -266,7 +279,7 @@ fn serialize( fn deserialize( file_path: &Path, expected_cache_key: u64, -) -> Result, AnyError> { +) -> Result, AnyError> { // it's very important to use this below so that a corrupt cache file // doesn't cause a memory allocation error fn new_vec_sized( @@ -322,7 +335,13 @@ fn deserialize( let specifier = String::from_utf8( buffer[specifier_start_pos..specifier_end_pos].to_vec(), )?; - buffer.truncate(specifier_start_pos); + let code_cache_type_pos = specifier_start_pos - 1; + let code_cache_type = match buffer[code_cache_type_pos] { + 0 => CodeCacheType::EsModule, + 1 => CodeCacheType::Script, + _ => bail!("Invalid code cache type"), + }; + buffer.truncate(code_cache_type_pos); let actual_entry_data_hash: u64 = FastInsecureHasher::new_without_deno_version() .write(&buffer) @@ -331,7 +350,7 @@ fn deserialize( bail!("Hash mismatch.") } map.insert( - specifier, + (specifier, code_cache_type), DenoCompileCodeCacheEntry { source_hash, data: buffer, @@ -356,19 +375,26 @@ mod test { let cache = { let mut cache = HashMap::new(); cache.insert( - "specifier1".to_string(), + ("specifier1".to_string(), CodeCacheType::EsModule), DenoCompileCodeCacheEntry { source_hash: 1, data: vec![1, 2, 3], }, ); cache.insert( - "specifier2".to_string(), + ("specifier2".to_string(), CodeCacheType::EsModule), DenoCompileCodeCacheEntry { source_hash: 2, data: vec![4, 5, 6], }, ); + cache.insert( + ("specifier2".to_string(), CodeCacheType::Script), + DenoCompileCodeCacheEntry { + source_hash: 2, + data: vec![6, 5, 1], + }, + ); cache }; let file_path = temp_dir.path().join("cache.bin").to_path_buf(); @@ -412,20 +438,26 @@ mod test { assert!(code_cache .get_sync(&url2, CodeCacheType::EsModule, 1) .is_none()); + assert!(code_cache.enabled()); code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[1, 2, 3]); + assert!(code_cache.enabled()); assert!(!file_path.exists()); code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[2, 1, 3]); assert!(file_path.exists()); // now the new code cache exists + assert!(!code_cache.enabled()); // no longer enabled } // second run { let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234); + assert!(code_cache.enabled()); let result1 = code_cache .get_sync(&url1, CodeCacheType::EsModule, 0) .unwrap(); + assert!(code_cache.enabled()); let result2 = code_cache .get_sync(&url2, CodeCacheType::EsModule, 1) .unwrap(); + assert!(!code_cache.enabled()); // no longer enabled assert_eq!(result1, vec![1, 2, 3]); assert_eq!(result2, vec![2, 1, 3]); } diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index 14e2b93b835f64..595364b545572c 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -101,7 +101,7 @@ struct SharedModuleLoaderState { workspace_resolver: WorkspaceResolver, node_resolver: Arc, npm_module_loader: Arc, - code_cache: Arc, + code_cache: Option>, } impl SharedModuleLoaderState { @@ -110,14 +110,17 @@ impl SharedModuleLoaderState { specifier: &ModuleSpecifier, source: &[u8], ) -> Option { - if !self.code_cache.enabled() { + let Some(code_cache) = &self.code_cache else { + return None; + }; + if !code_cache.enabled() { return None; } // deno version is already included in the root cache key let hash = FastInsecureHasher::new_without_deno_version() .write_hashable(source) .finish(); - let data = self.code_cache.get_sync( + let data = code_cache.get_sync( specifier, deno_runtime::code_cache::CodeCacheType::EsModule, hash, @@ -382,14 +385,16 @@ impl ModuleLoader for EmbeddedModuleLoader { &self, specifier: ModuleSpecifier, source_hash: u64, - code_cache: &[u8], + code_cache_data: &[u8], ) -> LocalBoxFuture<'static, ()> { - self.shared.code_cache.set_sync( - specifier, - deno_runtime::code_cache::CodeCacheType::EsModule, - source_hash, - code_cache, - ); + if let Some(code_cache) = &self.shared.code_cache { + code_cache.set_sync( + specifier, + deno_runtime::code_cache::CodeCacheType::EsModule, + source_hash, + code_cache_data, + ); + } std::future::ready(()).boxed_local() } } @@ -615,13 +620,19 @@ pub async fn run(data: StandaloneData) -> Result { metadata.workspace_resolver.pkg_json_resolution, ) }; - let code_cache = Arc::new(DenoCompileCodeCache::new( - root_path.with_file_name(format!( - "{}.cache", - root_path.file_name().unwrap().to_string_lossy() - )), - metadata.code_cache_key, - )); + let code_cache = match metadata.code_cache_key { + Some(code_cache_key) => Some(Arc::new(DenoCompileCodeCache::new( + root_path.with_file_name(format!( + "{}.cache", + root_path.file_name().unwrap().to_string_lossy() + )), + code_cache_key, + )) as Arc), + None => { + log::debug!("Code cache disabled."); + None + } + }; let module_loader_factory = StandaloneModuleLoaderFactory { shared: Arc::new(SharedModuleLoaderState { modules, @@ -673,7 +684,7 @@ pub async fn run(data: StandaloneData) -> Result { let worker_factory = CliMainWorkerFactory::new( Arc::new(BlobStore::default()), cjs_resolutions, - Some(code_cache), + code_cache, feature_checker, fs, None, diff --git a/runtime/code_cache.rs b/runtime/code_cache.rs index ff656a01883c11..b4a7ce188f5cdf 100644 --- a/runtime/code_cache.rs +++ b/runtime/code_cache.rs @@ -2,6 +2,7 @@ use deno_core::ModuleSpecifier; +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum CodeCacheType { EsModule, Script, diff --git a/tests/specs/compile/compile_cache/__test__.jsonc b/tests/specs/compile/code_cache/__test__.jsonc similarity index 100% rename from tests/specs/compile/compile_cache/__test__.jsonc rename to tests/specs/compile/code_cache/__test__.jsonc diff --git a/tests/specs/compile/compile_cache/first_run.out b/tests/specs/compile/code_cache/first_run.out similarity index 100% rename from tests/specs/compile/compile_cache/first_run.out rename to tests/specs/compile/code_cache/first_run.out diff --git a/tests/specs/compile/compile_cache/main.ts b/tests/specs/compile/code_cache/main.ts similarity index 100% rename from tests/specs/compile/compile_cache/main.ts rename to tests/specs/compile/code_cache/main.ts diff --git a/tests/specs/compile/compile_cache/second_run.out b/tests/specs/compile/code_cache/second_run.out similarity index 100% rename from tests/specs/compile/compile_cache/second_run.out rename to tests/specs/compile/code_cache/second_run.out diff --git a/tests/specs/compile/no_code_cache/__test__.jsonc b/tests/specs/compile/no_code_cache/__test__.jsonc new file mode 100644 index 00000000000000..185748a18e40c3 --- /dev/null +++ b/tests/specs/compile/no_code_cache/__test__.jsonc @@ -0,0 +1,22 @@ +{ + "tempDir": true, + "steps": [{ + "if": "unix", + "args": "compile --output main --no-code-cache --log-level=debug main.ts", + "output": "[WILDCARD]" + }, { + "if": "unix", + "commandName": "./main", + "args": [], + "output": "main.out" + }, { + "if": "windows", + "args": "compile --output main.exe --no-code-cache --log-level=debug main.ts", + "output": "[WILDCARD]" + }, { + "if": "windows", + "commandName": "./main.exe", + "args": [], + "output": "main.out" + }] +} diff --git a/tests/specs/compile/no_code_cache/main.out b/tests/specs/compile/no_code_cache/main.out new file mode 100644 index 00000000000000..e91f132825e050 --- /dev/null +++ b/tests/specs/compile/no_code_cache/main.out @@ -0,0 +1 @@ +[WILDCARD]Code cache disabled.[WILDCARD] \ No newline at end of file diff --git a/tests/specs/compile/no_code_cache/main.ts b/tests/specs/compile/no_code_cache/main.ts new file mode 100644 index 00000000000000..b1d552d30a9e03 --- /dev/null +++ b/tests/specs/compile/no_code_cache/main.ts @@ -0,0 +1,3 @@ +import { join } from "jsr:@std/url@0.220/join"; + +console.log(join); From 8a910fd2c24b98d051482e1c4c983a0a9046d71f Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 16:21:41 -0400 Subject: [PATCH 14/20] lint --- cli/standalone/code_cache.rs | 2 +- cli/standalone/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/standalone/code_cache.rs b/cli/standalone/code_cache.rs index fc256a9ec0761f..56825f65719381 100644 --- a/cli/standalone/code_cache.rs +++ b/cli/standalone/code_cache.rs @@ -134,7 +134,7 @@ impl CodeCache for DenoCompileCodeCache { } }; if let Some(cache_data) = &data_to_serialize { - strategy.write_cache_data(&cache_data); + strategy.write_cache_data(cache_data); } } CodeCacheStrategy::SubsequentRun(_) => { diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index 595364b545572c..6639c70cb65c40 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -361,7 +361,7 @@ impl ModuleLoader for EmbeddedModuleLoader { module.into_for_v8(); let code_cache_entry = self .shared - .get_code_cache(&module_specifier, module_source.as_bytes()); + .get_code_cache(module_specifier, module_source.as_bytes()); deno_core::ModuleLoadResponse::Sync(Ok( deno_core::ModuleSource::new_with_redirect( module_type, From 815be3a6a0f252e5a042afdcc23e595c32e6d7fc Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 16:23:42 -0400 Subject: [PATCH 15/20] remove unused use --- cli/standalone/binary.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 2e5531f9586b1c..044db277a5894a 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -54,7 +54,6 @@ use indexmap::IndexMap; use log::Level; use serde::Deserialize; use serde::Serialize; -use windows_sys::Wdk::System; use crate::args::CaData; use crate::args::CliOptions; From ec0d0c793cdddae0a70df7976e1bc62d593aecf9 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 16:47:08 -0400 Subject: [PATCH 16/20] fix test --- cli/args/flags.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cli/args/flags.rs b/cli/args/flags.rs index fce5e9a8679350..efe824309b7608 100644 --- a/cli/args/flags.rs +++ b/cli/args/flags.rs @@ -10001,6 +10001,7 @@ mod tests { include: vec![] }), type_check_mode: TypeCheckMode::Local, + code_cache_enabled: true, ..Flags::default() } ); @@ -10009,7 +10010,7 @@ mod tests { #[test] fn compile_with_flags() { #[rustfmt::skip] - let r = flags_from_vec(svec!["deno", "compile", "--import-map", "import_map.json", "--no-remote", "--config", "tsconfig.json", "--no-check", "--unsafely-ignore-certificate-errors", "--reload", "--lock", "lock.json", "--cert", "example.crt", "--cached-only", "--location", "https:foo", "--allow-read", "--allow-net", "--v8-flags=--help", "--seed", "1", "--no-terminal", "--icon", "favicon.ico", "--output", "colors", "--env=.example.env", "https://examples.deno.land/color-logging.ts", "foo", "bar", "-p", "8080"]); + let r = flags_from_vec(svec!["deno", "compile", "--import-map", "import_map.json", "--no-code-cache", "--no-remote", "--config", "tsconfig.json", "--no-check", "--unsafely-ignore-certificate-errors", "--reload", "--lock", "lock.json", "--cert", "example.crt", "--cached-only", "--location", "https:foo", "--allow-read", "--allow-net", "--v8-flags=--help", "--seed", "1", "--no-terminal", "--icon", "favicon.ico", "--output", "colors", "--env=.example.env", "https://examples.deno.land/color-logging.ts", "foo", "bar", "-p", "8080"]); assert_eq!( r.unwrap(), Flags { @@ -10025,6 +10026,7 @@ mod tests { }), import_map_path: Some("import_map.json".to_string()), no_remote: true, + code_cache_enabled: false, config_flag: ConfigFlag::Path("tsconfig.json".to_owned()), type_check_mode: TypeCheckMode::None, reload: true, From 11401056e82ee807e7935ffbba16263b2141b0a6 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 17:11:15 -0400 Subject: [PATCH 17/20] Do not subtract with overflow when deserializing. --- cli/standalone/code_cache.rs | 63 +++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/cli/standalone/code_cache.rs b/cli/standalone/code_cache.rs index 56825f65719381..25b490544cf1d7 100644 --- a/cli/standalone/code_cache.rs +++ b/cli/standalone/code_cache.rs @@ -246,6 +246,14 @@ fn serialize( .write(true) .open(file_path)?; let mut writer = BufWriter::new(cache_file); + serialize_with_writer(&mut writer, cache_key, cache) +} + +fn serialize_with_writer( + writer: &mut BufWriter, + cache_key: u64, + cache: &HashMap, +) -> Result<(), AnyError> { // header writer.write_all(&cache_key.to_le_bytes())?; writer.write_all(&(cache.len() as u32).to_le_bytes())?; @@ -279,6 +287,15 @@ fn serialize( fn deserialize( file_path: &Path, expected_cache_key: u64, +) -> Result, AnyError> { + let cache_file = std::fs::File::open(file_path)?; + let mut reader = BufReader::new(cache_file); + deserialize_with_reader(&mut reader, expected_cache_key) +} + +fn deserialize_with_reader( + reader: &mut BufReader, + expected_cache_key: u64, ) -> Result, AnyError> { // it's very important to use this below so that a corrupt cache file // doesn't cause a memory allocation error @@ -292,8 +309,13 @@ fn deserialize( Ok(vec) } - let cache_file = std::fs::File::open(file_path)?; - let mut reader = BufReader::new(cache_file); + fn try_subtract(a: usize, b: usize) -> Result { + if a < b { + bail!("Integer underflow"); + } + Ok(a - b) + } + let mut header_bytes = vec![0; 8 + 4]; reader.read_exact(&mut header_bytes)?; let actual_cache_key = u64::from_le_bytes(header_bytes[..8].try_into()?); @@ -320,22 +342,22 @@ fn deserialize( for len in lengths { let mut buffer = new_vec_sized(len, 0)?; reader.read_exact(&mut buffer)?; - let entry_data_hash_start_pos = buffer.len() - 8; + let entry_data_hash_start_pos = try_subtract(buffer.len(), 8)?; let expected_entry_data_hash = u64::from_le_bytes(buffer[entry_data_hash_start_pos..].try_into()?); - let source_hash_start_pos = entry_data_hash_start_pos - 8; + let source_hash_start_pos = try_subtract(entry_data_hash_start_pos, 8)?; let source_hash = u64::from_le_bytes( buffer[source_hash_start_pos..entry_data_hash_start_pos].try_into()?, ); - let specifier_end_pos = source_hash_start_pos - 4; + let specifier_end_pos = try_subtract(source_hash_start_pos, 4)?; let specifier_len = u32::from_le_bytes( buffer[specifier_end_pos..source_hash_start_pos].try_into()?, ) as usize; - let specifier_start_pos = specifier_end_pos - specifier_len; + let specifier_start_pos = try_subtract(specifier_end_pos, specifier_len)?; let specifier = String::from_utf8( buffer[specifier_start_pos..specifier_end_pos].to_vec(), )?; - let code_cache_type_pos = specifier_start_pos - 1; + let code_cache_type_pos = try_subtract(specifier_start_pos, 1)?; let code_cache_type = match buffer[code_cache_type_pos] { 0 => CodeCacheType::EsModule, 1 => CodeCacheType::Script, @@ -370,7 +392,6 @@ mod test { #[test] fn serialize_deserialize() { - let temp_dir = TempDir::new(); let cache_key = 123456; let cache = { let mut cache = HashMap::new(); @@ -397,29 +418,33 @@ mod test { ); cache }; - let file_path = temp_dir.path().join("cache.bin").to_path_buf(); - serialize(&file_path, cache_key, &cache).unwrap(); - let deserialized = deserialize(&file_path, cache_key).unwrap(); + let mut buffer = Vec::new(); + serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache) + .unwrap(); + let deserialized = + deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key) + .unwrap(); assert_eq!(cache, deserialized); } #[test] fn serialize_deserialize_empty() { - let temp_dir = TempDir::new(); let cache_key = 1234; let cache = HashMap::new(); - let file_path = temp_dir.path().join("cache.bin").to_path_buf(); - serialize(&file_path, cache_key, &cache).unwrap(); - let deserialized = deserialize(&file_path, cache_key).unwrap(); + let mut buffer = Vec::new(); + serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache) + .unwrap(); + let deserialized = + deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key) + .unwrap(); assert_eq!(cache, deserialized); } #[test] fn serialize_deserialize_corrupt() { - let temp_dir = TempDir::new(); - let file_path = temp_dir.path().join("cache.bin").to_path_buf(); - std::fs::write(&file_path, b"corrupttestingtestingtesting").unwrap(); - let err = deserialize(&file_path, 1234).unwrap_err(); + let buffer = "corrupttestingtestingtesting".as_bytes().to_vec(); + let err = deserialize_with_reader(&mut BufReader::new(&buffer[..]), 1234) + .unwrap_err(); assert_eq!(err.to_string(), "Cache key mismatch"); } From 5b13157d00c51cb812435f3895ef55f286e93cf4 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 24 Oct 2024 17:27:32 -0400 Subject: [PATCH 18/20] maybe fix test failing because they had the same binary name --- tests/specs/compile/code_cache/__test__.jsonc | 12 ++++++------ tests/specs/compile/no_code_cache/__test__.jsonc | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/specs/compile/code_cache/__test__.jsonc b/tests/specs/compile/code_cache/__test__.jsonc index 90f009930762fe..72353e27daeb52 100644 --- a/tests/specs/compile/code_cache/__test__.jsonc +++ b/tests/specs/compile/code_cache/__test__.jsonc @@ -2,30 +2,30 @@ "tempDir": true, "steps": [{ "if": "unix", - "args": "compile --output main --log-level=debug main.ts", + "args": "compile --output using_code_cache --log-level=debug main.ts", "output": "[WILDCARD]" }, { "if": "unix", - "commandName": "./main", + "commandName": "./using_code_cache", "args": [], "output": "first_run.out" }, { "if": "unix", - "commandName": "./main", + "commandName": "./using_code_cache", "args": [], "output": "second_run.out" }, { "if": "windows", - "args": "compile --output main.exe --log-level=debug main.ts", + "args": "compile --output using_code_cache.exe --log-level=debug main.ts", "output": "[WILDCARD]" }, { "if": "windows", - "commandName": "./main.exe", + "commandName": "./using_code_cache.exe", "args": [], "output": "first_run.out" }, { "if": "windows", - "commandName": "./main.exe", + "commandName": "./using_code_cache.exe", "args": [], "output": "second_run.out" }] diff --git a/tests/specs/compile/no_code_cache/__test__.jsonc b/tests/specs/compile/no_code_cache/__test__.jsonc index 185748a18e40c3..2589054ecb6739 100644 --- a/tests/specs/compile/no_code_cache/__test__.jsonc +++ b/tests/specs/compile/no_code_cache/__test__.jsonc @@ -2,20 +2,20 @@ "tempDir": true, "steps": [{ "if": "unix", - "args": "compile --output main --no-code-cache --log-level=debug main.ts", + "args": "compile --output no_code_cache --no-code-cache --log-level=debug main.ts", "output": "[WILDCARD]" }, { "if": "unix", - "commandName": "./main", + "commandName": "./no_code_cache", "args": [], "output": "main.out" }, { "if": "windows", - "args": "compile --output main.exe --no-code-cache --log-level=debug main.ts", + "args": "compile --output no_code_cache.exe --no-code-cache --log-level=debug main.ts", "output": "[WILDCARD]" }, { "if": "windows", - "commandName": "./main.exe", + "commandName": "./no_code_cache.exe", "args": [], "output": "main.out" }] From 488f6dcd9221a34b38f3c131a4777e49d4cba0bf Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 18 Nov 2024 13:14:23 -0500 Subject: [PATCH 19/20] update after merge --- cli/standalone/mod.rs | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index 32c691e2e7f240..d33d34524d5a0b 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -397,18 +397,6 @@ impl ModuleLoader for EmbeddedModuleLoader { let media_type = module.media_type; let (module_specifier, module_type, module_source) = module.into_parts(); - let code_cache_entry = self - .shared - .get_code_cache(module_specifier, module_source.as_bytes()); - deno_core::ModuleLoadResponse::Sync(Ok( - deno_core::ModuleSource::new_with_redirect( - module_type, - module_source, - original_specifier, - module_specifier, - code_cache_entry, - ), - )) let is_maybe_cjs = match self .shared .cjs_tracker @@ -450,25 +438,30 @@ impl ModuleLoader for EmbeddedModuleLoader { ModuleSourceCode::String(FastString::from_static(source)) } }; + let code_cache_entry = shared + .get_code_cache(&module_specifier, module_source.as_bytes()); Ok(deno_core::ModuleSource::new_with_redirect( module_type, module_source, &original_specifier, &module_specifier, - None, + code_cache_entry, )) } .boxed_local(), ) } else { let module_source = module_source.into_for_v8(); + let code_cache_entry = self + .shared + .get_code_cache(&module_specifier, module_source.as_bytes()); deno_core::ModuleLoadResponse::Sync(Ok( deno_core::ModuleSource::new_with_redirect( module_type, module_source, original_specifier, module_specifier, - None, + code_cache_entry, ), )) } From 0e38fab191cb9216a2bb275670a5ac4b9e1efe62 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 18 Nov 2024 13:29:01 -0500 Subject: [PATCH 20/20] lint --- cli/standalone/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index d33d34524d5a0b..b9f0b1d5be0825 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -454,7 +454,7 @@ impl ModuleLoader for EmbeddedModuleLoader { let module_source = module_source.into_for_v8(); let code_cache_entry = self .shared - .get_code_cache(&module_specifier, module_source.as_bytes()); + .get_code_cache(module_specifier, module_source.as_bytes()); deno_core::ModuleLoadResponse::Sync(Ok( deno_core::ModuleSource::new_with_redirect( module_type,