From 868e57b74be78f1c5846ddf8b9a481b9d2c1fea7 Mon Sep 17 00:00:00 2001
From: Jaap Frolich
Date: Fri, 13 Sep 2024 11:58:36 +0200
Subject: [PATCH] progress

---
 README.md                |  18 +++++
 src/bsconfig.rs          |  10 +++
 src/build.rs             |   1 +
 src/build/build_types.rs |  46 ++++++++++-
 src/build/packages.rs    |   8 ++
 src/build/parse.rs       | 171 +++++++++++++++++++++++++++++++++++++++
 src/helpers.rs           |   9 ++-
 7 files changed, 255 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index b873cd5..714c28c 100644
--- a/README.md
+++ b/README.md
@@ -130,7 +130,25 @@ Running tests:
 -> /lib/ocaml/**generated**/MyModule.graphql.0.res
 -> /lib/ocaml/**generated**/MyModule.graphql.1.res
 
+-> Parse the outputs of the embeds
 -> Determine the dependency tree (and add the embeds as deps)
 -> Run compiler
 
+#### Configuration of embeds
+
+- bsconfig.json
+
+```json
+{
+  "embed-generators": [
+    {
+      "name": "graphql",
+      "tags": ["graphql"],
+      "path": "./path/to/graphql/embed",
+      "package": "my-generator-package"
+    }
+  ]
+}
+```
+
 -> Profit
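The README documents where embed generators are configured, but not what a generator sees at runtime. Judging from `process_embeds` in `src/build/parse.rs` below, rewatch pipes the embed's tag and contents to the generator's stdin and writes its stdout verbatim into the generated `.res` file. A minimal sketch of a generator written against that assumed contract (the output shape is illustrative only, not part of this patch):

```rust
// Hypothetical embed generator -- not part of this patch.
// Assumed contract (from process_embeds below): stdin receives "<tag>\n<contents>",
// and stdout becomes the body of the generated .res file.
use std::io::{self, Read, Write};

fn main() -> io::Result<()> {
    let mut input = String::new();
    io::stdin().read_to_string(&mut input)?;

    // The first line carries the tag; everything after it is the embed's contents.
    let (tag, contents) = input.split_once('\n').unwrap_or((input.as_str(), ""));

    // Emit some ReScript. Whether the generator or rewatch writes the leading
    // "// HASH: ..." line is not settled by this patch, so it is omitted here.
    let stdout = io::stdout();
    let mut out = stdout.lock();
    writeln!(out, "// generated for tag: {}", tag)?;
    writeln!(out, "let body = `{}`", contents)?;
    Ok(())
}
```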
diff --git a/src/bsconfig.rs b/src/bsconfig.rs
index 3abcf40..45d2dce 100644
--- a/src/bsconfig.rs
+++ b/src/bsconfig.rs
@@ -157,6 +157,9 @@ pub struct Config {
     pub namespace: Option<NamespaceConfig>,
     pub jsx: Option<JsxSpecs>,
     pub uncurried: Option<bool>,
+    #[serde(rename = "embed-generators")]
+    pub embed_generators: Option<Vec<EmbedGenerator>>,
+
     // this is a new feature of rewatch, and it's not part of the bsconfig.json spec
     #[serde(rename = "namespace-entry")]
     pub namespace_entry: Option<String>,
@@ -164,6 +167,13 @@
     #[serde(rename = "allowed-dependents")]
     pub allowed_dependents: Option<Vec<String>>,
 }
+#[derive(Deserialize, Debug, Clone)]
+pub struct EmbedGenerator {
+    pub name: String,
+    pub tags: Vec<String>,
+    pub path: String,
+    pub package: Option<String>,
+}
 
 /// This flattens string flags
 pub fn flatten_flags(flags: &Option<Vec<OneOrMore<String>>>) -> Vec<String> {
diff --git a/src/build.rs b/src/build.rs
index aece691..c5c8209 100644
--- a/src/build.rs
+++ b/src/build.rs
@@ -278,6 +278,7 @@
     let timing_ast = Instant::now();
     let result_asts = parse::generate_asts(build_state, || pb.inc(1));
+    let result_asts = parse::generate_asts(build_state, || pb.inc(1));
 
     let timing_ast_elapsed = timing_ast.elapsed();
 
     match result_asts {
diff --git a/src/build/build_types.rs b/src/build/build_types.rs
index 6a7554b..c5904f8 100644
--- a/src/build/build_types.rs
+++ b/src/build/build_types.rs
@@ -1,5 +1,6 @@
 use crate::build::packages::{Namespace, Package};
 use ahash::{AHashMap, AHashSet};
+use serde::Deserialize;
 use std::time::SystemTime;
 
 #[derive(Debug, Clone, PartialEq)]
@@ -35,11 +36,48 @@ pub struct Implementation {
     pub parse_dirty: bool,
 }
 
+#[derive(Deserialize, Debug, Clone, PartialEq)]
+pub struct Location {
+    line: u32,
+    col: u32,
+}
+
+#[derive(Deserialize, Debug, Clone, PartialEq)]
+pub struct EmbedLoc {
+    start: Location,
+    end: Location,
+}
+
+// example of the *.embeds.json file
+// [
+//   {
+//     "tag": "sql.one",
+//     "filename": "Tst__sql_one_1.res",
+//     "contents": "\n  SELECT * FROM tst.res\n  WHERE id = 1\n",
+//     "loc": {"start": {"line": 1, "col": 22}, "end": {"line": 4, "col": 64}}
+//   },
+//   {
+//     "tag": "sql.many",
+//     "filename": "Tst__sql_many_1.res",
+//     "contents": "\n  SELECT * FROM tst.res\n  WHERE id > 1\n",
+//     "loc": {"start": {"line": 6, "col": 86}, "end": {"line": 9, "col": 128}}
+//   },
+//   {
+//     "tag": "sql.one",
+//     "filename": "Tst__sql_one_2.res",
+//     "contents":
+
+#[derive(Deserialize, Debug, Clone, PartialEq)]
+pub struct EmbedJsonData {
+    pub tag: String,
+    pub filename: String,
+    pub contents: String,
+    pub loc: EmbedLoc,
+}
+
 #[derive(Debug, Clone, PartialEq)]
 pub struct Embed {
-    pub tag: String,
-    pub file_path: String,
-    pub content: String,
+    pub embed: EmbedJsonData,
     pub hash: String,
     pub dirty: bool,
 }
@@ -48,7 +86,7 @@ pub struct Embed {
 pub struct SourceFile {
     pub implementation: Implementation,
     pub interface: Option<Implementation>,
-    pub embeds: Vec<Embed>,
+    pub embeds: Vec<Embed>, // Added embeds field
 }
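`EmbedJsonData`, `EmbedLoc`, and `Location` mirror the `*.embeds.json` format sketched in the comment above. A self-contained sketch of reading such a file with serde_json, using local stand-ins for the types added in this patch:

```rust
// Stand-alone illustration only: local copies of the structs above, used to
// show how a *.embeds.json document deserializes.
use serde::Deserialize;

#[derive(Deserialize, Debug)]
struct Location { line: u32, col: u32 }

#[derive(Deserialize, Debug)]
struct EmbedLoc { start: Location, end: Location }

#[derive(Deserialize, Debug)]
struct EmbedJsonData {
    tag: String,
    filename: String,
    contents: String,
    loc: EmbedLoc,
}

fn main() -> Result<(), serde_json::Error> {
    let json = r#"[
        {
            "tag": "sql.one",
            "filename": "Tst__sql_one_1.res",
            "contents": "\n  SELECT * FROM tst.res\n  WHERE id = 1\n",
            "loc": {"start": {"line": 1, "col": 22}, "end": {"line": 4, "col": 64}}
        }
    ]"#;
    let embeds: Vec<EmbedJsonData> = serde_json::from_str(json)?;
    // process_embeds (in src/build/parse.rs below) does the same with the
    // embeds.json file written next to the module's AST.
    println!("{} embed(s); tag {} -> {}", embeds.len(), embeds[0].tag, embeds[0].filename);
    Ok(())
}
```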
diff --git a/src/build/packages.rs b/src/build/packages.rs
index b5b3ec1..f8c072b 100644
--- a/src/build/packages.rs
+++ b/src/build/packages.rs
@@ -50,6 +50,7 @@ pub struct Package {
     pub name: String,
     pub bsconfig: bsconfig::Config,
     pub source_folders: AHashSet<bsconfig::PackageSource>,
+    pub generated_file_folder: PathBuf,
     // these are the relative file paths (relative to the package root)
     pub source_files: Option<AHashMap<String, SourceFileMeta>>,
     pub namespace: Namespace,
@@ -381,6 +382,11 @@ fn make_package(
         source_files: None,
         namespace: bsconfig.get_namespace(),
         modules: None,
+        generated_file_folder: match &bsconfig.sources {
+            bsconfig::OneOrMore::Single(source) => PathBuf::from(bsconfig::to_qualified_without_children(source, None).dir).join("__generated__"),
+            bsconfig::OneOrMore::Multiple(sources) if !sources.is_empty() => PathBuf::from(bsconfig::to_qualified_without_children(&sources[0], None).dir).join("__generated__"),
+            _ => panic!("Error: Invalid or empty sources configuration in bsconfig.json. Please ensure at least one valid source is specified."),
+        },
         // we canonicalize the path name so it's always the same
         path: PathBuf::from(package_path)
             .canonicalize()
@@ -652,6 +658,7 @@ pub fn parse_packages(build_state: &mut BuildState) {
                         parse_dirty: true,
                     },
                     interface: None,
+                    embeds: vec![],
                 }),
                 deps: AHashSet::new(),
                 dependents: AHashSet::new(),
@@ -705,6 +712,7 @@ pub fn parse_packages(build_state: &mut BuildState) {
                         last_modified: metadata.modified,
                         parse_dirty: true,
                     }),
+                    embeds: vec![],
                 }),
                 deps: AHashSet::new(),
                 dependents: AHashSet::new(),
diff --git a/src/build/parse.rs b/src/build/parse.rs
index 678a4a3..de6bceb 100644
--- a/src/build/parse.rs
+++ b/src/build/parse.rs
@@ -10,6 +10,7 @@ use log::debug;
 use rayon::prelude::*;
 use std::path::{Path, PathBuf};
 use std::process::Command;
+use std::time::SystemTime;
 
 pub fn generate_asts(
     build_state: &mut BuildState,
@@ -80,6 +81,15 @@
                 )
             };
 
+            // After generating ASTs, handle embeds
+            // Process embeds for the source file
+            if let Err(err) =
+                process_embeds(build_state, package, source_file, &build_state.workspace_root)
+            {
+                has_failure = true;
+                stderr.push_str(&err);
+            }
+
             (module_name.to_owned(), ast_result, iast_result, dirty)
         }
     }
@@ -370,6 +380,167 @@ fn path_to_ast_extension(path: &Path) -> &str {
     }
 }
 
+// Process the embeds of a single source file: run the configured generator for
+// every dirty embed and register the generated files as source modules.
+fn process_embeds(
+    build_state: &mut BuildState,
+    package: &packages::Package,
+    source_file: &mut SourceFile,
+    workspace_root: &Option<String>,
+) -> Result<(), String> {
+    let source_file_path = &source_file.implementation.path;
+
+    let ast_path = PathBuf::from(package.get_ast_path(&source_file.implementation.path));
+    let embeds_json_path = ast_path.with_extension("embeds.json");
+
+    // Read and parse the embeds JSON file
+    if embeds_json_path.exists() {
+        let embeds_json = helpers::read_file(&embeds_json_path).map_err(|e| e.to_string())?;
+        let embeds_data: Vec<EmbedJsonData> =
+            serde_json::from_str(&embeds_json).map_err(|e| e.to_string())?;
+
+        // Process each embed
+        let embeds = embeds_data
+            .into_iter()
+            .map(|embed_data| {
+                let embed_path = package.generated_file_folder.join(&embed_data.filename);
+                let hash = helpers::compute_string_hash(&embed_data.contents);
+                let dirty = is_embed_dirty(&embed_path, &embed_data, &hash.to_string());
+
+                // embed_path is the path of the generated ReScript file; register it
+                // as a ReScript source file in the build state.
+                let relative_path = Path::new(&embed_path)
+                    .strip_prefix(&package.path)
+                    .unwrap()
+                    .to_string_lossy();
+                let module_name = helpers::file_path_to_module_name(&relative_path, &package.namespace);
+                let last_modified = std::fs::metadata(&embed_path)
+                    .and_then(|metadata| metadata.modified())
+                    .unwrap_or(SystemTime::now());
+
+                if dirty {
+                    // The embed is dirty: find its generator (by tag) and rerun it.
+                    if let Some(embed_generator) =
+                        package.bsconfig.embed_generators.as_ref().and_then(|generators| {
+                            generators.iter().find(|gen| gen.tags.contains(&embed_data.tag))
+                        })
+                    {
+                        // Prepare the command
+                        // let mut command = if let Some(package_name) = &embed_generator.package {
+                        //     let node_modules_path = workspace_root
+                        //         .as_ref()
+                        //         .map(|root| Path::new(root).join("node_modules"))
+                        //         .unwrap_or_else(|| Path::new(&package.path).join("node_modules"));
+                        //     let generator_path =
+                        //         node_modules_path.join(package_name).join(&embed_generator.path);
+                        //     Command::new("node").arg(generator_path)
+                        // } else {
+                        //     Command::new(&embed_generator.path)
+                        // };
+                        let mut command = Command::new(&embed_generator.path);
+
+                        // Run the embed generator
+                        let output = command
+                            .stdin(std::process::Stdio::piped())
+                            .stdout(std::process::Stdio::piped())
+                            .stderr(std::process::Stdio::piped())
+                            .spawn()
+                            .and_then(|mut child| {
+                                use std::io::Write;
+                                let contents = format!("{}\n{}", embed_data.tag, embed_data.contents);
+                                child.stdin.as_mut().unwrap().write_all(contents.as_bytes())?;
+                                child.wait_with_output()
+                            })
+                            .map_err(|e| format!("Failed to run embed generator: {}", e))?;
+
+                        if !output.status.success() {
+                            return Err(format!(
+                                "Embed generator failed: {}",
+                                String::from_utf8_lossy(&output.stderr)
+                            ));
+                        }
+
+                        // Write the output to the embed file
+                        std::fs::write(&embed_path, output.stdout)
+                            .map_err(|e| format!("Failed to write embed file: {}", e))?;
+                    } else {
+                        return Err(format!("No embed generator found for tag: {}", embed_data.tag));
+                    }
+                }
+
+                if !build_state.modules.contains_key(&module_name) {
+                    let implementation = Implementation {
+                        path: relative_path.to_string(),
+                        parse_state: ParseState::Pending,
+                        compile_state: CompileState::Pending,
+                        last_modified,
+                        parse_dirty: true,
+                    };
+
+                    let source_file = SourceFile {
+                        implementation,
+                        interface: None,
+                        embeds: Vec::new(),
+                    };
+
+                    let module = Module {
+                        source_type: SourceType::SourceFile(source_file),
+                        deps: AHashSet::new(),
+                        dependents: AHashSet::new(),
+                        package_name: package.name.clone(),
+                        compile_dirty: true,
+                        last_compiled_cmi: None,
+                        last_compiled_cmt: None,
+                    };
+
+                    build_state.insert_module(&module_name, module);
+                } else if dirty {
+                    if let Some(module) = build_state.modules.get_mut(&module_name) {
+                        if let SourceType::SourceFile(source_file) = &mut module.source_type {
+                            source_file.implementation.parse_dirty = true;
+                        }
+                    }
+                }
+
+                Ok(Embed {
+                    hash: hash.to_string(),
+                    embed: embed_data,
+                    dirty,
+                })
+            })
+            .collect::<Vec<Result<Embed, String>>>();
+
+        // Update the source file's embeds
+        source_file.embeds = embeds.into_iter().filter_map(|result| result.ok()).collect();
+    }
+
+    Ok(())
+}
+
+fn is_embed_dirty(embed_path: &Path, embed_data: &EmbedJsonData, hash: &str) -> bool {
+    // The first line of a generated ReScript file is a comment of the form
+    // "// HASH: <hash>". The embed is dirty when the file does not exist, when
+    // that hash line is missing, or when the recorded hash differs from the
+    // hash of the embed's current contents.
+    if !embed_path.exists() {
+        return true;
+    }
+
+    let first_line = match helpers::read_file(embed_path) {
+        Ok(contents) => contents.lines().next().unwrap_or("").to_string(),
+        Err(_) => return true,
+    };
+
+    if !first_line.starts_with("// HASH: ") {
+        return true;
+    }
+
+    let file_hash = first_line.trim_start_matches("// HASH: ");
+    file_hash != hash
+}
+
 fn include_ppx(flag: &str, contents: &str) -> bool {
     if flag.contains("bisect") {
         return std::env::var("BISECT_ENABLE").is_ok();
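`is_embed_dirty` above keys freshness off a `// HASH: <hash>` first line in the generated file, where the hash is blake3 over the embed's contents (`compute_string_hash`, added in `src/helpers.rs` below). A small sketch of that convention using the blake3 crate directly; the path and contents are made up, and which side writes the hash line is an assumption:

```rust
// Illustration of the "// HASH: <hash>" freshness check -- not part of the patch.
use std::fs;

fn main() -> std::io::Result<()> {
    let contents = "\n  SELECT * FROM tst.res\n  WHERE id = 1\n";
    let hash = blake3::hash(contents.as_bytes()); // what helpers::compute_string_hash does

    // Assumption: whoever writes the generated file puts the hash on line one.
    let generated = format!("// HASH: {}\nlet query = `{}`\n", hash, contents);
    fs::write("/tmp/Tst__sql_one_1.res", &generated)?;

    // The dirty check: recorded hash vs. hash of the current embed contents.
    let first_line = fs::read_to_string("/tmp/Tst__sql_one_1.res")?
        .lines()
        .next()
        .unwrap_or("")
        .to_string();
    let dirty = first_line
        .strip_prefix("// HASH: ")
        .map_or(true, |recorded| recorded != hash.to_string());
    println!("dirty: {}", dirty); // prints "dirty: false"
    Ok(())
}
```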
diff --git a/src/helpers.rs b/src/helpers.rs
index 0202f11..4584ef4 100644
--- a/src/helpers.rs
+++ b/src/helpers.rs
@@ -282,11 +282,12 @@ pub fn format_namespaced_module_name(module_name: &str) -> String {
     }
 }
 
+pub fn compute_string_hash(str: &str) -> blake3::Hash {
+    blake3::hash(str.as_bytes())
+}
+
 pub fn compute_file_hash(path: &str) -> Option<blake3::Hash> {
-    match fs::read(path) {
-        Ok(str) => Some(blake3::hash(&str)),
-        Err(_) => None,
-    }
+    fs::read(path).map(|bytes| blake3::hash(&bytes)).ok()
 }
 
 fn has_rescript_config(path: &Path) -> bool {
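For reference, the new `compute_string_hash` applies the same blake3 hashing to an in-memory string that `compute_file_hash` applies to a file's bytes, which is what lets `process_embeds` hash an embed's contents without touching disk. A tiny check of that equivalence (function bodies copied from the hunk above; the temp path is made up):

```rust
use std::fs;

pub fn compute_string_hash(str: &str) -> blake3::Hash {
    blake3::hash(str.as_bytes())
}

pub fn compute_file_hash(path: &str) -> Option<blake3::Hash> {
    fs::read(path).map(|bytes| blake3::hash(&bytes)).ok()
}

fn main() -> std::io::Result<()> {
    let contents = "SELECT * FROM tst.res WHERE id = 1";
    fs::write("/tmp/embed_contents.sql", contents)?;
    // Hashing the string and hashing a file with the same bytes agree.
    assert_eq!(
        compute_file_hash("/tmp/embed_contents.sql").map(|h| h.to_string()),
        Some(compute_string_hash(contents).to_string())
    );
    Ok(())
}
```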