diff --git a/Cargo.toml b/Cargo.toml index f6709e3..90d061a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ repository = "https://github.com/jeremychone/rust-devai" [lints.rust] unsafe_code = "forbid" -# unused = { level = "allow", priority = -1 } # For exploratory dev. +unused = { level = "allow", priority = -1 } # For exploratory dev. [dependencies] # -- Async diff --git a/src/script/rhai_script/rhai_modules/rhai_md.rs b/src/script/rhai_script/rhai_modules/rhai_md.rs index d9206a6..68ac7a9 100644 --- a/src/script/rhai_script/rhai_modules/rhai_md.rs +++ b/src/script/rhai_script/rhai_modules/rhai_md.rs @@ -23,7 +23,11 @@ pub fn rhai_module() -> Module { FuncRegistration::new("extract_blocks") .in_global_namespace() - .set_into_module(&mut module, extract_blocks_with_name); + .set_into_module(&mut module, extract_blocks_with_lang); + + FuncRegistration::new("top_block_content_or_raw") + .in_global_namespace() + .set_into_module(&mut module, top_block_content_or_raw); module } @@ -52,10 +56,25 @@ fn extract_blocks(md_content: &str) -> RhaiResult { /// returning only the blocks with a /// [language identifier](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/creating-and-highlighting-code-blocks#syntax-highlighting) /// that matches `lang_name`. -fn extract_blocks_with_name(md_content: &str, lang_name: &str) -> RhaiResult { +fn extract_blocks_with_lang(md_content: &str, lang_name: &str) -> RhaiResult { let blocks: Vec = md::MdBlocks::new(md_content, Some(lang_name)).collect(); let blocks: Vec = blocks.into_iter().map(MdBlock::into_dynamic).collect(); Ok(blocks.into()) } +/// ## RHAI Documentation +/// ```rhai +/// top_block_content_or_raw(md_content: &str) -> Vec +/// ``` +/// +/// Without fully parsing the markdown, this function removes the first ` ``` ` line and the last one, returning its content. +/// If no block is found, it returns the raw `md_content`. 
/// Returns `true` when `line` opens or closes a fenced block, i.e. when the
/// line begins with three backticks.
///
/// The check is done on the line as-is: a line whose backticks are preceded by
/// whitespace is NOT considered a fence.
fn is_fence_line(line: &str) -> bool {
    line.starts_with("```")
}

/// Extracts the content of the outermost ("top") fenced block of `content`.
///
/// The top block is delimited by the first and the last lines that start with
/// three backticks. Every line strictly between those two lines is returned,
/// so fences nested inside the top block are kept verbatim as content.
///
/// Notes on the exact behavior:
///
/// * A fence is only recognized when the backticks are at the very start of
///   the line; indented fences are ignored (leading whitespace is not trimmed).
/// * The opening fence may carry a language identifier (e.g. three backticks
///   followed by `rust`); that whole line is dropped.
/// * The extracted lines are joined with `\n`, so line endings inside the
///   block are normalized and there is no trailing newline.
/// * When fewer than two distinct fence lines are found, the input is
///   returned unchanged.
///
/// # Arguments
///
/// * `content` - The input text, typically a markdown answer from an LLM.
///
/// # Returns
///
/// The content between the first and last fence lines, or a copy of
/// `content` when no complete top block exists.
pub fn top_block_content_or_raw(content: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();

    // Outermost delimiters: first fence from the top, first fence from the bottom.
    let first_fence = lines.iter().position(|line| is_fence_line(line));
    let last_fence = lines.iter().rposition(|line| is_fence_line(line));

    match (first_fence, last_fence) {
        // `start < end` rules out the single-fence case (both indices equal)
        // and guarantees that `start + 1..end` is a valid, possibly empty, range.
        (Some(start), Some(end)) if start < end => lines[start + 1..end].join("\n"),
        // No fence at all, or only one fence line: nothing to unwrap.
        _ => content.to_string(),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_md_top_block_content_simple() {
        // -- Fixtures
        let input = "\
Here is some text before the code block.

```
fn main() {
    println!(\"Hello, world!\");
}
```

Here is some text after the code block.";
        let expected = "fn main() {\n    println!(\"Hello, world!\");\n}";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    #[test]
    fn test_md_top_block_content_with_language() {
        // -- Fixtures
        let input = "\
Start of the text.

```python
def hello():
    print(\"Hello, Python!\")
```

End of the text.";
        let expected = "def hello():\n    print(\"Hello, Python!\")";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    #[test]
    fn test_md_top_block_content_multiple_code_blocks() {
        // -- Fixtures
        // The inner fences must be returned as part of the top block content.
        let fx_content = "
console.log(\"First code block\");
```

Some intermediate text.

Second code block:

```rust
fn main() {
    println!(\"Second code block\");
}";
        let input = format!(
            "
First code block:

```javascript
{fx_content}
```

End of the text."
        );

        // -- Exec
        let result = top_block_content_or_raw(&input);

        // -- Check
        assert_eq!(result, fx_content);
    }

    #[test]
    fn test_md_top_block_content_no_backticks() {
        // -- Fixtures
        let input = "This is a regular text without any code blocks.";
        let expected = "This is a regular text without any code blocks.";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    #[test]
    fn test_md_top_block_content_only_opening_backticks() {
        // -- Fixtures
        let input = "\
Text before the code block.

```
fn incomplete() {
    // Missing closing backticks
}";
        let expected = "\
Text before the code block.

```
fn incomplete() {
    // Missing closing backticks
}";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    #[test]
    fn test_md_top_block_content_only_closing_backticks() {
        // -- Fixtures
        let input = "\
Missing opening backticks for this code block.

fn incomplete() {
    // Missing opening backticks
}
```
";
        let expected = "\
Missing opening backticks for this code block.

fn incomplete() {
    // Missing opening backticks
}
```
";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    #[test]
    fn test_md_top_block_content_adjacent_backticks() {
        // -- Fixtures
        let input = "\
Text before.

```
```

Text after.";
        let expected = "";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    #[test]
    fn test_md_top_block_content_with_whitespace() {
        // -- Fixtures
        // Indented fences are not recognized, so the raw input is expected back.
        let fx_input = "
Text before.

    ```
    Line within code block with leading whitespace.
    ```

Text after.";
        let expected = fx_input.to_string();

        // -- Exec
        let result = top_block_content_or_raw(fx_input);

        // -- Check
        assert_eq!(result, expected);
    }

    #[test]
    fn test_md_top_block_content_with_inner_backticks() {
        // -- Fixtures
        let fx_content = "Here is some code with backticks:
let s = \"Hello, `world`!\";";
        let input = format!(
            "\
Start text.

```
{}
```

End text.",
            fx_content
        );
        let expected = fx_content;

        // -- Exec
        let result = top_block_content_or_raw(&input);

        // -- Check
        assert_eq!(result, expected);
    }
}