Skip to content

Commit

Permalink
+ rhai - add md::top_block_content_or_raw (rel #11)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremychone committed Oct 12, 2024
1 parent bde232a commit 9fe8649
Show file tree
Hide file tree
Showing 4 changed files with 270 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repository = "https://github.com/jeremychone/rust-devai"

[lints.rust]
unsafe_code = "forbid"
# unused = { level = "allow", priority = -1 } # For exploratory dev.
unused = { level = "allow", priority = -1 } # For exploratory dev.

[dependencies]
# -- Async
Expand Down
23 changes: 21 additions & 2 deletions src/script/rhai_script/rhai_modules/rhai_md.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ pub fn rhai_module() -> Module {

FuncRegistration::new("extract_blocks")
.in_global_namespace()
.set_into_module(&mut module, extract_blocks_with_name);
.set_into_module(&mut module, extract_blocks_with_lang);

FuncRegistration::new("top_block_content_or_raw")
.in_global_namespace()
.set_into_module(&mut module, top_block_content_or_raw);

module
}
Expand Down Expand Up @@ -52,10 +56,25 @@ fn extract_blocks(md_content: &str) -> RhaiResult {
/// returning only the blocks with a
/// [language identifier](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/creating-and-highlighting-code-blocks#syntax-highlighting)
/// that matches `lang_name`.
fn extract_blocks_with_name(md_content: &str, lang_name: &str) -> RhaiResult {
fn extract_blocks_with_lang(md_content: &str, lang_name: &str) -> RhaiResult {
    // Walk only the blocks tagged with `lang_name` and convert each one to a
    // rhai `Dynamic` in the same pass.
    let dynamics: Vec<Dynamic> = md::MdBlocks::new(md_content, Some(lang_name))
        .map(MdBlock::into_dynamic)
        .collect();

    Ok(dynamics.into())
}

/// ## RHAI Documentation
/// ```rhai
/// top_block_content_or_raw(md_content: &str) -> String
/// ```
///
/// Without fully parsing the markdown, this function removes the first ` ``` ` line and the last one,
/// returning the content found between them (text before the first and after the last is dropped).
/// If no such pair of lines is found, it returns the raw `md_content`.
///
/// > Note: This is useful in the genai context because often LLMs return a top block (e.g., markdown, Rust)
/// > which might have other ` ``` ` in the middle but should be interpreted as nested.
/// > (GenAI does not seem to know about the 6 ticks for top level)
fn top_block_content_or_raw(md_content: &str) -> String {
// Thin rhai-facing wrapper: all the work is delegated to the `md` support module.
md::top_block_content_or_raw(md_content)
}

// endregion: --- Rhai Functions
2 changes: 2 additions & 0 deletions src/support/md/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// region: --- Modules

mod md_blocks;
mod top_content;

pub use md_blocks::*;
pub use top_content::*;

// endregion: --- Modules
246 changes: 246 additions & 0 deletions src/support/md/top_content.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
/// Returns the text enclosed by the outermost pair of fence lines.
///
/// A *fence line* is a line that begins with three backticks at column 0
/// (optionally followed by a language tag). Indented fences are NOT recognized,
/// and any fence lines between the first and the last one are kept as part of
/// the returned content, so nested blocks survive untouched.
///
/// # Arguments
///
/// * `content` - The text to scan.
///
/// # Returns
///
/// * The lines strictly between the first and the last fence line, joined with
///   `\n` (line terminators are therefore normalized to `\n`).
/// * A copy of `content`, unchanged, when fewer than two fence lines exist.
pub fn top_block_content_or_raw(content: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();

    // Indices of every fence line, consumed from both ends: the front yields the
    // opening fence and the back yields the closing one.
    let mut fence_rows = lines
        .iter()
        .enumerate()
        .filter(|(_, text)| text.starts_with("```"))
        .map(|(row, _)| row);

    let opening = fence_rows.next();
    let closing = fence_rows.next_back();

    match (opening, closing) {
        // Two distinct fences were found, so `open < close` always holds here.
        (Some(open), Some(close)) => lines[open + 1..close].join("\n"),
        // Zero or one fence line: nothing to unwrap, hand back the input as-is.
        _ => content.to_string(),
    }
}

#[cfg(test)]
mod tests {
    //! Unit tests for `top_block_content_or_raw`.
    //!
    //! The multi-line fixtures are string literals, so their lines deliberately
    //! start at column 0: any leading whitespace inside them is part of the data.

    use super::*;

    /// A single anonymous block: only the lines between the fences are returned.
    #[test]
    fn test_md_top_block_content_simple() {
        // -- Fixtures
        let input = "\
Here is some text before the code block.
```
fn main() {
    println!(\"Hello, world!\");
}
```
Here is some text after the code block.";
        let expected = "fn main() {\n    println!(\"Hello, world!\");\n}";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    /// A language tag after the opening fence does not prevent detection.
    #[test]
    fn test_md_top_block_content_with_language() {
        // -- Fixtures
        let input = "\
Start of the text.
```python
def hello():
    print(\"Hello, Python!\")
```
End of the text.";
        let expected = "def hello():\n    print(\"Hello, Python!\")";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    /// Inner fences are treated as nested content: everything between the very
    /// first and the very last fence line is returned verbatim.
    #[test]
    fn test_md_top_block_content_multiple_code_blocks() {
        // -- Fixtures
        let fx_content = "
console.log(\"First code block\");
```
Some intermediate text.
Second code block:
```rust
fn main() {
    println!(\"Second code block\");
}";
        let input = format!(
            "
First code block:
```javascript
{fx_content}
```
End of the text."
        );

        // -- Exec
        let result = top_block_content_or_raw(&input);

        // -- Check
        assert_eq!(result, fx_content);
    }

    /// No fence at all: the input comes back unchanged.
    #[test]
    fn test_md_top_block_content_no_backticks() {
        // -- Fixtures
        let input = "This is a regular text without any code blocks.";
        let expected = "This is a regular text without any code blocks.";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    /// A lone opening fence is not a block: the input comes back unchanged.
    #[test]
    fn test_md_top_block_content_only_opening_backticks() {
        // -- Fixtures
        let input = "\
Text before the code block.
```
fn incomplete() {
    // Missing closing backticks
}";
        let expected = "\
Text before the code block.
```
fn incomplete() {
    // Missing closing backticks
}";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    /// A lone closing fence is not a block either (trailing newline preserved).
    #[test]
    fn test_md_top_block_content_only_closing_backticks() {
        // -- Fixtures
        let input = "\
Missing opening backticks for this code block.
fn incomplete() {
    // Missing opening backticks
}
```
";
        let expected = "\
Missing opening backticks for this code block.
fn incomplete() {
    // Missing opening backticks
}
```
";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    /// Two fences with nothing in between yield an empty string.
    #[test]
    fn test_md_top_block_content_adjacent_backticks() {
        // -- Fixtures
        let input = "\
Text before.
```
```
Text after.";
        let expected = "";

        // -- Exec
        let result = top_block_content_or_raw(input);

        // -- Check
        assert_eq!(result, expected);
    }

    /// Fences must start at column 0: indented fences are ignored, so the raw
    /// input is returned.
    #[test]
    fn test_md_top_block_content_with_whitespace() {
        // -- Fixtures
        let fx_input = "
Text before.
    ```
    Line within code block with leading whitespace.
    ```
Text after.";
        let expected = fx_input.to_string();

        // -- Exec
        let result = top_block_content_or_raw(fx_input);

        // -- Check
        assert_eq!(result, expected);
    }

    /// Single inline backticks inside the block are plain content.
    #[test]
    fn test_md_top_block_content_with_inner_backticks() {
        // -- Fixtures
        let fx_content = "Here is some code with backticks:
let s = \"Hello, `world`!\";";
        let input = format!(
            "\
Start text.
```
{fx_content}
```
End text."
        );
        let expected = fx_content;

        // -- Exec
        let result = top_block_content_or_raw(&input);

        // -- Check
        assert_eq!(result, expected);
    }
}

0 comments on commit 9fe8649

Please sign in to comment.