Skip to content

Commit

Permalink
fix(ssg): 🎨 Optimize and enhance error handling in post_process_html …
Browse files Browse the repository at this point in the history
…function

- Pre-compile alt and title regex objects for improved efficiency - Implement robust error handling for regex compilation - Update documentation to reflect changes in efficiency and error handling
  • Loading branch information
sebastienrousseau committed Jan 15, 2024
1 parent 5ce1f37 commit 52fd242
Showing 1 changed file with 30 additions and 73 deletions.
103 changes: 30 additions & 73 deletions src/modules/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT

extern crate regex;
use regex::Regex;
use regex::{Regex, Captures};
use std::error::Error;
use comrak::{ComrakOptions, markdown_to_html};
use crate::utilities::directory::{
Expand Down Expand Up @@ -209,115 +209,72 @@ pub fn convert_markdown_to_html(markdown_content: &str, options: &ComrakOptions)
/// If `title` is missing, it is set to the value of `alt`. If both are missing,
/// they remain unchanged.
///
/// Efficiency is enhanced by pre-compiling regex objects for `alt` and `title`
/// attributes outside the main processing loop. This approach minimizes redundant
/// computations, especially for large HTML contents.
///
/// Robust error handling is incorporated for regex compilation, ensuring that
/// the function responds gracefully to invalid regex patterns.
///
/// # Arguments
///
/// * `html` - The original HTML content as a string.
/// * `class_regex` - A `Regex` object for matching and replacing class attributes in HTML tags.
/// * `img_regex` - A `Regex` object for matching `<img>` tags in HTML.
/// * `title` - A placeholder title string, not used in current logic but kept for potential future use.
///
/// # Returns
///
/// A `Result` containing the transformed HTML content as a string if successful,
/// or a `Box<dyn Error>` if an error occurs.
/// or a `Box<dyn Error>` if an error occurs during regex compilation or processing.
///
/// # Errors
///
/// Returns an error if:
/// - A class attribute cannot be captured or a class value cannot be retrieved from the captured class attribute.
/// - The regular expression processing fails for any reason.
///
/// # Example
///
/// ```rust
/// use regex::Regex;
/// use std::error::Error;
/// use ssg::modules::html::post_process_html;
///
/// fn main() -> Result<(), Box<dyn Error>> {
/// let html = "<img src=\"image.jpg\" class=\"img-fluid\">";
/// let class_regex = Regex::new(r#".class=&quot;([^&]+)&quot;"#)?;
/// let img_regex = Regex::new(r#"(<img[^>]*?)(/?>)"#)?;
/// let title = "Unused Placeholder Title";
///
/// let processed_html = post_process_html(html, &class_regex, &img_regex)?;
/// println!("{}", processed_html);
///
/// Ok(())
/// }
/// ```
/// Returns an error if regex compilation or processing fails for any reason.
pub fn post_process_html(html: &str, class_regex: &Regex, img_regex: &Regex) -> Result<String, Box<dyn Error>> {
let mut processed_html = String::new();

// Pre-compiled regex for alt and title attributes (outside the loop for efficiency)
let alt_regex = Regex::new(r#"alt="([^"]*)""#)
.map_err(|e| format!("Failed to compile alt regex: {}", e))?;
let title_regex = Regex::new(r#"title="([^"]*)""#)
.map_err(|e| format!("Failed to compile title regex: {}", e))?;

for line in html.lines() {
let mut processed_line = line.to_string();

// Temporarily store class value
let class_value = if line.contains(".class=&quot;") {
class_regex.captures(&processed_line)
.and_then(|caps| caps.get(1))
.map(|m| m.as_str().to_string())
} else {
None
};

// Process class attributes
if let Some(class_value) = class_value {
if let Some(class_value) = class_regex.captures(&processed_line)
.and_then(|caps| caps.get(1))
.map(|m| m.as_str().to_string()) {

processed_line = class_regex.replace(&processed_line, "").to_string();
processed_line = img_regex.replace(&processed_line, &format!("$1 class=\"{}\"$2", class_value)).to_string();
}

// Add alt and title attributes to img tags
processed_line = img_regex.replace_all(&processed_line, |caps: &regex::Captures| {
let img_tag_start = &caps[1]; // <img... up to the closure
let img_tag_end = &caps[2]; // /> or >
// Process <img> tags
processed_line = img_regex.replace_all(&processed_line, |caps: &Captures| {
let img_tag_start = &caps[1];
let img_tag_end = &caps[2];

let mut new_img_tag = img_tag_start.to_string();

// Regex to find the alt attribute
let alt_regex = regex::Regex::new(r#"alt="([^"]*)""#).unwrap();

// Extract the value of the alt attribute and convert it to lowercase
let alt_value = alt_regex.captures(img_tag_start)
.and_then(|c| c.get(1))
.map_or(String::new(), |m| m.as_str().to_lowercase());
.map_or(String::new(), |c| c.get(1).map_or(String::new(), |m| m.as_str().to_lowercase()));

// Check if 'title' is present; if not, add it. If it is, replace it with the alt value
if new_img_tag.contains("title=") {
let title_prefix = if !alt_value.is_empty() { "Image of " } else { "" };
let max_alt_length = 66 - title_prefix.len();

// Ensure that we slice at a valid character boundary
let alt_substr = if !alt_value.is_empty() && alt_value.chars().count() > max_alt_length {
alt_value.char_indices().enumerate().take_while(|&(char_count, _)| {
char_count < max_alt_length
}).last().map_or(&alt_value[..], |(_, (idx, _))| {
&alt_value[..idx]
})
} else {
&alt_value
};

let title_regex = regex::Regex::new(r#"title="([^"]*)""#).unwrap();
let alt_substr = alt_value.chars().take(max_alt_length).collect::<String>();
new_img_tag = title_regex.replace(&new_img_tag, format!(r#"title="{}{}""#, title_prefix, alt_substr)).to_string();
} else {
let title_prefix = if !alt_value.is_empty() { "Image of " } else { "" };
let max_alt_length = 66 - title_prefix.len();

// Ensure that we slice at a valid character boundary
let alt_substr = if !alt_value.is_empty() && alt_value.chars().count() > max_alt_length {
alt_value.char_indices().enumerate().take_while(|&(char_count,_)| {
char_count < max_alt_length
}).last().map_or(&alt_value[..], |(_, (idx, _))| {
&alt_value[..idx]
})
} else {
&alt_value
};
new_img_tag.push_str(&format!(" title=\"{}{}\"", title_prefix, alt_substr));
} else if !alt_value.is_empty() {
// Add 'title' attribute if it's missing
new_img_tag.push_str(&format!(" title=\"Image of {}\"", alt_value));
}

// Append the closure of the tag (either /> or >)
// Append the closure of the tag
new_img_tag.push_str(img_tag_end);
new_img_tag
}).to_string();
Expand Down

0 comments on commit 52fd242

Please sign in to comment.