diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index d10323e..c175f53 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -71,9 +71,13 @@ jobs:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@1.60
- - name: Check (no filters)
+ - name: Check (no filters, no syntax)
run: cargo check --no-default-features --features serde,unicode
+ - uses: dtolnay/rust-toolchain@1.61
+ - name: Check (no filters)
+ run: cargo check --no-default-features --features serde,syntax,unicode
+
- uses: dtolnay/rust-toolchain@1.65
- name: Test
run: cargo test
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 9bd2ca4..3988348 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,6 @@
{
+ "rust-analyzer.check.features": "all",
+ "rust-analyzer.cargo.features": "all",
"rust-analyzer.server.extraEnv": {
"RUSTFLAGS": "--cfg internal_debug"
},
diff --git a/Cargo.lock b/Cargo.lock
index 25c7267..30c6ea2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -747,6 +747,7 @@ version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f98b09920c8be9ff96a5625aca5b5db7a4f4ba025132ff7d7aacb72c0244a45"
dependencies = [
+ "aho-corasick",
"serde",
]
@@ -1329,6 +1330,7 @@ dependencies = [
name = "upon"
version = "0.8.1"
dependencies = [
+ "aho-corasick",
"serde",
"unicode-ident",
"unicode-width",
diff --git a/Cargo.toml b/Cargo.toml
index 96ba98c..7e6e7c4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,6 +16,7 @@ include = ["src/**/*", "LICENSE-*", "README.md"]
rustdoc-args = ["--cfg", "docsrs"]
[dependencies]
+aho-corasick = { version = "1.1.2", optional = true }
serde = { version = "1.0.137", optional = true }
unicode-ident = { version = "1.0.5", optional = true }
unicode-width = { version = "0.1.9", optional = true }
@@ -35,6 +36,10 @@ filters = []
# the context using `Value`'s '`From` impls.
serde = ["dep:serde"]
+# Enables support for configuring custom delimiters in templates and pulls in
+# the `aho-corasick` crate.
+syntax = ["dep:aho-corasick"]
+
# Allows unicode identifiers in templates and enables improved error
# formatting.
unicode = ["dep:unicode-ident", "dep:unicode-width"]
diff --git a/benches/Cargo.toml b/benches/Cargo.toml
index 3e7e1f3..396fc3e 100644
--- a/benches/Cargo.toml
+++ b/benches/Cargo.toml
@@ -7,13 +7,13 @@ publish = false
[dependencies]
handlebars = "4.3.7"
liquid = "0.26.4"
-minijinja = "1.0.5"
+minijinja = { version = "1.0.5", features = ["custom_syntax"] }
rand = "0.8.5"
serde = { version = "1.0.137", features = ["derive"] }
serde_json = "1.0.103"
tera = "1.19.0"
tinytemplate = "1.2.1"
-upon = { path = ".." }
+upon = { path = "..", features = ["syntax"] }
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
diff --git a/benches/benchdata/syntax/minijinja.html b/benches/benchdata/syntax/minijinja.html
new file mode 100644
index 0000000..d234a79
--- /dev/null
+++ b/benches/benchdata/syntax/minijinja.html
@@ -0,0 +1,23 @@
+
+
+
+ { title }
+
+
+
+
+ Name |
+ Age |
+
+ <%- for user in users -%>
+ <% if not user.is_disabled %>
+ <# This is a comment #>
+
+ { user.name } |
+ { user.age } |
+
+ <% endif %>
+ <%- endfor -%>
+
+
+
diff --git a/benches/benchdata/syntax/upon.html b/benches/benchdata/syntax/upon.html
new file mode 100644
index 0000000..d234a79
--- /dev/null
+++ b/benches/benchdata/syntax/upon.html
@@ -0,0 +1,23 @@
+
+
+
+ { title }
+
+
+
+
+ Name |
+ Age |
+
+ <%- for user in users -%>
+ <% if not user.is_disabled %>
+ <# This is a comment #>
+
+ { user.name } |
+ { user.age } |
+
+ <% endif %>
+ <%- endfor -%>
+
+
+
diff --git a/benches/benches/engines.rs b/benches/benches/engines.rs
index 24ceb45..c336e16 100644
--- a/benches/benches/engines.rs
+++ b/benches/benches/engines.rs
@@ -11,6 +11,7 @@ criterion_group! {
benches,
bench_init,
bench_compile,
+ bench_syntax,
bench_render,
bench_filters,
}
@@ -57,6 +58,25 @@ pub fn bench_compile(c: &mut Criterion) {
bench!(Upon, "../benchdata/basic/upon.html");
}
+/// Benchmarks the time taken to compile a template with custom syntax.
+pub fn bench_syntax(c: &mut Criterion) {
+ let mut g = c.benchmark_group("syntax");
+
+ macro_rules! bench {
+ ($E:ty, $source:literal) => {{
+ g.bench_function(<$E as Engine>::name(), |b| {
+ let source = repeat(include_str!($source), 50);
+ let mut engine =
+ <$E as Engine>::with_syntax(("{", "}"), ("<%", "%>"), ("<#", "#>"));
+ b.iter(|| engine.add_template("bench", &source));
+ });
+ }};
+ }
+
+ bench!(Minijinja, "../benchdata/syntax/minijinja.html");
+ bench!(Upon, "../benchdata/syntax/upon.html");
+}
+
/// Benchmarks the time taken to render a template as a string.
pub fn bench_render(c: &mut Criterion) {
let mut g = c.benchmark_group("render");
diff --git a/benches/src/lib.rs b/benches/src/lib.rs
index 92ea83e..5a54631 100644
--- a/benches/src/lib.rs
+++ b/benches/src/lib.rs
@@ -5,10 +5,19 @@ mod tests;
use std::collections::HashMap;
/// Abstraction for a template engine.
-pub trait Engine<'a> {
+pub trait Engine<'a>: Sized {
fn name() -> &'static str;
fn new() -> Self;
- fn add_filters(&mut self);
+ fn with_syntax(
+ _expr: (&'static str, &'static str),
+ _block: (&'static str, &'static str),
+ _comment: (&'static str, &'static str),
+ ) -> Self {
+ unimplemented!()
+ }
+ fn add_filters(&mut self) {
+ unimplemented!()
+ }
fn add_template(&mut self, name: &'static str, source: &'a str);
fn render<S>(&self, name: &'static str, ctx: &S) -> String
where
@@ -83,9 +92,6 @@ impl<'engine> Engine<'engine> for Liquid {
}
}
- #[inline]
- fn add_filters(&mut self) {}
-
#[inline]
fn add_template(&mut self, name: &'static str, source: &'engine str) {
let template = self.parser.parse(source).unwrap();
@@ -123,6 +129,24 @@ impl<'engine> Engine<'engine> for Minijinja<'engine> {
}
#[inline]
+ fn with_syntax(
+ (variable_start, variable_end): (&'static str, &'static str),
+ (block_start, block_end): (&'static str, &'static str),
+ (comment_start, comment_end): (&'static str, &'static str),
+ ) -> Self {
+ let mut env = minijinja::Environment::new();
+ env.set_syntax(minijinja::Syntax {
+ block_start: block_start.into(),
+ block_end: block_end.into(),
+ variable_start: variable_start.into(),
+ variable_end: variable_end.into(),
+ comment_start: comment_start.into(),
+ comment_end: comment_end.into(),
+ })
+ .unwrap();
+ env
+ }
+
fn add_filters(&mut self) {}
#[inline]
@@ -156,7 +180,6 @@ impl<'engine> Engine<'engine> for Tera {
tera::Tera::default()
}
- #[inline]
fn add_filters(&mut self) {}
#[inline]
@@ -195,9 +218,6 @@ impl<'engine> Engine<'engine> for TinyTemplate<'engine> {
tt
}
- #[inline]
- fn add_filters(&mut self) {}
-
#[inline]
fn add_template(&mut self, name: &'static str, source: &'engine str) {
self.add_template(name, source).unwrap();
@@ -229,6 +249,21 @@ impl<'engine> Engine<'engine> for upon::Engine<'engine> {
upon::Engine::new()
}
+ #[inline]
+ fn with_syntax(
+ (begin_expr, end_expr): (&'static str, &'static str),
+ (begin_block, end_block): (&'static str, &'static str),
+ (begin_comment, end_comment): (&'static str, &'static str),
+ ) -> Self {
+ upon::Engine::with_syntax(
+ upon::Syntax::builder()
+ .expr(begin_expr, end_expr)
+ .block(begin_block, end_block)
+ .comment(begin_comment, end_comment)
+ .build(),
+ )
+ }
+
#[inline]
fn add_filters(&mut self) {
self.add_filter("lower", str::to_lowercase);
diff --git a/benches/src/testdata/syntax_minijinja.golden b/benches/src/testdata/syntax_minijinja.golden
new file mode 100644
index 0000000..d46e5f1
--- /dev/null
+++ b/benches/src/testdata/syntax_minijinja.golden
@@ -0,0 +1,25 @@
+
+
+
+ My awesome webpage!
+
+
+
+
+ Name |
+ Age |
+
+
+
+ Nancy Wheeler |
+ 17 |
+
+
+
+
+ Steve Harrington |
+ 18 |
+
+
+
+
\ No newline at end of file
diff --git a/benches/src/testdata/syntax_upon.golden b/benches/src/testdata/syntax_upon.golden
new file mode 100644
index 0000000..006f59b
--- /dev/null
+++ b/benches/src/testdata/syntax_upon.golden
@@ -0,0 +1,25 @@
+
+
+
+ My awesome webpage!
+
+
+
+
+ Name |
+ Age |
+
+
+
+ Nancy Wheeler |
+ 17 |
+
+
+
+
+ Steve Harrington |
+ 18 |
+
+
+
+
diff --git a/benches/src/tests.rs b/benches/src/tests.rs
index aefabfc..02eb7d3 100644
--- a/benches/src/tests.rs
+++ b/benches/src/tests.rs
@@ -3,7 +3,21 @@ use crate::{Engine, Handlebars, Liquid, Minijinja, Tera, TinyTemplate, Upon};
macro_rules! t {
($E:ty, $source:literal) => {{
- let result = render::<$E>(include_str!($source));
+ let result = render::<$E>(include_str!($source), false, false);
+ goldie::assert!(result);
+ }};
+}
+
+macro_rules! t_filters {
+ ($E:ty, $source:literal) => {{
+ let result = render::<$E>(include_str!($source), false, true);
+ goldie::assert!(result);
+ }};
+}
+
+macro_rules! t_syntax {
+ ($E:ty, $source:literal) => {{
+ let result = render::<$E>(include_str!($source), true, false);
goldie::assert!(result);
}};
}
@@ -39,25 +53,35 @@ fn basic_upon() {
#[test]
fn filters_handlebars() {
- t!(Handlebars, "../benchdata/filters/handlebars.html");
+ t_filters!(Handlebars, "../benchdata/filters/handlebars.html");
}
#[test]
fn filters_minijinja() {
- t!(Minijinja, "../benchdata/filters/minijinja.html");
+ t_filters!(Minijinja, "../benchdata/filters/minijinja.html");
}
#[test]
fn filters_tera() {
- t!(Tera, "../benchdata/filters/tera.html");
+ t_filters!(Tera, "../benchdata/filters/tera.html");
}
#[test]
fn filters_upon() {
- t!(Upon, "../benchdata/filters/upon.html");
+ t_filters!(Upon, "../benchdata/filters/upon.html");
+}
+
+#[test]
+fn syntax_minijinja() {
+ t_syntax!(Minijinja, "../benchdata/syntax/minijinja.html");
}
-fn render<'a, E: Engine<'a>>(source: &'a str) -> String {
+#[test]
+fn syntax_upon() {
+ t_syntax!(Upon, "../benchdata/syntax/upon.html");
+}
+
+fn render<'a, E: Engine<'a>>(source: &'a str, syntax: bool, filters: bool) -> String {
let ctx = Context {
title: "My awesome webpage!".to_owned(),
users: vec![
@@ -79,8 +103,14 @@ fn render<'a, E: Engine<'a>>(source: &'a str) -> String {
],
};
- let mut engine = E::new();
- engine.add_filters();
+ let mut engine = if syntax {
+ E::with_syntax(("{", "}"), ("<%", "%>"), ("<#", "#>"))
+ } else {
+ E::new()
+ };
+ if filters {
+ engine.add_filters();
+ }
engine.add_template("bench", source);
engine.render("bench", &ctx)
}
diff --git a/src/compile/lex.rs b/src/compile/lex.rs
index c5ae961..18bd26e 100644
--- a/src/compile/lex.rs
+++ b/src/compile/lex.rs
@@ -1,6 +1,6 @@
use crate::compile::parse::Keyword;
+use crate::types::delimiter::Delimiter;
use crate::types::span::Span;
-use crate::types::syntax;
use crate::{Engine, Error, Result};
/// A lexer that tokenizes the template source into distinct chunks so that the
@@ -196,8 +196,8 @@ impl<'engine, 'source> Lexer<'engine, 'source> {
};
match self.engine.searcher.find_at(self.source, i) {
- Some((kind, j, k)) => {
- let (tk, trim) = Token::from_kind(kind);
+ Some((delimiter, j, k)) => {
+ let (tk, trim) = Token::from_delimiter(delimiter);
if !tk.is_begin_tag() {
return Err(self.err_unexpected_token(tk, j..k));
@@ -247,8 +247,8 @@ impl<'engine, 'source> Lexer<'engine, 'source> {
// for the corresponding end tag `end`.
let (tk, j) = match self.engine.searcher.starts_with(self.source, i) {
- Some((kind, j)) => {
- let (tk, trim) = Token::from_kind(kind);
+ Some((delimiter, j)) => {
+ let (tk, trim) = Token::from_delimiter(delimiter);
if tk.is_begin_tag() {
return Err(self.err_unclosed(begin, end));
@@ -364,8 +364,8 @@ impl<'engine, 'source> Lexer<'engine, 'source> {
// i j k
match self.engine.searcher.find_at(self.source, i) {
- Some((kind, j, k)) => {
- let (tk, trim) = Token::from_kind(kind);
+ Some((delimiter, j, k)) => {
+ let (tk, trim) = Token::from_delimiter(delimiter);
if tk.is_begin_tag() {
return Err(self.err_unclosed(begin, end));
@@ -566,20 +566,20 @@ impl Token {
matches!(self, Self::Whitespace)
}
- fn from_kind(tk: syntax::Kind) -> (Self, bool) {
- match tk {
- syntax::Kind::BeginExpr => (Self::BeginExpr, false),
- syntax::Kind::EndExpr => (Self::EndExpr, false),
- syntax::Kind::BeginExprTrim => (Self::BeginExpr, true),
- syntax::Kind::EndExprTrim => (Self::EndExpr, true),
- syntax::Kind::BeginBlock => (Self::BeginBlock, false),
- syntax::Kind::EndBlock => (Self::EndBlock, false),
- syntax::Kind::BeginBlockTrim => (Self::BeginBlock, true),
- syntax::Kind::EndBlockTrim => (Self::EndBlock, true),
- syntax::Kind::BeginComment => (Self::BeginComment, false),
- syntax::Kind::EndComment => (Self::EndComment, false),
- syntax::Kind::BeginCommentTrim => (Self::BeginComment, true),
- syntax::Kind::EndCommentTrim => (Self::EndComment, true),
+ fn from_delimiter(d: Delimiter) -> (Self, bool) {
+ match d {
+ Delimiter::BeginExpr => (Self::BeginExpr, false),
+ Delimiter::EndExpr => (Self::EndExpr, false),
+ Delimiter::BeginExprTrim => (Self::BeginExpr, true),
+ Delimiter::EndExprTrim => (Self::EndExpr, true),
+ Delimiter::BeginBlock => (Self::BeginBlock, false),
+ Delimiter::EndBlock => (Self::EndBlock, false),
+ Delimiter::BeginBlockTrim => (Self::BeginBlock, true),
+ Delimiter::EndBlockTrim => (Self::EndBlock, true),
+ Delimiter::BeginComment => (Self::BeginComment, false),
+ Delimiter::EndComment => (Self::EndComment, false),
+ Delimiter::BeginCommentTrim => (Self::BeginComment, true),
+ Delimiter::EndCommentTrim => (Self::EndComment, true),
}
}
}
@@ -641,6 +641,15 @@ mod tests {
);
}
+ #[test]
+ fn lex_begin_expr_trickery() {
+ let tokens = lex("lorem { ipsum {{").unwrap();
+ assert_eq!(
+ tokens,
+ [(Token::Raw, "lorem { ipsum "), (Token::BeginExpr, "{{"),]
+ );
+ }
+
#[test]
fn lex_begin_expr_trim() {
let tokens = lex("lorem ipsum \t\n{{-").unwrap();
@@ -836,6 +845,23 @@ mod tests {
);
}
+ #[test]
+ fn lex_block_trim() {
+ let tokens = lex("lorem ipsum {%- dolor -%} sit").unwrap();
+ assert_eq!(
+ tokens,
+ [
+ (Token::Raw, "lorem ipsum"),
+ (Token::BeginBlock, "{%-"),
+ (Token::Whitespace, " "),
+ (Token::Ident, "dolor"),
+ (Token::Whitespace, " "),
+ (Token::EndBlock, "-%}"),
+ (Token::Raw, "sit"),
+ ]
+ );
+ }
+
#[test]
fn lex_block_and_expr() {
let tokens =
@@ -899,6 +925,21 @@ mod tests {
);
}
+ #[test]
+ fn lex_end_comment() {
+ let tokens = lex("lorem ipsum {# dolor #} sit amet").unwrap();
+ assert_eq!(
+ tokens,
+ [
+ (Token::Raw, "lorem ipsum "),
+ (Token::BeginComment, "{#"),
+ (Token::Raw, " dolor "),
+ (Token::EndComment, "#}"),
+ (Token::Raw, " sit amet"),
+ ]
+ );
+ }
+
#[test]
fn lex_end_comment_trim() {
let tokens = lex("lorem ipsum {# -#} \t\ndolor sit amet").unwrap();
diff --git a/src/compile/search/aho_corasick.rs b/src/compile/search/aho_corasick.rs
new file mode 100644
index 0000000..e52ef30
--- /dev/null
+++ b/src/compile/search/aho_corasick.rs
@@ -0,0 +1,41 @@
+use crate::types::delimiter::Delimiter;
+use crate::types::syntax::Syntax;
+use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
+
+#[cfg_attr(internal_debug, derive(Debug))]
+pub struct AhoCorasickSearcher {
+ imp: AhoCorasick,
+ kinds: Vec<Delimiter>,
+}
+
+impl AhoCorasickSearcher {
+ pub fn new(syntax: Syntax) -> Self {
+ let imp = AhoCorasickBuilder::new()
+ .match_kind(MatchKind::LeftmostLongest)
+ .build(syntax.patterns)
+ .expect("failed to build AhoCorasick");
+ Self {
+ imp,
+ kinds: syntax.marks,
+ }
+ }
+
+ #[inline]
+ pub fn find_at(&self, source: &str, at: usize) -> Option<(Delimiter, usize, usize)> {
+ let sb = source.as_bytes();
+ self.imp.find(&sb[at..]).map(|m| {
+ let kind = self.kinds[m.pattern()];
+ (kind, at + m.start(), at + m.end())
+ })
+ }
+
+ #[inline]
+ pub fn starts_with(&self, source: &str, at: usize) -> Option<(Delimiter, usize)> {
+ let (kind, i, j) = self.find_at(source, at)?;
+ if at == i {
+ Some((kind, j))
+ } else {
+ None
+ }
+ }
+}
diff --git a/src/compile/search/ahocorasick/build.rs b/src/compile/search/ahocorasick/build.rs
deleted file mode 100644
index 2afffcc..0000000
--- a/src/compile/search/ahocorasick/build.rs
+++ /dev/null
@@ -1,301 +0,0 @@
-//! A builder for an Aho-Corasick automaton.
-//!
-//! From the given set of patterns we build a state machine with a series of
-//! states that encode a transition for every possible byte. This state machine
-//! can then used to simultaneously search a string for the patterns.
-//!
-//! Consider building an Aho-Corasick automaton with the following patterns:
-//! 'ab' and 'cd', the trie would look the following. Where the states are
-//! represented as `S?` and have an asterisk (`*`) if there any matches at that
-//! state.
-//!
-//! ```text
-//! a - S1 - b - S2*
-//! /
-//! S0 - c - S3 - d - S4*
-//! ```
-//!
-//! In the above state machine there are no bytes that are the same between the
-//! patterns. Now consider the following patterns: 'abe' and 'bcd'. In the case
-//! of an input text of 'abcd', when at S2 we would end up failing to transition
-//! to S3. But we can encode the failure in the automaton as a transition from
-//! S2 to S4 and continue the search. What is not shown in these diagrams is that
-//! *all* states have a failure transition, but only S2 has a *non-trivial*
-//! failure transition. That is, all other states have a failure transition back
-//! to the start state.
-//!
-//! ```text
-//! a - S1 - b - S2 - e - S3*
-//! / /
-//! / -------
-//! / /
-//! S0 - b - S4 - c - S5 - d - S6*
-//! ```
-//!
-//! Encoding the failure transitions is the most complex part of building the
-//! automaton. Traditionally, this is implemented using a breadth-first search
-//! starting with all transitions from the start state. For each state and for
-//! every input transition at that state we follow the failure transitions
-//! backward until we find a failure state that has a forward transition for
-//! that input. That state must be the fail state for the original state.
-//!
-//! In order to support leftmost-longest match first semantics we also need
-//! to make a few modifications to the way the failure transitions are built.
-
-use std::collections::VecDeque;
-
-use super::{AhoCorasick, Pattern, State, DEAD, FAIL, S, START};
-
-#[derive(Default)]
-pub struct Builder {
- states: Vec<State>,
-}
-
-impl Builder {
- pub fn build<I, X, P>(mut self, patterns: I) -> AhoCorasick
- where
- I: IntoIterator<Item = (X, P)>,
- X: Into<usize>,
- P: AsRef<[u8]>,
- {
- self.push_state(0); // the fail state
- self.push_state(0); // the dead state
- self.push_state(0); // the start state
- self.build_initial_trie(patterns);
-
- // Set the failure transitions in the start state to loop back to the
- // start state.
- let start = self.start_mut();
- for byte in all() {
- if start.next_state(byte) == FAIL {
- start.set_transition(byte, START);
- }
- }
-
- // Set the failure transitions in the dead state to loop back to the
- // dead state.
- let dead = self.state_mut(DEAD);
- for byte in all() {
- if dead.next_state(byte) == FAIL {
- dead.set_transition(byte, DEAD);
- }
- }
-
- self.fill_failure_transitions();
-
- // Remove the start state loop by rewriting any transitions on the start
- // state back to the start state with transitions to the dead state.
- if self.start().is_match() {
- let start = self.start_mut();
- for byte in all() {
- if start.next_state(byte) == START {
- start.set_transition(byte, DEAD);
- }
- }
- }
-
- let Self { states } = self;
- AhoCorasick { states }
- }
-
- /// Build the initial trie where each pattern has a path from the start
- /// state until the end of the pattern.
- fn build_initial_trie<I, X, P>(&mut self, patterns: I)
- where
- I: IntoIterator<Item = (X, P)>,
- X: Into<usize>,
- P: AsRef<[u8]>,
- {
- for (pattern_id, pattern) in patterns.into_iter() {
- let pattern = pattern.as_ref();
-
- let mut id = START;
- for (depth, &byte) in pattern.iter().enumerate() {
- let next = self.state(id).next_state(byte);
- if next == FAIL {
- let next = self.push_state(depth + 1);
- self.state_mut(id).set_transition(byte, next);
- id = next;
- } else {
- id = next;
- }
- }
-
- let p = Pattern::new(pattern_id.into(), pattern.len());
- self.state_mut(id).push_match(p);
- }
- }
-
- fn fill_failure_transitions(&mut self) {
- // Initialize the queue for breadth first search with all transitions
- // out of the start state. We handle the start state specially because
- // we only want to follow non-self transitions. If we followed self
- // transitions, then this would never terminate.
- let mut queue = VecDeque::new();
- for byte in all() {
- let next = self.start().next_state(byte);
- if next != START {
- let match_depth = if self.start().is_match() {
- Some(0)
- } else {
- None
- };
- queue.push_back((next, match_depth));
-
- // If a state immediately following the start state is a match
- // state, then we never want to follow its failure transition
- // since the failure transition necessarily leads back to the
- // start state, which we never want to do for leftmost matching
- // after a match has been found.
- //
- // N.B. This is a special case of the more general handling
- // found below.
- if self.state(next).is_match() {
- self.state_mut(next).fail = DEAD;
- }
- }
- }
-
- while let Some((curr, match_depth)) = queue.pop_front() {
- let prev_len = queue.len();
-
- for byte in all() {
- let next = self.state(curr).next_state(byte);
- if next == FAIL {
- continue;
- }
-
- let next_match_depth = match match_depth {
- Some(d) => Some(d),
- None if self.state(next).is_match() => {
- let depth = self.state(next).depth
- - self.state(next).get_longest_match_len().unwrap()
- + 1;
- Some(depth)
- }
- None => None,
- };
-
- queue.push_back((next, next_match_depth));
-
- let fail = {
- let mut id = self.state(curr).fail;
- while self.state(id).next_state(byte) == FAIL {
- id = self.state(id).fail;
- }
- self.state(id).next_state(byte)
- };
-
- // Thanks Andrew Gallant
- if let Some(match_depth) = next_match_depth {
- let fail_depth = self.state(fail).depth;
- let next_depth = self.state(next).depth;
- if next_depth - match_depth + 1 > fail_depth {
- self.state_mut(next).fail = DEAD;
- continue;
- }
- assert_ne!(
- self.state(next).fail,
- START,
- "states that are match states or follow match \
- states should never have a failure transition \
- back to the start state in leftmost searching",
- );
- }
-
- self.state_mut(next).fail = fail;
- self.copy_matches(fail, next);
- }
-
- // If there are no transitions for this state and if it's a match
- // state, then we must set its failure transition to the dead
- // state since we never want it to restart the search.
- if queue.len() == prev_len && self.state(curr).is_match() {
- self.state_mut(curr).fail = DEAD;
- }
-
- // We don't need to copy empty matches from the start state here
- // because that's only necessary for overlapping matches and
- // leftmost match kinds don't support overlapping matches.
- }
- }
-
- fn copy_matches(&mut self, src: S, dst: S) {
- assert!(src != dst, "src {src} must not be equal to dst {dst}");
-
- // Simply gets a mutable reference to both states.
- let i = src;
- let j = dst;
- let (src, dst) = if i < j {
- let (left, right) = self.states.split_at_mut(j);
- (&mut left[i], &mut right[0])
- } else {
- let (left, right) = self.states.split_at_mut(i);
- (&mut right[0], &mut left[j])
- };
-
- dst.matches.extend_from_slice(&src.matches);
- }
-
- fn push_state(&mut self, depth: usize) -> S {
- let id = self.states.len();
- self.states.push(State {
- depth,
- fail: START,
- trans: [FAIL; 256],
- matches: vec![],
- });
- id
- // match id.try_into() {
- // Ok(id) => id,
- // Err(_) => {
- // panic!(
- // "state id type `{}` too small for the \
- // number of states in the automaton",
- // std::any::type_name::<S>()
- // );
- // }
- // }
- }
-
- fn state(&self, id: S) -> &State {
- &self.states[id]
- }
-
- fn state_mut(&mut self, id: S) -> &mut State {
- &mut self.states[id]
- }
-
- fn start(&self) -> &State {
- self.state(START)
- }
-
- fn start_mut(&mut self) -> &mut State {
- self.state_mut(START)
- }
-}
-
-impl State {
- fn push_match(&mut self, p: Pattern) {
- self.matches.push(p);
- }
-
- fn set_transition(&mut self, byte: u8, to: S) {
- self.trans[byte as usize] = to;
- }
-
- fn get_longest_match_len(&self) -> Option<usize> {
- // Why is this true? Because the first match in any matching state
- // will always correspond to the match added to it during trie
- // construction (since when we copy matches due to failure transitions,
- // we always append them). Therefore, it follows that the first match
- // must always be longest since any subsequent match must be from a
- // failure transition, and a failure transition by construction points
- // to a proper suffix. A proper suffix is, by definition, smaller.
- self.matches.first().map(|&p| p.len)
- }
-}
-
- fn all() -> impl Iterator<Item = u8> {
- 0..=255
-}
diff --git a/src/compile/search/ahocorasick/mod.rs b/src/compile/search/ahocorasick/mod.rs
deleted file mode 100644
index 6b9772c..0000000
--- a/src/compile/search/ahocorasick/mod.rs
+++ /dev/null
@@ -1,291 +0,0 @@
-//! A fast, multi-pattern searcher based on [Aho-Corasick algorithm][wikipedia].
-//!
-//! The design presented here mostly implements the standard algorithm as well
-//! as some unique ideas from the excellent [`aho-corasick`][aho-corasick]
-//! crate. This implementation only supports non-overlapping, leftmost-longest
-//! match first semantics.
-//!
-//! [aho-corasick]: https://crates.io/crates/aho-corasick
-//! [wikipedia]: https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm
-
-mod build;
-mod state;
-
-use self::build::Builder;
-use self::state::{State, DEAD, FAIL, S, START};
-
-#[cfg_attr(internal_debug, derive(Debug))]
-pub struct AhoCorasick {
- states: Vec<State>,
-}
-
-#[cfg_attr(internal_debug, derive(Debug))]
-pub struct Match {
- pattern: Pattern,
- end: usize,
-}
-
-#[derive(Clone, Copy)]
-#[cfg_attr(internal_debug, derive(Debug))]
-pub struct Pattern {
- id: usize,
- len: usize,
-}
-
-impl AhoCorasick {
- pub fn new<I, X, P>(patterns: I) -> Self
- where
- I: IntoIterator<Item = (X, P)>,
- X: Into<usize>,
- P: AsRef<[u8]>,
- {
- Builder::default().build(patterns)
- }
-
- pub fn find_at<T>(&self, haystack: T, mut at: usize) -> Option<Match>
- where
- T: AsRef<[u8]>,
- {
- let haystack = haystack.as_ref();
-
- let mut state = START;
- let mut last_match = self.get_match(state, 0, at);
- while at < haystack.len() {
- state = self.next_state(state, haystack[at]);
- debug_assert!(
- state != FAIL,
- "an automaton should never return fail state for next state"
- );
- at += 1;
-
- if state == DEAD {
- debug_assert!(
- last_match.is_some(),
- "an automaton should never return a dead state without a prior match"
- );
- return last_match;
- }
-
- if let Some(m) = self.get_match(state, 0, at) {
- last_match = Some(m);
- }
- }
- last_match
- }
-
- fn get_match(&self, id: S, match_id: usize, end: usize) -> Option<Match> {
- self.state(id)
- .matches
- .get(match_id)
- .map(|&pattern| Match { pattern, end })
- }
-
- fn next_state(&self, mut id: S, byte: u8) -> S {
- loop {
- let state = self.state(id);
- let next = state.next_state(byte);
- if next != FAIL {
- return next;
- }
- id = state.fail;
- }
- }
-
- fn state(&self, id: S) -> &State {
- &self.states[id]
- }
-}
-
-impl Match {
- pub fn pattern_id(&self) -> usize {
- self.pattern.id
- }
-
- /// The starting position of the match.
- pub fn start(&self) -> usize {
- self.end - self.pattern.len
- }
-
- /// The ending position of the match.
- pub fn end(&self) -> usize {
- self.end
- }
-}
-
-impl Pattern {
- fn new(id: usize, len: usize) -> Self {
- Self { id, len }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn aho_corasick_basics() {
- t(&[], "", &[]);
- t(&["a"], "", &[]);
- t(&["a"], "a", &[(0, 0, 1)]);
- t(&["a"], "aa", &[(0, 0, 1), (0, 1, 2)]);
- t(&["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]);
- t(&["a"], "aba", &[(0, 0, 1), (0, 2, 3)]);
- t(&["a"], "bba", &[(0, 2, 3)]);
- t(&["a"], "bbb", &[]);
- t(&["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]);
- t(&["aa"], "", &[]);
- t(&["aa"], "aa", &[(0, 0, 2)]);
- t(&["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]);
- t(&["aa"], "abbab", &[]);
- t(&["aa"], "abbabaa", &[(0, 5, 7)]);
- t(&["abc"], "abc", &[(0, 0, 3)]);
- t(&["abc"], "zazabzabcz", &[(0, 6, 9)]);
- t(&["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]);
- t(&["a", "b"], "", &[]);
- t(&["a", "b"], "z", &[]);
- t(&["a", "b"], "b", &[(1, 0, 1)]);
- t(&["a", "b"], "a", &[(0, 0, 1)]);
- t(
- &["a", "b"],
- "abba",
- &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4)],
- );
- t(
- &["b", "a"],
- "abba",
- &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4)],
- );
- t(&["abc", "bc"], "xbc", &[(1, 1, 3)]);
- t(&["foo", "bar"], "", &[]);
- t(&["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6)]);
- t(&["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6)]);
- t(&["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6)]);
- t(&["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6)]);
- t(&["foo", "bar"], "bafofoo", &[(0, 4, 7)]);
- t(&["bar", "foo"], "bafofoo", &[(1, 4, 7)]);
- t(&["foo", "bar"], "fobabar", &[(1, 4, 7)]);
- t(&["bar", "foo"], "fobabar", &[(0, 4, 7)]);
- t(&[""], "", &[(0, 0, 0)]);
- t(&[""], "a", &[(0, 0, 0), (0, 1, 1)]);
- t(&[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]);
- t(&["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7)]);
- t(&["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10)]);
- t(
- &["yabcdef", "bcdeyabc", "abcdezghi"],
- "yabcdezghi",
- &[(2, 1, 10)],
- );
- }
-
- #[test]
- fn aho_corasick_non_overlapping() {
- t(&["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4)]);
- t(&["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4)]);
- t(&["abc", "bc"], "zazabcz", &[(0, 3, 6)]);
- t(&["ab", "ba"], "abababa", &[(0, 0, 2), (0, 2, 4), (0, 4, 6)]);
- t(&["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9)]);
- t(&["", ""], "", &[(0, 0, 0)]);
- t(&["", ""], "a", &[(0, 0, 0), (0, 1, 1)]);
- }
-
- #[test]
- fn aho_corasick_leftmost() {
- t(&["ab", "ab"], "abcd", &[(0, 0, 2)]);
- t(&["a", ""], "a", &[(0, 0, 1), (1, 1, 1)]);
- t(&["", ""], "a", &[(0, 0, 0), (0, 1, 1)]);
- t(&["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]);
- t(&["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]);
- t(&["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]);
- t(&["abcd", "bce", "b"], "abce", &[(1, 1, 4)]);
- t(&["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]);
- t(&["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]);
- t(&["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]);
- t(&["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]);
- t(&["abc", "bd", "ab"], "abd", &[(2, 0, 2)]);
- t(&["abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[(2, 0, 8)]);
- t(
- &["abcdefghi", "cde", "hz", "abcdefgh"],
- "abcdefghz",
- &[(3, 0, 8)],
- );
- t(
- &["abcdefghi", "hz", "abcdefgh", "a"],
- "abcdefghz",
- &[(2, 0, 8)],
- );
- t(
- &["b", "abcdefghi", "hz", "abcdefgh"],
- "abcdefghz",
- &[(3, 0, 8)],
- );
- t(
- &["h", "abcdefghi", "hz", "abcdefgh"],
- "abcdefghz",
- &[(3, 0, 8)],
- );
- t(
- &["z", "abcdefghi", "hz", "abcdefgh"],
- "abcdefghz",
- &[(3, 0, 8), (0, 8, 9)],
- );
- }
-
- #[test]
- fn aho_corasick_leftmost_longest() {
- t(&["ab", "abcd"], "abcd", &[(1, 0, 4)]);
- t(&["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]);
- t(&["", "a"], "a", &[(1, 0, 1), (0, 1, 1)]);
- t(&["", "a", ""], "a", &[(1, 0, 1), (0, 1, 1)]);
- t(&["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1)]);
- t(&["", "", "a"], "a", &[(2, 0, 1), (0, 1, 1)]);
- t(&["", "a"], "aa", &[(1, 0, 1), (1, 1, 2), (0, 2, 2)]);
- t(&["a", "ab"], "a", &[(0, 0, 1)]);
- t(&["a", "ab"], "ab", &[(1, 0, 2)]);
- t(&["ab", "a"], "a", &[(1, 0, 1)]);
- t(&["ab", "a"], "ab", &[(0, 0, 2)]);
- t(&["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]);
- t(&["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]);
- t(&["abcd", "b", "bce"], "abce", &[(2, 1, 4)]);
- t(
- &["a", "abcdefghi", "hz", "abcdefgh"],
- "abcdefghz",
- &[(3, 0, 8)],
- );
- t(&["a", "abab"], "abab", &[(1, 0, 4)]);
- t(&["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4)]);
- t(&["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]);
- }
-
- #[track_caller]
- fn t(patterns: &[&str], haystack: &str, exp: &[(usize, usize, usize)]) {
- let ac = AhoCorasick::new(patterns.iter().enumerate());
- let matches: Vec<_> = ac
- .find_iter(haystack.as_ref())
- .map(|m| (m.pattern_id(), m.start(), m.end()))
- .take(10)
- .collect();
- assert_eq!(matches, exp);
- }
-
- impl AhoCorasick {
- pub fn find_iter<'a>(&'a self, haystack: &'a [u8]) -> impl Iterator<Item = Match> + 'a {
- let mut pos = 0;
- std::iter::from_fn(move || {
- if pos > haystack.len() {
- return None;
- }
- let mat = self.find_at(haystack, pos)?;
- if mat.end() == pos {
- // If the automaton can match the empty string and if we
- // found an empty match, then we need to forcefully move the
- // position.
- pos += 1;
- } else {
- pos = mat.end();
- }
-
- Some(mat)
- })
- }
- }
-}
diff --git a/src/compile/search/ahocorasick/state.rs b/src/compile/search/ahocorasick/state.rs
deleted file mode 100644
index d2f0806..0000000
--- a/src/compile/search/ahocorasick/state.rs
+++ /dev/null
@@ -1,41 +0,0 @@
-use super::Pattern;
-
-/// A unique identifier for a state.
-pub type S = usize;
-
-/// The identifier for an automaton's fail state.
-pub const FAIL: S = 0;
-
-/// The identifier for an automaton's dead state.
-pub const DEAD: S = 1;
-
-/// The identifier for an automaton's start state.
-pub const START: S = 2;
-
-/// A state in an Aho-Corasick automaton.
-#[cfg_attr(internal_debug, derive(Debug))]
-pub struct State {
- /// The transitions to the next state.
- pub trans: [S; 256],
-
- /// The failure transition.
- pub fail: S,
-
- /// The patterns that are matched at this state.
- pub matches: Vec<Pattern>,
-
- /// The distance from the start state in the automaton.
- pub depth: usize,
-}
-
-impl State {
- /// Returns the next state for the given input byte.
- pub fn next_state(&self, byte: u8) -> S {
- self.trans[byte as usize]
- }
-
- /// Whether or not this state contains any matches.
- pub fn is_match(&self) -> bool {
- !self.matches.is_empty()
- }
-}
diff --git a/src/compile/search/mod.rs b/src/compile/search/mod.rs
index 9cb308b..ed8620d 100644
--- a/src/compile/search/mod.rs
+++ b/src/compile/search/mod.rs
@@ -1,38 +1,112 @@
-mod ahocorasick;
+#[cfg(feature = "syntax")]
+mod aho_corasick;
-use crate::compile::search::ahocorasick::AhoCorasick;
-use crate::types::syntax::{Kind, Syntax};
+#[cfg(feature = "syntax")]
+use crate::compile::search::aho_corasick::AhoCorasickSearcher;
+use crate::types::delimiter::Delimiter;
+#[cfg(feature = "syntax")]
+use crate::types::syntax::Syntax;
#[cfg_attr(internal_debug, derive(Debug))]
-pub struct Searcher {
- imp: AhoCorasick,
+pub enum Searcher {
+ Default(DefaultSearcher),
+ #[cfg(feature = "syntax")]
+ AhoCorasick(AhoCorasickSearcher),
}
impl Searcher {
- pub fn new(syntax: Syntax) -> Self {
- let imp = AhoCorasick::new(syntax.patterns);
- Self { imp }
+ pub fn new() -> Self {
+ Self::Default(DefaultSearcher)
}
- pub fn find_at<T>(&self, haystack: T, at: usize) -> Option<(Kind, usize, usize)>
- where
- T: AsRef<[u8]>,
- {
- self.imp.find_at(haystack, at).map(|m| {
- let kind = Kind::from_usize(m.pattern_id());
- (kind, m.start(), m.end())
- })
+ #[cfg(feature = "syntax")]
+ pub fn with_syntax(syntax: Syntax) -> Self {
+ Self::AhoCorasick(AhoCorasickSearcher::new(syntax))
}
- pub fn starts_with<T>(&self, haystack: T, at: usize) -> Option<(Kind, usize)>
- where
- T: AsRef<[u8]>,
- {
- let (kind, i, j) = self.find_at(haystack, at)?;
- if at == i {
- Some((kind, j))
- } else {
- None
+ #[inline]
+ pub fn find_at(&self, source: &str, at: usize) -> Option<(Delimiter, usize, usize)> {
+ match self {
+ Self::Default(searcher) => searcher.find_at(source, at),
+ #[cfg(feature = "syntax")]
+ Self::AhoCorasick(searcher) => searcher.find_at(source, at),
+ }
+ }
+
+ #[inline]
+ pub fn starts_with(&self, source: &str, i: usize) -> Option<(Delimiter, usize)> {
+ match self {
+ Self::Default(searcher) => searcher.starts_with(source, i),
+ #[cfg(feature = "syntax")]
+ Self::AhoCorasick(searcher) => searcher.starts_with(source, i),
+ }
+ }
+}
+
+#[cfg_attr(internal_debug, derive(Debug))]
+pub struct DefaultSearcher;
+
+impl DefaultSearcher {
+ #[inline]
+ fn find_at(&self, source: &str, mut at: usize) -> Option<(Delimiter, usize, usize)> {
+ let sb = source.as_bytes();
+ loop {
+ let mark = at + sb[at..].iter().position(|&b| b == b'{' || b == b'}')?;
+ if sb[mark] == b'{' {
+ let i = mark;
+ match &sb[i..] {
+ // expr
+ [b'{', b'{', b'-', ..] => return Some((Delimiter::BeginExprTrim, i, i + 3)),
+ [b'{', b'{', ..] => return Some((Delimiter::BeginExpr, i, i + 2)),
+ // block
+ [b'{', b'%', b'-', ..] => return Some((Delimiter::BeginBlockTrim, i, i + 3)),
+ [b'{', b'%', ..] => return Some((Delimiter::BeginBlock, i, i + 2)),
+ // comment
+ [b'{', b'#', b'-', ..] => return Some((Delimiter::BeginCommentTrim, i, i + 3)),
+ [b'{', b'#', ..] => return Some((Delimiter::BeginComment, i, i + 2)),
+ _ => at = i + 1,
+ }
+ } else {
+ let j = mark + 1;
+ let i = j.saturating_sub(3);
+ match &sb[i..] {
+ // expr
+ [b'-', b'}', b'}', ..] => return Some((Delimiter::EndExprTrim, i, i + 3)),
+ [_, b'}', b'}', ..] => return Some((Delimiter::EndExpr, i + 1, i + 3)),
+ [b'}', b'}', ..] => return Some((Delimiter::EndExpr, i, i + 2)),
+ // block
+ [b'-', b'%', b'}', ..] => return Some((Delimiter::EndBlockTrim, i, i + 3)),
+ [_, b'%', b'}', ..] => return Some((Delimiter::EndBlock, i + 1, i + 3)),
+ [b'%', b'}', ..] => return Some((Delimiter::EndBlock, i, i + 2)),
+ // comment
+ [b'-', b'#', b'}', ..] => return Some((Delimiter::EndCommentTrim, i, i + 3)),
+ [_, b'#', b'}', ..] => return Some((Delimiter::EndComment, i + 1, i + 3)),
+ [b'#', b'}', ..] => return Some((Delimiter::EndComment, i, i + 2)),
+ _ => at = j,
+ }
+ }
+ }
+ }
+
+ #[inline]
+ fn starts_with(&self, source: &str, i: usize) -> Option<(Delimiter, usize)> {
+ let sb = source.as_bytes();
+ match &sb[i..] {
+ // begin
+ [b'{', b'{', b'-', ..] => Some((Delimiter::BeginExprTrim, i + 3)),
+ [b'{', b'{', ..] => Some((Delimiter::BeginExpr, i + 2)),
+ [b'{', b'%', b'-', ..] => Some((Delimiter::BeginBlockTrim, i + 3)),
+ [b'{', b'%', ..] => Some((Delimiter::BeginBlock, i + 2)),
+ [b'{', b'#', b'-', ..] => Some((Delimiter::BeginCommentTrim, i + 3)),
+ [b'{', b'#', ..] => Some((Delimiter::BeginComment, i + 2)),
+ // end
+ [b'-', b'}', b'}', ..] => Some((Delimiter::EndExprTrim, i + 3)),
+ [b'}', b'}', ..] => Some((Delimiter::EndExpr, i + 2)),
+ [b'-', b'%', b'}', ..] => Some((Delimiter::EndBlockTrim, i + 3)),
+ [b'%', b'}', ..] => Some((Delimiter::EndBlock, i + 2)),
+ [b'-', b'#', b'}', ..] => Some((Delimiter::EndCommentTrim, i + 3)),
+ [b'#', b'}', ..] => Some((Delimiter::EndComment, i + 2)),
+ _ => None,
}
}
}
diff --git a/src/lib.rs b/src/lib.rs
index 11410f4..dc5aa5d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -117,6 +117,10 @@
//! [`render_from(..)`][TemplateRef::render_from] to render templates and
//! construct the context using [`Value`]'s `From` impls.
//!
+//! - **`syntax`** _(disabled by default)_ — Enables support for configuring
+//! custom delimiters in templates (see [`Engine::with_syntax`]) and pulls in
+//! the [`aho-corasick`][aho_corasick] crate.
+//!
//! - **`unicode`** _(enabled by default)_ — Enables unicode support and pulls
//! in the [`unicode-ident`][unicode_ident] and
//! [`unicode-width`][unicode_width] crates. If disabled then unicode
@@ -239,6 +243,8 @@ use std::collections::BTreeMap;
pub use crate::error::Error;
pub use crate::render::Renderer;
+#[cfg(feature = "syntax")]
+#[cfg_attr(docsrs, doc(cfg(feature = "syntax")))]
pub use crate::types::syntax::{Syntax, SyntaxBuilder};
#[cfg(feature = "serde")]
#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
@@ -344,7 +350,7 @@ impl<'engine> Engine<'engine> {
/// Construct a new engine.
#[inline]
pub fn new() -> Self {
- Self::with_syntax(Syntax::default())
+ Self::with_searcher(Searcher::new())
}
/// Construct a new engine with custom syntax.
@@ -357,10 +363,24 @@ impl<'engine> Engine<'engine> {
/// let syntax = Syntax::builder().expr("<{", "}>").block("<[", "]>").build();
/// let engine = Engine::with_syntax(syntax);
/// ```
+ ///
+ /// # Note
+ ///
+ /// Passing a custom syntax to this function always uses the `aho-corasick`
+ /// implementation for searching. This means that even if you pass the
+ /// default syntax to this function it is *not* equivalent to
+ /// [`Engine::new()`][Engine::new].
+ #[cfg_attr(docsrs, doc(cfg(feature = "syntax")))]
+ #[cfg(feature = "syntax")]
#[inline]
pub fn with_syntax(syntax: Syntax<'engine>) -> Self {
+ Self::with_searcher(Searcher::with_syntax(syntax))
+ }
+
+ #[inline]
+ fn with_searcher(searcher: Searcher) -> Self {
Self {
- searcher: Searcher::new(syntax),
+ searcher,
default_formatter: &fmt::default,
functions: BTreeMap::new(),
templates: BTreeMap::new(),
diff --git a/src/syntax.rs b/src/syntax.rs
index c74e1e2..0d321f7 100644
--- a/src/syntax.rs
+++ b/src/syntax.rs
@@ -5,8 +5,8 @@
//! contains [**expressions**](#expressions) for rendering values and
//! [**blocks**](#blocks) for controlling logic. These require you to use
//! specific syntax delimiters in the template. Because `upon` allows you to
-//! configure these delimiters, this document will only refer to the
-//! [**default**][crate::Syntax::default] configuration.
+//! configure these delimiters, this document will only refer to the **default**
+//! configuration.
//!
//! # Expressions
//!
diff --git a/src/types/delimiter.rs b/src/types/delimiter.rs
new file mode 100644
index 0000000..10aad74
--- /dev/null
+++ b/src/types/delimiter.rs
@@ -0,0 +1,15 @@
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Delimiter {
+ BeginExpr,
+ EndExpr,
+ BeginExprTrim,
+ EndExprTrim,
+ BeginBlock,
+ EndBlock,
+ BeginBlockTrim,
+ EndBlockTrim,
+ BeginComment,
+ EndComment,
+ BeginCommentTrim,
+ EndCommentTrim,
+}
diff --git a/src/types/mod.rs b/src/types/mod.rs
index 98ca6de..3ab9a8c 100644
--- a/src/types/mod.rs
+++ b/src/types/mod.rs
@@ -1,4 +1,6 @@
pub mod ast;
+pub mod delimiter;
pub mod program;
pub mod span;
+#[cfg(feature = "syntax")]
pub mod syntax;
diff --git a/src/types/syntax.rs b/src/types/syntax.rs
index 2f4c141..2254c82 100644
--- a/src/types/syntax.rs
+++ b/src/types/syntax.rs
@@ -1,18 +1,21 @@
use std::marker::PhantomData;
+use crate::types::delimiter::Delimiter;
+
/// The template syntax configuration.
///
/// Use [`Syntax::default()`] to get the default syntax configuration and
/// [`Syntax::builder()`] to create a custom syntax configuration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Syntax<'a> {
- pub(crate) patterns: Vec<(Kind, String)>,
+ pub(crate) marks: Vec<Delimiter>,
+ pub(crate) patterns: Vec<String>,
_marker: PhantomData<&'a ()>,
}
/// A builder for the syntax configuration.
///
-/// This struct is typically created using [`Syntax::builder()`].
+/// This struct is created using [`Syntax::builder()`].
#[derive(Debug, Clone)]
pub struct SyntaxBuilder<'a> {
expr: Option<(&'a str, &'a str)>,
@@ -20,30 +23,6 @@ pub struct SyntaxBuilder<'a> {
comment: Option<(&'a str, &'a str)>,
}
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum Kind {
- BeginExpr = 0,
- EndExpr = 1,
- BeginExprTrim = 2,
- EndExprTrim = 3,
- BeginBlock = 4,
- EndBlock = 5,
- BeginBlockTrim = 6,
- EndBlockTrim = 7,
- BeginComment = 8,
- EndComment = 9,
- BeginCommentTrim = 10,
- EndCommentTrim = 11,
-}
-
-#[test]
-fn kind_usize() {
- for p in 0..12 {
- let k = Kind::from_usize(p);
- assert_eq!(k as usize, p);
- }
-}
-
impl Default for Syntax<'_> {
/// Returns the default syntax configuration.
///
@@ -81,22 +60,15 @@ impl<'a> Syntax<'a> {
/// ```
#[inline]
pub fn builder() -> SyntaxBuilder<'a> {
- SyntaxBuilder::new()
- }
-}
-
-impl<'a> SyntaxBuilder<'a> {
- /// Creates a new syntax builder.
- #[inline]
- #[allow(clippy::new_without_default)]
- pub fn new() -> Self {
- Self {
+ SyntaxBuilder {
expr: None,
block: None,
comment: None,
}
}
+}
+impl<'a> SyntaxBuilder<'a> {
/// Set the block syntax.
///
/// If not set then the expression syntax will not be available.
@@ -141,54 +113,34 @@ impl<'a> SyntaxBuilder<'a> {
/// Builds the syntax configuration.
pub fn build(&self) -> Syntax<'a> {
+ let mut kinds = Vec::new();
let mut patterns = Vec::new();
+ let mut push = |kind, pattern| {
+ kinds.push(kind);
+ patterns.push(pattern);
+ };
if let Some((begin, end)) = self.expr {
- patterns.push((Kind::BeginExpr, begin.into()));
- patterns.push((Kind::EndExpr, end.into()));
- patterns.push((Kind::BeginExprTrim, format!("{begin}-")));
- patterns.push((Kind::EndExprTrim, format!("-{end}")));
+ push(Delimiter::BeginExpr, begin.into());
+ push(Delimiter::EndExpr, end.into());
+ push(Delimiter::BeginExprTrim, format!("{begin}-"));
+ push(Delimiter::EndExprTrim, format!("-{end}"));
};
if let Some((begin, end)) = self.block {
- patterns.push((Kind::BeginBlock, begin.into()));
- patterns.push((Kind::EndBlock, end.into()));
- patterns.push((Kind::BeginBlockTrim, format!("{begin}-")));
- patterns.push((Kind::EndBlockTrim, format!("-{end}")));
+ push(Delimiter::BeginBlock, begin.into());
+ push(Delimiter::EndBlock, end.into());
+ push(Delimiter::BeginBlockTrim, format!("{begin}-"));
+ push(Delimiter::EndBlockTrim, format!("-{end}"));
}
if let Some((begin, end)) = self.comment {
- patterns.push((Kind::BeginComment, begin.into()));
- patterns.push((Kind::EndComment, end.into()));
- patterns.push((Kind::BeginCommentTrim, format!("{begin}-")));
- patterns.push((Kind::EndCommentTrim, format!("-{end}")));
+ push(Delimiter::BeginComment, begin.into());
+ push(Delimiter::EndComment, end.into());
+ push(Delimiter::BeginCommentTrim, format!("{begin}-"));
+ push(Delimiter::EndCommentTrim, format!("-{end}"));
}
Syntax {
+ marks: kinds,
patterns,
_marker: PhantomData,
}
}
}
-
-impl Kind {
- pub fn from_usize(id: usize) -> Self {
- match id {
- 0 => Self::BeginExpr,
- 1 => Self::EndExpr,
- 2 => Self::BeginExprTrim,
- 3 => Self::EndExprTrim,
- 4 => Self::BeginBlock,
- 5 => Self::EndBlock,
- 6 => Self::BeginBlockTrim,
- 7 => Self::EndBlockTrim,
- 8 => Self::BeginComment,
- 9 => Self::EndComment,
- 10 => Self::BeginCommentTrim,
- 11 => Self::EndCommentTrim,
- _ => unreachable!(),
- }
- }
-}
-
-impl From<Kind> for usize {
- fn from(k: Kind) -> Self {
- k as usize
- }
-}
diff --git a/tests/lex.rs b/tests/lex.rs
index 89fb1d1..dc77674 100644
--- a/tests/lex.rs
+++ b/tests/lex.rs
@@ -1,4 +1,4 @@
-use upon::{Engine, Error, Syntax};
+use upon::{Engine, Error};
#[test]
fn lex_while_eof() {
@@ -17,17 +17,25 @@ fn lex_while_eof() {
);
}
+#[cfg(feature = "syntax")]
#[test]
fn lex_syntax_overlapping() {
- let syntax = Syntax::builder().expr("{", "}").block("{{", "}}").build();
+ let syntax = upon::Syntax::builder()
+ .expr("{", "}")
+ .block("{{", "}}")
+ .build();
Engine::with_syntax(syntax)
.compile("lorem { ipsum } {{ if dolor }} {{ endif }} sit amet")
.unwrap();
}
+#[cfg(feature = "syntax")]
#[test]
fn lex_syntax_overlapping_flipped() {
- let syntax = Syntax::builder().expr("{{", "}}").block("{", "}").build();
+ let syntax = upon::Syntax::builder()
+ .expr("{{", "}}")
+ .block("{", "}")
+ .build();
Engine::with_syntax(syntax)
.compile("lorem {{ ipsum }} { if dolor } { endif } sit amet")
.unwrap();
@@ -40,9 +48,13 @@ fn lex_syntax_whitespace_trimming() {
.unwrap();
}
+#[cfg(feature = "syntax")]
#[test]
fn lex_syntax_precedence() {
- let syntax = Syntax::builder().expr("{|", "|}").block("{", "}").build();
+ let syntax = upon::Syntax::builder()
+ .expr("{|", "|}")
+ .block("{", "}")
+ .build();
Engine::with_syntax(syntax)
.compile("lorem {| ipsum | dolor |} sit")
.unwrap();