Skip to content

Commit

Permalink
add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
lapla-cogito committed Nov 30, 2024
1 parent 6392092 commit 425f7cb
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 34 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
A hobby regular expression engine in Rust.

- supports Unicode characters
- engines
- implements 3 engines:
- DFA-based engine
- convert regex to NFA
- convert NFA to DFA
- VM-based engine
- caching
- cache
- Derivative-based engine
- use Brzozowski's derivatives

## example

Expand Down Expand Up @@ -123,4 +124,4 @@ $ cargo bench

## license

MIT
MIT
26 changes: 1 addition & 25 deletions src/automaton/dfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ impl Dfa {
return false;
}
}

self.accept().contains(&state)
}
}
Expand All @@ -111,31 +112,6 @@ impl Dfa {
mod tests {
use super::*;

// Checks `Nfa::epsilon_closure` on NFAs built from two small alternation patterns.
// In both cases the closure is seeded with a single-element set holding `nfa.start()`.
// NOTE(review): the expected state ids presumably depend on the order in which
// `Nfa::new_from_node` allocates states — confirm before changing the builder.
#[test]
fn e_closure() {
// Case 1: pattern `a|b*`.
let mut lexer = crate::lexer::Lexer::new("a|b*");
let mut parser = crate::parser::Parser::new(&mut lexer);
let nfa = crate::automaton::nfa::Nfa::new_from_node(
parser.parse().unwrap(),
&mut crate::automaton::nfa::NfaState::new(),
)
.unwrap();

// The closure of the start state is expected to be exactly {0, 2, 4, 5}.
let closure = nfa.epsilon_closure([nfa.start()].iter().cloned().collect());
assert_eq!(closure, [0, 2, 4, 5].iter().cloned().collect());

// Case 2: pattern `a|b|c`, built with a fresh lexer, parser and `NfaState`.
let mut lexer = crate::lexer::Lexer::new("a|b|c");
let mut parser = crate::parser::Parser::new(&mut lexer);
let nfa = crate::automaton::nfa::Nfa::new_from_node(
parser.parse().unwrap(),
&mut crate::automaton::nfa::NfaState::new(),
)
.unwrap();

// The closure of the start state is expected to be exactly {0, 2, 4, 6, 7}.
let closure = nfa.epsilon_closure([nfa.start()].iter().cloned().collect());
assert_eq!(closure, [0, 2, 4, 6, 7].iter().cloned().collect());
}

#[test]
fn test_dfa_from_nfa() {
let nfa = crate::automaton::nfa::Nfa::new_from_node(
Expand Down
21 changes: 21 additions & 0 deletions src/automaton/nfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -491,4 +491,25 @@ mod tests {
.collect()
);
}

/// Verifies `Nfa::epsilon_closure` when seeded with only the start state.
///
/// Each table entry pairs a pattern with the state ids the closure must
/// contain; the NFA for every pattern is built with a fresh `NfaState`.
#[test]
fn e_closure() {
    let cases = [
        ("a|b*", vec![0, 2, 4, 5]),
        ("a|b|c", vec![0, 2, 4, 6, 7]),
    ];

    for &(pattern, ref expected) in cases.iter() {
        // Lex and parse the pattern, then lower the resulting tree to an NFA.
        let mut lexer = crate::lexer::Lexer::new(pattern);
        let mut parser = crate::parser::Parser::new(&mut lexer);
        let ast = parser.parse().unwrap();
        let nfa = Nfa::new_from_node(ast, &mut NfaState::new()).unwrap();

        // Seed the closure with a one-element collection holding the start state.
        let seed = std::iter::once(nfa.start()).collect();
        let reachable = nfa.epsilon_closure(seed);
        assert_eq!(reachable, expected.iter().cloned().collect());
    }
}
}
39 changes: 33 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ mod tests {
}

#[test]
fn multibyte_dfa() {
fn nonascii_dfa() {
let regex = RustRegex::new("あ|い*", "dfa").unwrap();
assert!(regex.is_match("あ"));
assert!(regex.is_match("い"));
Expand Down Expand Up @@ -193,11 +193,20 @@ mod tests {
assert!(regex.is_match("♥"));
assert!(!regex.is_match("♡"));
assert!(!regex.is_match("👎️"));

let regex = RustRegex::new("ගවයා|ng'ombe", "dfa").unwrap();
assert!(regex.is_match("ගවයා"));
assert!(regex.is_match("ng'ombe"));
assert!(!regex.is_match("ගවයාng'ombe"));

let regex = RustRegex::new("(පරිගණකය)*", "dfa").unwrap();
assert!(regex.is_match("පරිගණකය"));
assert!(regex.is_match(""));
}

#[test]
fn invalid_dfa() {
for test in ["a(b", "*", ")c", "*", "+"] {
for test in ["a(b", "*", ")c", "+"] {
let regex = RustRegex::new(test, "dfa");
assert!(regex.is_err());
}
Expand Down Expand Up @@ -273,7 +282,7 @@ mod tests {
}

#[test]
fn multibyte_vm() {
fn nonascii_vm() {
let regex = RustRegex::new("あ|い*", "vm").unwrap();
assert!(regex.is_match("あ"));
assert!(regex.is_match("い"));
Expand Down Expand Up @@ -327,11 +336,20 @@ mod tests {
assert!(regex.is_match("♥"));
assert!(!regex.is_match("♡"));
assert!(!regex.is_match("👎️"));

let regex = RustRegex::new("ගවයා|ng'ombe", "vm").unwrap();
assert!(regex.is_match("ගවයා"));
assert!(regex.is_match("ng'ombe"));
assert!(!regex.is_match("ගවයාng'ombe"));

let regex = RustRegex::new("(පරිගණකය)*", "vm").unwrap();
assert!(regex.is_match("පරිගණකය"));
assert!(regex.is_match(""));
}

#[test]
fn invalid_vm() {
for test in ["a(b", "*", ")c", "*", "+"] {
for test in ["a(b", "*", ")c", "+"] {
let regex = RustRegex::new(test, "vm");
assert!(regex.is_err());
}
Expand Down Expand Up @@ -407,7 +425,7 @@ mod tests {
}

#[test]
fn multibyte_derivative() {
fn nonascii_derivative() {
let regex = RustRegex::new("あ|い*", "derivative").unwrap();
assert!(regex.is_match("あ"));
assert!(regex.is_match("い"));
Expand Down Expand Up @@ -461,11 +479,20 @@ mod tests {
assert!(regex.is_match("♥"));
assert!(!regex.is_match("♡"));
assert!(!regex.is_match("👎️"));

let regex = RustRegex::new("ගවයා|ng'ombe", "derivative").unwrap();
assert!(regex.is_match("ගවයා"));
assert!(regex.is_match("ng'ombe"));
assert!(!regex.is_match("ගවයාng'ombe"));

let regex = RustRegex::new("(පරිගණකය)*", "derivative").unwrap();
assert!(regex.is_match("පරිගණකය"));
assert!(regex.is_match(""));
}

#[test]
fn invalid_derivative() {
for test in ["a(b", "*", ")c", "*", "+"] {
for test in ["a(b", "*", ")c", "+"] {
let regex = RustRegex::new(test, "derivative");
assert!(regex.is_err());
}
Expand Down

0 comments on commit 425f7cb

Please sign in to comment.