Skip to content

Commit

Permalink
add string and comparison operators
Browse files Browse the repository at this point in the history
  • Loading branch information
TommYDeeee committed Mar 21, 2024
1 parent fcff3bf commit 6899e8a
Show file tree
Hide file tree
Showing 11 changed files with 336 additions and 35 deletions.
42 changes: 42 additions & 0 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,22 @@ pub(crate) enum LogosToken {
Not,
#[regex(r"true|false", |lex| lex.slice().to_string())]
Bool(String),
#[token("contains")]
Contains,
#[token("icontains")]
IContains,
#[token("startswith")]
StartsWith,
#[token("istartswith")]
IStartsWith,
#[token("endswith")]
EndsWith,
#[token("iendswith")]
IEndsWith,
#[token("iequals")]
IEquals,
#[token("matches")]
Matches,

// Patterns
#[regex(r"/(([^\\/\n])|(\\.))+/[a-zA-Z0-9]*", |lex| lex.slice().to_string())]
Expand Down Expand Up @@ -148,6 +164,18 @@ pub(crate) enum LogosToken {
Caret,
#[token(".")]
Dot,
#[token("==")]
Equal,
#[token("!=")]
NotEqual,
#[token("<")]
LessThan,
#[token("<=")]
LessThanEqual,
#[token(">")]
GreaterThan,
#[token(">=")]
GreaterThanEqual,

// Whitespace - whitespace tokens are preserved to implement full fidelity
// and error resilience
Expand Down Expand Up @@ -232,6 +260,14 @@ fn logos_tokenkind_to_syntaxkind(token: LogosToken) -> SyntaxKind {
LogosToken::And => SyntaxKind::AND_KW,
LogosToken::Or => SyntaxKind::OR_KW,
LogosToken::Not => SyntaxKind::NOT_KW,
LogosToken::Contains => SyntaxKind::CONTAINS_KW,
LogosToken::IContains => SyntaxKind::ICONTAINS_KW,
LogosToken::StartsWith => SyntaxKind::STARTSWITH_KW,
LogosToken::IStartsWith => SyntaxKind::ISTARTSWITH_KW,
LogosToken::EndsWith => SyntaxKind::ENDSWITH_KW,
LogosToken::IEndsWith => SyntaxKind::IENDSWITH_KW,
LogosToken::IEquals => SyntaxKind::IEQUALS_KW,
LogosToken::Matches => SyntaxKind::MATCHES_KW,
LogosToken::Identifier(_) => SyntaxKind::IDENTIFIER,
LogosToken::Variable(_) => SyntaxKind::VARIABLE,
LogosToken::String(_) => SyntaxKind::STRING_LIT,
Expand Down Expand Up @@ -263,6 +299,12 @@ fn logos_tokenkind_to_syntaxkind(token: LogosToken) -> SyntaxKind {
LogosToken::Ampersand => T![&],
LogosToken::Caret => T![^],
LogosToken::Dot => T![.],
LogosToken::Equal => T![==],
LogosToken::NotEqual => T![!=],
LogosToken::LessThan => T![<],
LogosToken::LessThanEqual => T![<=],
LogosToken::GreaterThan => T![>],
LogosToken::GreaterThanEqual => T![>=],
LogosToken::Integer(_) => SyntaxKind::INT_LIT,
LogosToken::Float(_) => SyntaxKind::FLOAT_LIT,
LogosToken::Bool(_) => SyntaxKind::BOOL_LIT,
Expand Down
31 changes: 21 additions & 10 deletions src/parser/grammar/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,23 +326,14 @@ enum Associativity {
/// Binding powers of operators for a Pratt parser.
fn current_op(p: &mut Parser) -> (u8, SyntaxKind, Associativity) {
match p.current() {
// add support for other operators
T![and] => (4, T![and], Associativity::Left),
T![or] => (2, T![or], Associativity::Left),
_ => (0, ERROR, Associativity::Left),
}
}

fn invalid_op(p: &mut Parser) -> (u8, SyntaxKind, Associativity) {
match p.current() {
// add support for other operators
_ => (0, ERROR, Associativity::Left),
}
}

fn expr_op(p: &mut Parser) -> (u8, SyntaxKind, Associativity) {
match p.current() {
// add support for other operators
T![|] => (10, T![|], Associativity::Left),
T![^] => (12, T![^], Associativity::Left),
T![&] => (14, T![&], Associativity::Left),
Expand All @@ -357,6 +348,26 @@ fn expr_op(p: &mut Parser) -> (u8, SyntaxKind, Associativity) {
}
}

fn expr_stmt_op(p: &mut Parser) -> (u8, SyntaxKind, Associativity) {
match p.current() {
T![==] => (6, T![==], Associativity::Left),
T![!=] => (6, T![!=], Associativity::Left),
T![contains] => (6, T![contains], Associativity::Left),
T![icontains] => (6, T![icontains], Associativity::Left),
T![startswith] => (6, T![startswith], Associativity::Left),
T![istartswith] => (6, T![istartswith], Associativity::Left),
T![endswith] => (6, T![endswith], Associativity::Left),
T![iendswith] => (6, T![iendswith], Associativity::Left),
T![iequals] => (6, T![iequals], Associativity::Left),
T![matches] => (6, T![matches], Associativity::Left),
T![<] => (8, T![<], Associativity::Left),
T![<=] => (8, T![<=], Associativity::Left),
T![>] => (8, T![>], Associativity::Left),
T![>=] => (8, T![>=], Associativity::Left),
_ => (0, ERROR, Associativity::Left),
}
}

/// Parse an expression using a Pratt parser.
///
/// Expression can be binary, unary or literal
Expand Down Expand Up @@ -422,7 +433,7 @@ fn expr_stmt(p: &mut Parser, m: Option<Marker>, bp: u8) -> Option<CompletedMarke
};

loop {
let (op_bp, op, associativity) = invalid_op(p);
let (op_bp, op, associativity) = expr_stmt_op(p);
if op_bp < bp {
break;
}
Expand Down
40 changes: 39 additions & 1 deletion src/parser/syntax_kind/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ pub enum SyntaxKind {
AMPERSAND,
CARET,
DOT,
EQ,
NE,
LT,
LE,
GT,
GE,
AND_KW,
OR_KW,
NOT_KW,
Expand All @@ -49,6 +55,14 @@ pub enum SyntaxKind {
XOR_KW,
BASE64_KW,
BASE64WIDE_KW,
CONTAINS_KW,
ICONTAINS_KW,
STARTSWITH_KW,
ISTARTSWITH_KW,
ENDSWITH_KW,
IENDSWITH_KW,
IEQUALS_KW,
MATCHES_KW,
STRING_LIT,
INT_LIT,
FLOAT_LIT,
Expand Down Expand Up @@ -121,6 +135,14 @@ impl SyntaxKind {
| XOR_KW
| BASE64_KW
| BASE64WIDE_KW
| CONTAINS_KW
| ICONTAINS_KW
| STARTSWITH_KW
| ISTARTSWITH_KW
| ENDSWITH_KW
| IENDSWITH_KW
| IEQUALS_KW
| MATCHES_KW
)
}
pub fn is_punct(self) -> bool {
Expand Down Expand Up @@ -148,6 +170,12 @@ impl SyntaxKind {
| AMPERSAND
| CARET
| DOT
| EQ
| NE
| LT
| LE
| GT
| GE
)
}
pub fn is_literal(self) -> bool {
Expand All @@ -173,6 +201,14 @@ impl SyntaxKind {
"xor" => XOR_KW,
"base64" => BASE64_KW,
"base64wide" => BASE64WIDE_KW,
"contains" => CONTAINS_KW,
"icontains" => ICONTAINS_KW,
"startswith" => STARTSWITH_KW,
"istartswith" => ISTARTSWITH_KW,
"endswith" => ENDSWITH_KW,
"iendswith" => IENDSWITH_KW,
"iequals" => IEQUALS_KW,
"matches" => MATCHES_KW,
_ => return None,
};
Some(kw)
Expand All @@ -199,11 +235,13 @@ impl SyntaxKind {
'&' => AMPERSAND,
'^' => CARET,
'.' => DOT,
'<' => LT,
'>' => GT,
_ => return None,
};
Some(tok)
}
}
#[macro_export]
macro_rules ! T { [:] => { $ crate :: SyntaxKind :: COLON } ; ['('] => { $ crate :: SyntaxKind :: L_PAREN } ; [')'] => { $ crate :: SyntaxKind :: R_PAREN } ; ['{'] => { $ crate :: SyntaxKind :: L_BRACE } ; ['}'] => { $ crate :: SyntaxKind :: R_BRACE } ; ['['] => { $ crate :: SyntaxKind :: L_BRACKET } ; [']'] => { $ crate :: SyntaxKind :: R_BRACKET } ; [,] => { $ crate :: SyntaxKind :: COMMA } ; [=] => { $ crate :: SyntaxKind :: ASSIGN } ; [-] => { $ crate :: SyntaxKind :: HYPHEN } ; [?] => { $ crate :: SyntaxKind :: QUESTION_MARK } ; [~] => { $ crate :: SyntaxKind :: TILDE } ; [|] => { $ crate :: SyntaxKind :: PIPE } ; [/] => { $ crate :: SyntaxKind :: SLASH } ; [+] => { $ crate :: SyntaxKind :: PLUS } ; [*] => { $ crate :: SyntaxKind :: STAR } ; [%] => { $ crate :: SyntaxKind :: PERCENTAGE } ; [<<] => { $ crate :: SyntaxKind :: SHL } ; [>>] => { $ crate :: SyntaxKind :: SHR } ; [&] => { $ crate :: SyntaxKind :: AMPERSAND } ; [^] => { $ crate :: SyntaxKind :: CARET } ; [.] => { $ crate :: SyntaxKind :: DOT } ; [and] => { $ crate :: SyntaxKind :: AND_KW } ; [or] => { $ crate :: SyntaxKind :: OR_KW } ; [not] => { $ crate :: SyntaxKind :: NOT_KW } ; [rule] => { $ crate :: SyntaxKind :: RULE_KW } ; [strings] => { $ crate :: SyntaxKind :: STRINGS_KW } ; [condition] => { $ crate :: SyntaxKind :: CONDITION_KW } ; [meta] => { $ crate :: SyntaxKind :: META_KW } ; [private] => { $ crate :: SyntaxKind :: PRIVATE_KW } ; [global] => { $ crate :: SyntaxKind :: GLOBAL_KW } ; [import] => { $ crate :: SyntaxKind :: IMPORT_KW } ; [include] => { $ crate :: SyntaxKind :: INCLUDE_KW } ; [ascii] => { $ crate :: SyntaxKind :: ASCII_KW } ; [wide] => { $ crate :: SyntaxKind :: WIDE_KW } ; [nocase] => { $ crate :: SyntaxKind :: NOCASE_KW } ; [fullword] => { $ crate :: SyntaxKind :: FULLWORD_KW } ; [xor] => { $ crate :: SyntaxKind :: XOR_KW } ; [base64] => { $ crate :: SyntaxKind :: BASE64_KW } ; [base64wide] => { $ crate :: SyntaxKind :: BASE64WIDE_KW } ; [identifier] => { $ crate :: 
SyntaxKind :: IDENTIFIER } ; [variable] => { $ crate :: SyntaxKind :: VARIABLE } ; [string_lit] => { $ crate :: SyntaxKind :: STRING_LIT } ; [int_lit] => { $ crate :: SyntaxKind :: INT_LIT } ; [float_lit] => { $ crate :: SyntaxKind :: FLOAT_LIT } ; [bool_lit] => { $ crate :: SyntaxKind :: BOOL_LIT } ; [hex_lit] => { $ crate :: SyntaxKind :: HEX_LIT } ; [regex_lit] => { $ crate :: SyntaxKind :: REGEX_LIT } ; [dot_matches_all] => { $ crate :: SyntaxKind :: DOT_MATCHES_ALL } ; [case_insensitive] => { $ crate :: SyntaxKind :: CASE_INSENSITIVE } ; }
macro_rules ! T { [:] => { $ crate :: SyntaxKind :: COLON } ; ['('] => { $ crate :: SyntaxKind :: L_PAREN } ; [')'] => { $ crate :: SyntaxKind :: R_PAREN } ; ['{'] => { $ crate :: SyntaxKind :: L_BRACE } ; ['}'] => { $ crate :: SyntaxKind :: R_BRACE } ; ['['] => { $ crate :: SyntaxKind :: L_BRACKET } ; [']'] => { $ crate :: SyntaxKind :: R_BRACKET } ; [,] => { $ crate :: SyntaxKind :: COMMA } ; [=] => { $ crate :: SyntaxKind :: ASSIGN } ; [-] => { $ crate :: SyntaxKind :: HYPHEN } ; [?] => { $ crate :: SyntaxKind :: QUESTION_MARK } ; [~] => { $ crate :: SyntaxKind :: TILDE } ; [|] => { $ crate :: SyntaxKind :: PIPE } ; [/] => { $ crate :: SyntaxKind :: SLASH } ; [+] => { $ crate :: SyntaxKind :: PLUS } ; [*] => { $ crate :: SyntaxKind :: STAR } ; [%] => { $ crate :: SyntaxKind :: PERCENTAGE } ; [<<] => { $ crate :: SyntaxKind :: SHL } ; [>>] => { $ crate :: SyntaxKind :: SHR } ; [&] => { $ crate :: SyntaxKind :: AMPERSAND } ; [^] => { $ crate :: SyntaxKind :: CARET } ; [.] => { $ crate :: SyntaxKind :: DOT } ; [==] => { $ crate :: SyntaxKind :: EQ } ; [!=] => { $ crate :: SyntaxKind :: NE } ; [<] => { $ crate :: SyntaxKind :: LT } ; [<=] => { $ crate :: SyntaxKind :: LE } ; [>] => { $ crate :: SyntaxKind :: GT } ; [>=] => { $ crate :: SyntaxKind :: GE } ; [and] => { $ crate :: SyntaxKind :: AND_KW } ; [or] => { $ crate :: SyntaxKind :: OR_KW } ; [not] => { $ crate :: SyntaxKind :: NOT_KW } ; [rule] => { $ crate :: SyntaxKind :: RULE_KW } ; [strings] => { $ crate :: SyntaxKind :: STRINGS_KW } ; [condition] => { $ crate :: SyntaxKind :: CONDITION_KW } ; [meta] => { $ crate :: SyntaxKind :: META_KW } ; [private] => { $ crate :: SyntaxKind :: PRIVATE_KW } ; [global] => { $ crate :: SyntaxKind :: GLOBAL_KW } ; [import] => { $ crate :: SyntaxKind :: IMPORT_KW } ; [include] => { $ crate :: SyntaxKind :: INCLUDE_KW } ; [ascii] => { $ crate :: SyntaxKind :: ASCII_KW } ; [wide] => { $ crate :: SyntaxKind :: WIDE_KW } ; [nocase] => { $ crate :: SyntaxKind :: NOCASE_KW } ; 
[fullword] => { $ crate :: SyntaxKind :: FULLWORD_KW } ; [xor] => { $ crate :: SyntaxKind :: XOR_KW } ; [base64] => { $ crate :: SyntaxKind :: BASE64_KW } ; [base64wide] => { $ crate :: SyntaxKind :: BASE64WIDE_KW } ; [contains] => { $ crate :: SyntaxKind :: CONTAINS_KW } ; [icontains] => { $ crate :: SyntaxKind :: ICONTAINS_KW } ; [startswith] => { $ crate :: SyntaxKind :: STARTSWITH_KW } ; [istartswith] => { $ crate :: SyntaxKind :: ISTARTSWITH_KW } ; [endswith] => { $ crate :: SyntaxKind :: ENDSWITH_KW } ; [iendswith] => { $ crate :: SyntaxKind :: IENDSWITH_KW } ; [iequals] => { $ crate :: SyntaxKind :: IEQUALS_KW } ; [matches] => { $ crate :: SyntaxKind :: MATCHES_KW } ; [identifier] => { $ crate :: SyntaxKind :: IDENTIFIER } ; [variable] => { $ crate :: SyntaxKind :: VARIABLE } ; [string_lit] => { $ crate :: SyntaxKind :: STRING_LIT } ; [int_lit] => { $ crate :: SyntaxKind :: INT_LIT } ; [float_lit] => { $ crate :: SyntaxKind :: FLOAT_LIT } ; [bool_lit] => { $ crate :: SyntaxKind :: BOOL_LIT } ; [hex_lit] => { $ crate :: SyntaxKind :: HEX_LIT } ; [regex_lit] => { $ crate :: SyntaxKind :: REGEX_LIT } ; [dot_matches_all] => { $ crate :: SyntaxKind :: DOT_MATCHES_ALL } ; [case_insensitive] => { $ crate :: SyntaxKind :: CASE_INSENSITIVE } ; }
pub use T;
83 changes: 61 additions & 22 deletions src/syntax/ast/generated/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,38 +463,47 @@ impl BooleanTerm {
/// Returns the `Expr` produced by `support::child` for this node's syntax, or `None`
/// when that lookup yields nothing.
pub fn expr(&self) -> Option<Expr> {
support::child(&self.syntax)
}
pub fn plus_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![+])
pub fn eq_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![==])
}
pub fn hyphen_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![-])
pub fn ne_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![!=])
}
pub fn star_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![*])
pub fn lt_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![<])
}
pub fn percentage_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![%])
pub fn le_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![<=])
}
pub fn ampersand_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![&])
pub fn gt_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![>])
}
pub fn pipe_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![|])
pub fn ge_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![>=])
}
pub fn caret_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![^])
pub fn contains_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![contains])
}
pub fn shl_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![<<])
pub fn icontains_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![icontains])
}
pub fn shr_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![>>])
pub fn startswith_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![startswith])
}
pub fn tilde_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![~])
pub fn istartswith_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![istartswith])
}
pub fn dot_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![.])
pub fn endswith_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![endswith])
}
/// Returns the `iendswith` keyword token looked up with `support::token` on this node's
/// syntax, or `None` when that lookup yields nothing.
pub fn iendswith_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![iendswith])
}
/// Returns the `iequals` keyword token looked up with `support::token` on this node's
/// syntax, or `None` when that lookup yields nothing.
pub fn iequals_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![iequals])
}
/// Returns the `matches` keyword token looked up with `support::token` on this node's
/// syntax, or `None` when that lookup yields nothing.
pub fn matches_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![matches])
}
pub fn bool_lit_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![bool_lit])
Expand Down Expand Up @@ -524,9 +533,39 @@ impl Expr {
/// Returns the `Term` produced by `support::child` for this node's syntax, or `None`
/// when that lookup yields nothing.
pub fn term(&self) -> Option<Term> {
support::child(&self.syntax)
}
/// Returns the `+` token looked up with `support::token` on this node's syntax, or
/// `None` when that lookup yields nothing.
pub fn plus_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![+])
}
/// Returns the `-` token looked up with `support::token` on this node's syntax, or
/// `None` when that lookup yields nothing.
pub fn hyphen_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![-])
}
/// Returns the `*` token looked up with `support::token` on this node's syntax, or
/// `None` when that lookup yields nothing.
pub fn star_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![*])
}
/// Returns the `%` token looked up with `support::token` on this node's syntax, or
/// `None` when that lookup yields nothing.
pub fn percentage_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![%])
}
/// Returns the `&` token looked up with `support::token` on this node's syntax, or
/// `None` when that lookup yields nothing.
pub fn ampersand_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![&])
}
/// Returns the `|` token looked up with `support::token` on this node's syntax, or
/// `None` when that lookup yields nothing.
pub fn pipe_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![|])
}
/// Returns the `^` token looked up with `support::token` on this node's syntax, or
/// `None` when that lookup yields nothing.
pub fn caret_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![^])
}
/// Returns the `<<` token looked up with `support::token` on this node's syntax, or
/// `None` when that lookup yields nothing.
pub fn shl_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![<<])
}
/// Returns the `>>` token looked up with `support::token` on this node's syntax, or
/// `None` when that lookup yields nothing.
pub fn shr_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![>>])
}
/// Returns the `~` token looked up with `support::token` on this node's syntax, or
/// `None` when that lookup yields nothing.
pub fn tilde_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![~])
}
/// Returns the `.` token looked up with `support::token` on this node's syntax, or
/// `None` when that lookup yields nothing.
pub fn dot_token(&self) -> Option<SyntaxToken> {
support::token(&self.syntax, T![.])
}
/// Returns the `Expr` produced by `support::child` for this node's syntax, or `None`
/// when that lookup yields nothing.
pub fn expr(&self) -> Option<Expr> {
support::child(&self.syntax)
}
Expand Down
14 changes: 14 additions & 0 deletions src/syntax/tests/ast_src.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ pub(crate) const KINDS_SRC: KindsSrc = KindsSrc {
("&", "AMPERSAND"),
("^", "CARET"),
(".", "DOT"),
("==", "EQ"),
("!=", "NE"),
("<", "LT"),
("<=", "LE"),
(">", "GT"),
(">=", "GE"),
],
keywords: &[
"and",
Expand All @@ -52,6 +58,14 @@ pub(crate) const KINDS_SRC: KindsSrc = KindsSrc {
"xor",
"base64",
"base64wide",
"contains",
"icontains",
"startswith",
"istartswith",
"endswith",
"iendswith",
"iequals",
"matches",
],
literals: &["STRING_LIT", "INT_LIT", "FLOAT_LIT", "HEX_LIT", "BOOL_LIT", "REGEX_LIT"],
tokens: &[
Expand Down
6 changes: 6 additions & 0 deletions src/syntax/tests/sourcegen_ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,12 @@ impl Field {
"&" => "ampersand",
"^" => "caret",
"." => "dot",
"==" => "eq",
"!=" => "ne",
"<" => "lt",
"<=" => "le",
">" => "gt",
">=" => "ge",
_ => name,
};
format_ident!("{}_token", name)
Expand Down
6 changes: 6 additions & 0 deletions tests/test21.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//Rule comment
rule test
{
condition:
9 + 9 == 18
}
Loading

0 comments on commit 6899e8a

Please sign in to comment.