From d0d92bad7ce9ad9fa2ace3351f96bfa18c65ba99 Mon Sep 17 00:00:00 2001 From: T6 Date: Tue, 9 Apr 2024 17:04:11 -0400 Subject: [PATCH] use tspl for parser (#122) --- Cargo.lock | 16 ++++++ Cargo.toml | 1 + cspell.json | 1 + src/ast.rs | 138 ++++++++++++---------------------------------------- 4 files changed, 50 insertions(+), 106 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index df6e4aa4..0fe98f1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "TSPL" +version = "0.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9423b1e6e2d6c0bbc03660f58f9c30f55359e13afea29432e6e767c0f7dc25" +dependencies = [ + "highlight_error", +] + [[package]] name = "aho-corasick" version = "1.1.2" @@ -278,10 +287,17 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "highlight_error" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e18805660d7b6b2e2b9f316a5099521b5998d5cba4dda11b5157a21aaef03" + [[package]] name = "hvm-core" version = "0.2.24" dependencies = [ + "TSPL", "arrayvec", "clap", "insta", diff --git a/Cargo.toml b/Cargo.toml index e3263181..907a31c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ panic = "abort" debug = "full" [dependencies] +TSPL = "0.0.9" arrayvec = "0.7.4" clap = { version = "4.5.1", features = ["derive"] } nohash-hasher = { version = "0.2.0" } diff --git a/cspell.json b/cspell.json index cf626b3d..d6033f97 100644 --- a/cspell.json +++ b/cspell.json @@ -58,6 +58,7 @@ "tids", "tlog", "trgs", + "tspl", "trit", "uninit", "unioned", diff --git a/src/ast.rs b/src/ast.rs index be7c6987..b45e3b66 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -18,6 +18,7 @@ use crate::{ util::{array_vec, deref, maybe_grow}, }; use std::{collections::BTreeMap, fmt, mem, str::FromStr}; +use TSPL::{new_parser, Parser}; /// The top level AST node, representing a collection of named nets. /// @@ -194,13 +195,9 @@ impl Tree { } } -/// The state of the HVMC parser. -struct Parser<'i> { - /// The remaining characters in the input. An empty string indicates EOF. - input: &'i str, -} +new_parser!(HvmcParser); -impl<'i> Parser<'i> { +impl<'i> HvmcParser<'i> { /// Book = ("@" Name "=" Net)* fn parse_book(&mut self) -> Result { maybe_grow(move || { @@ -231,19 +228,19 @@ impl<'i> Parser<'i> { fn parse_tree(&mut self) -> Result { maybe_grow(move || { self.skip_trivia(); - match self.peek_char() { + match self.peek_one() { // Era = "*" Some('*') => { - self.advance_char(); + self.advance_one(); Ok(Tree::Era) } // Ctr = "(" Tree Tree ")" | "[" Tree Tree "]" | "{" Int Tree Tree "}" Some(char @ ('(' | '[' | '{')) => { - self.advance_char(); + self.advance_one(); let lab = match char { '(' => 0, '[' => 1, - '{' => self.parse_int()? as Lab, + '{' => self.parse_u64()? as Lab, _ => unreachable!(), }; let close = match char { @@ -253,18 +250,18 @@ impl<'i> Parser<'i> { _ => unreachable!(), }; self.skip_trivia(); - if self.peek_char().is_some_and(|x| x == ':') { - self.advance_char(); - let variant_index = self.parse_int()?; + if self.peek_one().is_some_and(|x| x == ':') { + self.advance_one(); + let variant_index = self.parse_u64()?; self.consume(":")?; - let variant_count = self.parse_int()?; + let variant_count = self.parse_u64()?; let mut fields = Vec::new(); self.skip_trivia(); - while self.peek_char() != Some(close) { + while self.peek_one() != Some(close) { fields.push(self.parse_tree()?); self.skip_trivia(); } - self.advance_char(); + self.advance_one(); if variant_count == 0 { Err("variant count cannot be zero".to_owned())?; } @@ -283,11 +280,11 @@ impl<'i> Parser<'i> { } else { let mut ports = Vec::new(); self.skip_trivia(); - while self.peek_char() != Some(close) { + while self.peek_one() != Some(close) { ports.push(self.parse_tree()?); self.skip_trivia(); } - self.advance_char(); + self.advance_one(); if ports.len() > MAX_ARITY { Err("ctr has too many ports".to_owned())?; } @@ -296,25 +293,25 @@ impl<'i> Parser<'i> { } // Ref = "@" Name Some('@') => { - self.advance_char(); + self.advance_one(); self.skip_trivia(); let nam = self.parse_name()?; Ok(Tree::Ref { nam }) } // Num = "#" Int Some('#') => { - self.advance_char(); - match self.peek_char() { + self.advance_one(); + match self.peek_one() { Some('-') => { - self.advance_char(); - Ok(Tree::Num { val: -(self.parse_int()? as i64) }) + self.advance_one(); + Ok(Tree::Num { val: -(self.parse_u64()? as i64) }) } - _ => Ok(Tree::Num { val: self.parse_int()? as i64 }), + _ => Ok(Tree::Num { val: self.parse_u64()? as i64 }), } } // Op = "<" Op Tree Tree ">" Some('<') => { - self.advance_char(); + self.advance_one(); let op = self.parse_op()?; let rhs = Box::new(self.parse_tree()?); let out = Box::new(self.parse_tree()?); @@ -323,13 +320,13 @@ impl<'i> Parser<'i> { } // Mat = "?<" Tree Tree ">" Some('?') => { - self.advance_char(); + self.advance_one(); self.consume("<")?; let zero = self.parse_tree()?; let succ = self.parse_tree()?; self.skip_trivia(); - if self.peek_char() == Some('>') { - self.advance_char(); + if self.peek_one() == Some('>') { + self.advance_one(); Tree::legacy_mat(zero, succ).ok_or_else(|| "invalid legacy match".to_owned()) } else { let zero = Box::new(zero); @@ -349,95 +346,24 @@ impl<'i> Parser<'i> { fn parse_name(&mut self) -> Result { let name = self.take_while(|c| c.is_alphanumeric() || c == '_' || c == '.' || c == '$'); if name.is_empty() { - return Err(format!("Expected a name character, found {:?}", self.peek_char())); + return self.expected("name"); } Ok(name.to_owned()) } - /// Int = /[0-9]+/ | /0x[0-9a-fA-F]+/ | /0b[01]+/ - fn parse_int(&mut self) -> Result { - self.skip_trivia(); - let radix = if let Some(rest) = self.input.strip_prefix("0x") { - self.input = rest; - 16 - } else if let Some(rest) = self.input.strip_prefix("0b") { - self.input = rest; - 2 - } else { - 10 - }; - let mut num: u64 = 0; - if !self.peek_char().map_or(false, |c| c.is_digit(radix)) { - return Err(format!("Expected a digit, found {:?}", self.peek_char())); - } - while let Some(digit) = self.peek_char().and_then(|c| c.to_digit(radix)) { - self.advance_char(); - num = num * (radix as u64) + (digit as u64); - } - Ok(num) - } - /// See `ops.rs` for the available operators. fn parse_op(&mut self) -> Result { let op = self.take_while(|c| "ui0123456789.+-=*/%<>|&^!?$".contains(c)); op.parse().map_err(|_| format!("Unknown operator: {op:?}")) } - - /// Inspects the next character in the input without consuming it. - fn peek_char(&self) -> Option { - self.input.chars().next() - } - - /// Consumes the next character in the input. - fn advance_char(&mut self) -> Option { - let char = self.input.chars().next()?; - self.input = &self.input[char.len_utf8() ..]; - Some(char) - } - - /// Skips whitespace & comments in the input. - fn skip_trivia(&mut self) { - while let Some(c) = self.peek_char() { - if c.is_ascii_whitespace() { - self.advance_char(); - continue; - } - if c == '/' && self.input.starts_with("//") { - while self.peek_char() != Some('\n') { - self.advance_char(); - } - continue; - } - break; - } - } - - /// Consumes an instance of the given string, erroring if it is not found. - fn consume(&mut self, text: &str) -> Result<(), String> { - self.skip_trivia(); - let Some(rest) = self.input.strip_prefix(text) else { - return Err(format!("Expected {:?}, found {:?}", text, self.input.split_ascii_whitespace().next().unwrap_or(""))); - }; - self.input = rest; - Ok(()) - } - - /// Consumes all of the contiguous next characters in the input matching a - /// given predicate. - fn take_while(&mut self, mut f: impl FnMut(char) -> bool) -> &'i str { - let len = self.input.chars().take_while(|&c| f(c)).map(char::len_utf8).sum(); - let (name, rest) = self.input.split_at(len); - self.input = rest; - name - } } /// Parses the input with the callback, ensuring that the whole input is /// consumed. -fn parse_eof<'i, T>(input: &'i str, parse_fn: impl Fn(&mut Parser<'i>) -> Result) -> Result { - let mut parser = Parser { input }; +fn parse_eof<'i, T>(input: &'i str, parse_fn: impl Fn(&mut HvmcParser<'i>) -> Result) -> Result { + let mut parser = HvmcParser::new(input); let out = parse_fn(&mut parser)?; - if !parser.input.is_empty() { + if parser.index != parser.input.len() { return Err("Unable to parse the whole input. Is this not an hvmc file?".to_owned()); } Ok(out) @@ -446,21 +372,21 @@ fn parse_eof<'i, T>(input: &'i str, parse_fn: impl Fn(&mut Parser<'i>) -> Result impl FromStr for Book { type Err = String; fn from_str(str: &str) -> Result { - parse_eof(str, Parser::parse_book) + parse_eof(str, HvmcParser::parse_book) } } impl FromStr for Net { type Err = String; fn from_str(str: &str) -> Result { - parse_eof(str, Parser::parse_net) + parse_eof(str, HvmcParser::parse_net) } } impl FromStr for Tree { type Err = String; fn from_str(str: &str) -> Result { - parse_eof(str, Parser::parse_tree) + parse_eof(str, HvmcParser::parse_tree) } }