From 1162172f36737f77aab6ac49161ac8a4b2e745b2 Mon Sep 17 00:00:00 2001 From: Bruno Andreotti Date: Fri, 23 Feb 2024 10:53:29 -0300 Subject: [PATCH] Add support for GMP-style numerical literals --- carcara/src/parser/error.rs | 4 ++ carcara/src/parser/lexer.rs | 85 +++++++++++++++++++++++++++++++------ 2 files changed, 75 insertions(+), 14 deletions(-) diff --git a/carcara/src/parser/error.rs b/carcara/src/parser/error.rs index 87391245..d8b42ed6 100644 --- a/carcara/src/parser/error.rs +++ b/carcara/src/parser/error.rs @@ -20,6 +20,10 @@ pub enum ParserError { #[error("leading zero in numeral '{0}'")] LeadingZero(String), + /// The lexer encountered a numerical literal that contained a division by zero, e.g. '1/0'. + #[error("division by zero in numerical literal: '{0}'")] + DivisionByZeroInLiteral(String), + /// The lexer encountered a `\` character while reading a quoted symbol. #[error("quoted symbol contains backslash")] BackslashInQuotedSymbol, diff --git a/carcara/src/parser/lexer.rs b/carcara/src/parser/lexer.rs index d5a50c72..ce831561 100644 --- a/carcara/src/parser/lexer.rs +++ b/carcara/src/parser/lexer.rs @@ -265,7 +265,21 @@ impl Lexer { Some('|') => self.read_quoted_symbol(), Some(':') => self.read_keyword(), Some('#') => self.read_bitvector(), - Some(c) if c.is_ascii_digit() => self.read_number(), + Some('-') => { + // If we encounter the '-' character, the token can either be a GMP-style numerical + // literal (e.g. '-5'), or a symbol that starts with '-' (e.g. the '-' operator + // itself) + self.next_char()?; + if self.current_char.as_ref().is_some_and(char::is_ascii_digit) { + self.read_number(true) + } else { + // This assumes that the symbol is never a reserved word.
+ let mut symbol = self.read_chars_while(is_symbol_character)?; + symbol.insert(0, '-'); + Ok(Token::Symbol(symbol)) + } + } + Some(c) if c.is_ascii_digit() => self.read_number(false), Some(c) if is_symbol_character(c) => self.read_simple_symbol(), None => Ok(Token::Eof), Some(other) => Err(Error::Parser( @@ -339,25 +353,45 @@ impl Lexer { } /// Reads an integer or decimal numerical literal. - fn read_number(&mut self) -> CarcaraResult<Token> { - let int_part = self.read_chars_while(|c| c.is_ascii_digit())?; + fn read_number(&mut self, negated: bool) -> CarcaraResult<Token> { + let first_part = self.read_chars_while(|c| c.is_ascii_digit())?; - if int_part.len() > 1 && int_part.starts_with('0') { + if first_part.len() > 1 && first_part.starts_with('0') { return Err(Error::Parser( - ParserError::LeadingZero(int_part), + ParserError::LeadingZero(first_part), self.position, )); } - if self.current_char == Some('.') { + if let Some(delimiter @ ('/' | '.')) = self.current_char { self.next_char()?; - let frac_part = self.read_chars_while(|c| c.is_ascii_digit())?; - let denom = Integer::from(10u32).pow(frac_part.len() as u32); - let numer = (int_part + &frac_part).parse::<Integer>().unwrap(); - let r = (numer, denom).into(); - Ok(Token::Decimal(r)) + let second_part = self.read_chars_while(|c| c.is_ascii_digit())?; + if let Some('/' | '.') = self.current_char { + // A number can have only one delimiter + let e = ParserError::UnexpectedChar(self.current_char.unwrap()); + return Err(Error::Parser(e, self.position)); + } + let r = match delimiter { + '/' => { + let [numer, denom] = + [first_part, second_part].map(|s| s.parse::<Integer>().unwrap()); + if denom.is_zero() { + let e = ParserError::DivisionByZeroInLiteral(format!("{numer}/{denom}")); + return Err(Error::Parser(e, self.position)); + } + Rational::from((numer, denom)) + } + '.'
=> { + let denom = Integer::from(10u32).pow(second_part.len() as u32); + let numer = (first_part + &second_part).parse::<Integer>().unwrap(); + Rational::from((numer, denom)) + } + _ => unreachable!(), + }; + Ok(Token::Decimal(if negated { -r } else { r })) } else { - Ok(Token::Numeral(int_part.parse().unwrap())) + let i: Integer = first_part.parse().unwrap(); + Ok(Token::Numeral(if negated { -i } else { i })) } } @@ -428,13 +462,15 @@ mod tests { #[test] fn test_simple_symbols_and_keywords() { - let input = "foo123 :foo123 :a:b +-/*=%?!.$_~&^<>@"; + let input = "foo123 :foo123 :a:b +-/*=%?!.$_~&^<>@ -starts-with-dash --double-dash"; let expected = vec![ Token::Symbol("foo123".into()), Token::Keyword("foo123".into()), Token::Keyword("a".into()), Token::Keyword("b".into()), Token::Symbol("+-/*=%?!.$_~&^<>@".into()), + Token::Symbol("-starts-with-dash".into()), + Token::Symbol("--double-dash".into()), ]; assert_eq!(expected, lex_all(input)); } @@ -464,10 +500,15 @@ mod tests { #[test] fn test_numerals_and_decimals() { - let input = "42 3.14159"; + let input = "42 3.14159 -137 8/3 -5/2 1/1 0/2"; let expected = vec![ Token::Numeral(42.into()), Token::Decimal((314_159, 100_000).into()), + Token::Numeral((-137).into()), + Token::Decimal((8, 3).into()), + Token::Decimal((-5, 2).into()), + Token::Decimal(1.into()), + Token::Decimal(0.into()), ]; assert_eq!(expected, lex_all(input)); @@ -475,6 +516,22 @@ mod tests { lex_one("0123"), Err(Error::Parser(ParserError::LeadingZero(_), _)) )); + assert!(matches!( + lex_one("1.2.3"), + Err(Error::Parser(ParserError::UnexpectedChar(_), _)) + )); + assert!(matches!( + lex_one("1/2.3"), + Err(Error::Parser(ParserError::UnexpectedChar(_), _)) + )); + assert!(matches!( + lex_one("1.2/3"), + Err(Error::Parser(ParserError::UnexpectedChar(_), _)) + )); + assert!(matches!( + lex_one("1/0"), + Err(Error::Parser(ParserError::DivisionByZeroInLiteral(_), _)) + )); } #[test]