From 1443488779a5b7faeb7a507c318f823c0acaf557 Mon Sep 17 00:00:00 2001 From: Nick Date: Sat, 3 Aug 2024 03:46:08 +0300 Subject: [PATCH] refactor(#26): parser types now pass all test cases Signed-off-by: fami-fish --- src/ast.rs | 23 ++--- src/parser.rs | 252 ++++++++++++++++++++++++-------------------------- src/report.rs | 2 + 3 files changed, 129 insertions(+), 148 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index a75d910..40221bd 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -50,20 +50,9 @@ impl Display for Type { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Self::Size(s) => write!(f, "{s}")?, - Self::Heap { is_pointer: true, contents } => { - // contents length is always 1 for pointers - write!(f, "[{}", &contents[0].0)?; - match contents[0].1 { - Some(0) => write!(f, ":")?, - Some(size) => write!(f, ":{size}")?, - None => {}, - }; - - write!(f, "]")?; - }, - Self::Heap { is_pointer: false, contents } => { - write!(f, "{{")?; - for (t, elems) in contents { + Self::Heap { is_pointer, contents } => { + write!(f, "{}", if *is_pointer { "[" } else { "{" })?; + for (i, (t, elems)) in contents.iter().enumerate() { write!(f, "{t}")?; match elems { Some(0) => write!(f, ":")?, @@ -71,9 +60,11 @@ impl Display for Type { None => {}, }; - write!(f, ", ")?; + if i != contents.len() - 1 { + write!(f, ", ")?; + } } - write!(f, "}}")?; + write!(f, "{}", if *is_pointer { "]" } else { "}" })?; }, Self::Register { inner: t, ident } => { if t.is_some() { diff --git a/src/parser.rs b/src/parser.rs index 5d68352..29e3711 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -154,6 +154,7 @@ impl<'t, 'contents> Parser<'t, 'contents> { span: self.current.span.clone(), }); self.advance(); + // assert_eq!(self.current.kind, TokenKind::NewLine); return ret; }, _ => self.parse_expression(), @@ -371,7 +372,7 @@ impl<'t, 'contents> Parser<'t, 'contents> { } fn parse_register_binding(&mut self, inner: Option) -> Result { - // register binding + // we expect that 
self is a ; self.advance(); match self.current.kind { @@ -394,9 +395,9 @@ impl<'t, 'contents> Parser<'t, 'contents> { .with_label(ReportLabel::new(self.current.span.clone())) .with_note("HINT: Registers follow the format r. e.g r8 r32") .into(); - } + }; - match self.current.text.strip_prefix('r').unwrap().parse::() { + match self.current.text[1..].parse::() { Err(e) => match e.kind() { IntErrorKind::Empty => ReportKind::SyntaxError .new("Expected register identifier after r prefix") @@ -412,6 +413,8 @@ impl<'t, 'contents> Parser<'t, 'contents> { .with_label(ReportLabel::new(self.current.span.clone())) .with_note("HINT: Registers follow the format r. e.g r8 r32") }, + // Here only positive overflow can be emitted by parse::() + // It also doesn't emit Zero because usize can store 0. _ => ReportKind::SyntaxError .new("Register identifier intager overflows") .with_label(ReportLabel::new(self.current.span.clone())) @@ -422,168 +425,153 @@ impl<'t, 'contents> Parser<'t, 'contents> { } } - fn parse_type(&mut self) -> Result { - match self.current.kind { - TokenKind::Semicolon => self.parse_register_binding(None), - TokenKind::DecimalIntLiteral => { - // We know it lexed so this has to pass, so we can unwrap - let size = self.current.text.parse::().unwrap(); - if size == 0 { - return ReportKind::SyntaxError - .new("Size cannot be zero") - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - }; + // We take a slice here because we never grow the terminals, so there is no need for the growable capacity that + // comes with Vec + fn parse_array_type(&mut self, terminals: &[TokenKind]) -> Result<(Type, Option)> { + let elem_type = self.parse_type()?; - Ok(Type::Size(size)) - }, - TokenKind::Identifier => Ok(Type::Struct(self.current.text.to_string())), - TokenKind::LBracket => { - self.advance(); - let t = self.parse_type()?; - let mut n = None; + if let Type::Register { ident, .. 
} = elem_type { + return ReportKind::RegisterWithinHeap + .new("Heaps cannot contain register bindings") + .with_label(ReportLabel::new(self.current.span.clone())) + .with_note(format!("{ident}")) + .into(); + } - self.advance(); - if self.current.kind == TokenKind::Colon { + self.advance(); + if self.current.kind == TokenKind::Colon { + for term in terminals { + if *term == self.peek(1).kind { self.advance(); - match self.current.kind { - TokenKind::DecimalIntLiteral => { - n = Some(self.current.text.parse::().unwrap()); - if n == Some(0) { - return ReportKind::SyntaxError - .new("Array size cannot be zero.") - .with_note(format!("HINT: Did you mean [{t}:]")) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - } - self.advance(); - }, - TokenKind::RBracket => {}, - _ => { - self.advance(); - return ReportKind::UnexpectedToken - .new(format!("got {:?}", self.current.kind)) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - }, - } + return Ok((elem_type, Some(0))); } - // We should fail earlier but we wait to gather the element size - // n before logging for clearer error logging - if let Type::Register { inner, ident } = t { - let mut inner_str = String::new(); - let mut n_str = String::new(); - if inner.is_some() { - inner_str = format!("{}", inner.unwrap()); - } + } - if n.is_some() && n.unwrap() != 0 { - n_str = format!("{}", n.unwrap()); - } + self.advance(); - self.advance(); + if self.current.kind == TokenKind::DecimalIntLiteral { + let elem_size = self.current.text.parse::().unwrap(); + if elem_size == 0 { return ReportKind::SyntaxError - .new("Heap types cannot contain register bindings") + .new("Array size cannot be zero.") + .with_note(format!("HINT: Did you mean [{elem_type}:]")) .with_label(ReportLabel::new(self.current.span.clone())) - .with_note(format!("HINT: Did you want to bind the pointer to the register? 
[{inner_str}:{n_str}];r{ident}")) .into(); } + self.advance(); + return Ok((elem_type, Some(elem_size))); + } + return ReportKind::UnexpectedToken + .new(format!( + "Expected {}, got {:?}", + terminals + .into_iter() + .map(|x| format!("{x:?}")) + .collect::>() + .join(" or "), + self.current.kind + )) + .with_label(ReportLabel::new(self.current.span.clone())) + .into(); + } - if self.current.kind != TokenKind::RBracket { - self.advance(); + Ok((elem_type, None)) + } + + fn parse_type(&mut self) -> Result { + match self.current.kind { + TokenKind::Semicolon => self.parse_register_binding(None), + TokenKind::DecimalIntLiteral => { + // We know it lexed so this has to pass, so we can unwrap + let Ok(size) = self.current.text.parse::() else { + return ReportKind::SyntaxError + .new("You cant have this many bytes, what are you even doing anyways?? stack overflow?") + .with_label(ReportLabel::new(self.current.span.clone())) + .into(); + }; + + if size == 0 { return ReportKind::SyntaxError - .new(format!("Expected closing bracket. Got {:?}", self.current.text)) + .new("Size cannot be zero") .with_label(ReportLabel::new(self.current.span.clone())) .into(); }; - Ok(Type::Heap { is_pointer: true, contents: vec![(t, n)] }) + Ok(Type::Size(size)) }, - TokenKind::LBrace => { + TokenKind::Identifier => Ok(Type::Struct(self.current.text.to_string())), + TokenKind::LBrace | TokenKind::LBracket => { + let is_pointer = self.current.kind == TokenKind::LBracket; + let start_kind = if is_pointer {TokenKind::LBracket} else {TokenKind::LBrace}; + let end_kind = if is_pointer {TokenKind::RBracket} else {TokenKind::RBrace}; self.advance(); - if self.current.kind == TokenKind::RBrace { - let mut span = self.current.span.clone(); - span.start_index -= 1; - return ReportKind::SyntaxError - .new("Empty heaps are disallowed") - .with_label(ReportLabel::new(span)) - .with_note("HINT: Did you want to create a void pointer: []") - .into(); - } //NOTE: idk if 5 is the right number. 
To be determined let mut vec: Vec<(Type, Option)> = Vec::with_capacity(5); - loop { - let start = self.current.span.clone(); - let t = self.parse_type()?; - let mut n = None; - let end = self.current.span.clone(); - let span = start.extend(&end); - self.advance(); - - if self.current.kind == TokenKind::Colon { - self.advance(); - match self.current.kind { - TokenKind::DecimalIntLiteral => { - n = Some(self.current.text.parse::().unwrap()); - if n == Some(0) { - return ReportKind::SyntaxError - .new("Array size cannot be zero.") - .with_note(format!("HINT: Did you mean {t}:")) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - } - self.advance(); - }, - TokenKind::Comma | TokenKind::RBrace => {}, - _ => { - self.advance(); - return ReportKind::UnexpectedToken - .new(format!( - "Expected either `,` `}}` or a intager, got {:?}", - self.current.kind - )) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - }, - } - } + while self.current.kind != end_kind { + // let start = self.current.span.clone(); + let (t, n) = self.parse_array_type(&[TokenKind::Comma, end_kind, TokenKind::NewLine])?; + // let end = self.current.span.clone(); + // let span = start.extend(&end); + // let mut span = self.current.span.clone(); + // span.start_index -= 2; + // span.end_index -= 1; + vec.push((t, n)); - if self.current.kind != TokenKind::Comma { - if self.current.kind == TokenKind::RBrace { - vec.push((t, n)); - break; - } - return ReportKind::SyntaxError - .new("Expected comma to separate heap types") - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - } - self.advance(); - if let Type::Register { ident, .. 
} = t { + if self.current.kind == TokenKind::NewLine { + let mut span = self.current.span.clone(); + span.start_index -= 1; return ReportKind::SyntaxError - .new("Heap types cannot contain register bindings") + .new(format!("Unterminated {} heap", if is_pointer {"pointer to"} else {""})) .with_label(ReportLabel::new(span)) - .with_note(format!("HINT: Did you want to bind the pointer to the register? {};r{ident}", Type::Heap { is_pointer: false, contents: vec })) + .with_note(format!("HINT: did you mean to close this heap? {}", Type::Heap { is_pointer, contents: vec } )) .into(); } - vec.push((t, n)); + if self.current.kind != end_kind { + if self.current.kind == (if is_pointer {TokenKind::RBrace} else {TokenKind::RBracket}) { + return ReportKind::SyntaxError + .new("Mismatched heap brackets") + .with_label(ReportLabel::new(self.current.span.clone())) + .with_note("HINT: Be more decisive next time. Is it a pointer or not?") + .into(); + } + + if self.peek(1).kind == TokenKind::NewLine { + return ReportKind::SyntaxError + .new(format!("Unterminated {} heap", if is_pointer {"pointer to"} else {""})) + .with_label(ReportLabel::new(self.current.span.clone())) + .with_note("HINT: did you mean to close this heap?") + .into(); + } + + if self.current.kind == TokenKind::Comma { + self.advance(); + } + } } - if self.current.kind != TokenKind::RBrace { - return ReportKind::SyntaxError - .new(format!("Expected closing brace. 
Got {:?}", self.current.text)) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - }; - Ok(Type::Heap { is_pointer: false, contents: vec }) + Ok(Type::Heap { is_pointer, contents: vec }) }, + TokenKind::Colon => { + ReportKind::SyntaxError + .new("Cannot have an array of an unknown type") + .with_label(ReportLabel::new(self.current.span.clone())) + .with_note("HINT: add a type before the colon, duh") + .into() + } + TokenKind::NewLine => { + println!("{:?}", &self.tokens[self.index - 1]); + ReportKind::UnexpectedToken + .new("Unexpected newline") + .with_label(ReportLabel::new(self.current.span.clone())) + .into() + } _ => ReportKind::UnexpectedToken - .new(format!("Unexpected token: {}", self.current.text)) + .new(format!("Unexpected token: {:?}", self.current.kind)) .with_label(ReportLabel::new(self.current.span.clone())) - .with_note("Acceptable tokens are: `[` `{` or an intager") + .with_note("HINT: We expect literally any type... and you still messed it up") .into(), }.and_then(|t| { if self.peek(1).kind == TokenKind::Semicolon { diff --git a/src/report.rs b/src/report.rs index 81d71fe..7832865 100644 --- a/src/report.rs +++ b/src/report.rs @@ -37,6 +37,7 @@ pub enum ReportKind { UnexpectedEOF, InvalidEscapeSequence, DuplicateAttribute, + RegisterWithinHeap, // General IOError, @@ -70,6 +71,7 @@ impl ReportKind { | Self::UnexpectedEOF | Self::DuplicateAttribute | Self::InvalidEscapeSequence + | Self::RegisterWithinHeap // General | Self::IOError | Self::SyntaxError => Level::Error,