Skip to content

Commit

Permalink
refactor(#26): parser types now pass all test cases
Browse files Browse the repository at this point in the history
Signed-off-by: fami-fish <[email protected]>
  • Loading branch information
fami-fish authored and slbsh committed Aug 5, 2024
1 parent d126ecf commit bfd47cc
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 148 deletions.
23 changes: 7 additions & 16 deletions src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,30 +50,21 @@ impl Display for Type {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::Size(s) => write!(f, "{s}")?,
Self::Heap { is_pointer: true, contents } => {
// contents length is always 1 for pointers
write!(f, "[{}", &contents[0].0)?;
match contents[0].1 {
Some(0) => write!(f, ":")?,
Some(size) => write!(f, ":{size}")?,
None => {},
};

write!(f, "]")?;
},
Self::Heap { is_pointer: false, contents } => {
write!(f, "{{")?;
for (t, elems) in contents {
Self::Heap { is_pointer, contents } => {
write!(f, "{}", if *is_pointer { "[" } else { "{" })?;
for (i, (t, elems)) in contents.iter().enumerate() {
write!(f, "{t}")?;
match elems {
Some(0) => write!(f, ":")?,
Some(size) => write!(f, ":{size}")?,
None => {},
};

write!(f, ", ")?;
if i != contents.len() - 1 {
write!(f, ", ")?;
}
}
write!(f, "}}")?;
write!(f, "{}", if *is_pointer { "]" } else { "}" })?;
},
Self::Register { inner: t, ident } => {
if t.is_some() {
Expand Down
252 changes: 120 additions & 132 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ impl<'t, 'contents> Parser<'t, 'contents> {
span: self.current.span.clone(),
});
self.advance();
// assert_eq!(self.current.kind, TokenKind::NewLine);
return ret;
},
_ => self.parse_expression(),
Expand Down Expand Up @@ -371,7 +372,7 @@ impl<'t, 'contents> Parser<'t, 'contents> {
}

fn parse_register_binding(&mut self, inner: Option<Type>) -> Result<Type> {
// register binding
// we expect the current token to be a `;` here, so step past it
self.advance();

match self.current.kind {
Expand All @@ -394,9 +395,9 @@ impl<'t, 'contents> Parser<'t, 'contents> {
.with_label(ReportLabel::new(self.current.span.clone()))
.with_note("HINT: Registers follow the format r<ident>. e.g r8 r32")
.into();
}
};

match self.current.text.strip_prefix('r').unwrap().parse::<usize>() {
match self.current.text[1..].parse::<usize>() {
Err(e) => match e.kind() {
IntErrorKind::Empty => ReportKind::SyntaxError
.new("Expected register identifier after r prefix")
Expand All @@ -412,6 +413,8 @@ impl<'t, 'contents> Parser<'t, 'contents> {
.with_label(ReportLabel::new(self.current.span.clone()))
.with_note("HINT: Registers follow the format r<ident>. e.g r8 r32")
},
// Here only positive overflow can be emitted by parse::<usize>().
// It also doesn't emit Zero, because usize can store 0.
_ => ReportKind::SyntaxError
.new("Register identifier intager overflows")
.with_label(ReportLabel::new(self.current.span.clone()))
Expand All @@ -422,168 +425,153 @@ impl<'t, 'contents> Parser<'t, 'contents> {
}
}

fn parse_type(&mut self) -> Result<Type> {
match self.current.kind {
TokenKind::Semicolon => self.parse_register_binding(None),
TokenKind::DecimalIntLiteral => {
// We know it lexed so this has to pass, so we can unwrap
let size = self.current.text.parse::<usize>().unwrap();
if size == 0 {
return ReportKind::SyntaxError
.new("Size cannot be zero")
.with_label(ReportLabel::new(self.current.span.clone()))
.into();
};
// We take a slice here because we never grow the terminals, so there is no need for the
// capacity that comes with Vec<TokenKind>
fn parse_array_type(&mut self, terminals: &[TokenKind]) -> Result<(Type, Option<usize>)> {
let elem_type = self.parse_type()?;

Ok(Type::Size(size))
},
TokenKind::Identifier => Ok(Type::Struct(self.current.text.to_string())),
TokenKind::LBracket => {
self.advance();
let t = self.parse_type()?;
let mut n = None;
if let Type::Register { ident, .. } = elem_type {
return ReportKind::RegisterWithinHeap
.new("Heaps cannot contain register bindings")
.with_label(ReportLabel::new(self.current.span.clone()))
.with_note(format!("{ident}"))
.into();
}

self.advance();
if self.current.kind == TokenKind::Colon {
self.advance();
if self.current.kind == TokenKind::Colon {
for term in terminals {
if *term == self.peek(1).kind {
self.advance();
match self.current.kind {
TokenKind::DecimalIntLiteral => {
n = Some(self.current.text.parse::<usize>().unwrap());
if n == Some(0) {
return ReportKind::SyntaxError
.new("Array size cannot be zero.")
.with_note(format!("HINT: Did you mean [{t}:]"))
.with_label(ReportLabel::new(self.current.span.clone()))
.into();
}
self.advance();
},
TokenKind::RBracket => {},
_ => {
self.advance();
return ReportKind::UnexpectedToken
.new(format!("got {:?}", self.current.kind))
.with_label(ReportLabel::new(self.current.span.clone()))
.into();
},
}
return Ok((elem_type, Some(0)));
}
// We should fail earlier but we wait to gather the element size
// n before logging for clearer error logging
if let Type::Register { inner, ident } = t {
let mut inner_str = String::new();
let mut n_str = String::new();
if inner.is_some() {
inner_str = format!("{}", inner.unwrap());
}
}

if n.is_some() && n.unwrap() != 0 {
n_str = format!("{}", n.unwrap());
}
self.advance();

self.advance();
if self.current.kind == TokenKind::DecimalIntLiteral {
let elem_size = self.current.text.parse::<usize>().unwrap();
if elem_size == 0 {
return ReportKind::SyntaxError
.new("Heap types cannot contain register bindings")
.new("Array size cannot be zero.")
.with_note(format!("HINT: Did you mean [{elem_type}:]"))
.with_label(ReportLabel::new(self.current.span.clone()))
.with_note(format!("HINT: Did you want to bind the pointer to the register? [{inner_str}:{n_str}];r{ident}"))
.into();
}
self.advance();
return Ok((elem_type, Some(elem_size)));
}
return ReportKind::UnexpectedToken
.new(format!(
"Expected {}, got {:?}",
terminals
.into_iter()
.map(|x| format!("{x:?}"))
.collect::<Vec<String>>()
.join(" or "),
self.current.kind
))
.with_label(ReportLabel::new(self.current.span.clone()))
.into();
}

if self.current.kind != TokenKind::RBracket {
self.advance();
Ok((elem_type, None))
}

fn parse_type(&mut self) -> Result<Type> {
match self.current.kind {
TokenKind::Semicolon => self.parse_register_binding(None),
TokenKind::DecimalIntLiteral => {
// It lexed as a decimal literal, so parsing should presumably only fail when the value overflows usize
let Ok(size) = self.current.text.parse::<usize>() else {
return ReportKind::SyntaxError
.new("You cant have this many bytes, what are you even doing anyways?? stack overflow?")
.with_label(ReportLabel::new(self.current.span.clone()))
.into();
};

if size == 0 {
return ReportKind::SyntaxError
.new(format!("Expected closing bracket. Got {:?}", self.current.text))
.new("Size cannot be zero")
.with_label(ReportLabel::new(self.current.span.clone()))
.into();
};

Ok(Type::Heap { is_pointer: true, contents: vec![(t, n)] })
Ok(Type::Size(size))
},
TokenKind::LBrace => {
TokenKind::Identifier => Ok(Type::Struct(self.current.text.to_string())),
TokenKind::LBrace | TokenKind::LBracket => {
let is_pointer = self.current.kind == TokenKind::LBracket;
let start_kind = if is_pointer {TokenKind::LBracket} else {TokenKind::LBrace};
let end_kind = if is_pointer {TokenKind::RBracket} else {TokenKind::RBrace};
self.advance();
if self.current.kind == TokenKind::RBrace {
let mut span = self.current.span.clone();
span.start_index -= 1;

return ReportKind::SyntaxError
.new("Empty heaps are disallowed")
.with_label(ReportLabel::new(span))
.with_note("HINT: Did you want to create a void pointer: []")
.into();
}
//NOTE: idk if 5 is the right number. To be determined
let mut vec: Vec<(Type, Option<usize>)> = Vec::with_capacity(5);
loop {
let start = self.current.span.clone();
let t = self.parse_type()?;
let mut n = None;

let end = self.current.span.clone();
let span = start.extend(&end);
self.advance();

if self.current.kind == TokenKind::Colon {
self.advance();
match self.current.kind {
TokenKind::DecimalIntLiteral => {
n = Some(self.current.text.parse::<usize>().unwrap());
if n == Some(0) {
return ReportKind::SyntaxError
.new("Array size cannot be zero.")
.with_note(format!("HINT: Did you mean {t}:"))
.with_label(ReportLabel::new(self.current.span.clone()))
.into();
}
self.advance();
},
TokenKind::Comma | TokenKind::RBrace => {},
_ => {
self.advance();
return ReportKind::UnexpectedToken
.new(format!(
"Expected either `,` `}}` or a intager, got {:?}",
self.current.kind
))
.with_label(ReportLabel::new(self.current.span.clone()))
.into();
},
}
}
while self.current.kind != end_kind {
// let start = self.current.span.clone();
let (t, n) = self.parse_array_type(&[TokenKind::Comma, end_kind, TokenKind::NewLine])?;
// let end = self.current.span.clone();
// let span = start.extend(&end);
// let mut span = self.current.span.clone();
// span.start_index -= 2;
// span.end_index -= 1;
vec.push((t, n));

if self.current.kind != TokenKind::Comma {
if self.current.kind == TokenKind::RBrace {
vec.push((t, n));
break;
}
return ReportKind::SyntaxError
.new("Expected comma to separate heap types")
.with_label(ReportLabel::new(self.current.span.clone()))
.into();
}
self.advance();
if let Type::Register { ident, .. } = t {
if self.current.kind == TokenKind::NewLine {
let mut span = self.current.span.clone();
span.start_index -= 1;
return ReportKind::SyntaxError
.new("Heap types cannot contain register bindings")
.new(format!("Unterminated {} heap", if is_pointer {"pointer to"} else {""}))
.with_label(ReportLabel::new(span))
.with_note(format!("HINT: Did you want to bind the pointer to the register? {};r{ident}", Type::Heap { is_pointer: false, contents: vec }))
.with_note(format!("HINT: did you mean to close this heap? {}", Type::Heap { is_pointer, contents: vec } ))
.into();
}

vec.push((t, n));
if self.current.kind != end_kind {
if self.current.kind == (if is_pointer {TokenKind::RBrace} else {TokenKind::RBracket}) {
return ReportKind::SyntaxError
.new("Mismatched heap brackets")
.with_label(ReportLabel::new(self.current.span.clone()))
.with_note("HINT: Be more decisive next time. Is it a pointer or not?")
.into();
}

if self.peek(1).kind == TokenKind::NewLine {
return ReportKind::SyntaxError
.new(format!("Unterminated {} heap", if is_pointer {"pointer to"} else {""}))
.with_label(ReportLabel::new(self.current.span.clone()))
.with_note("HINT: did you mean to close this heap?")
.into();
}

if self.current.kind == TokenKind::Comma {
self.advance();
}
}
}

if self.current.kind != TokenKind::RBrace {
return ReportKind::SyntaxError
.new(format!("Expected closing brace. Got {:?}", self.current.text))
.with_label(ReportLabel::new(self.current.span.clone()))
.into();
};
Ok(Type::Heap { is_pointer: false, contents: vec })
Ok(Type::Heap { is_pointer, contents: vec })
},
TokenKind::Colon => {
ReportKind::SyntaxError
.new("Cannot have an array of an unknown type")
.with_label(ReportLabel::new(self.current.span.clone()))
.with_note("HINT: add a type before the colon, duh")
.into()
}
TokenKind::NewLine => {
println!("{:?}", &self.tokens[self.index - 1]);
ReportKind::UnexpectedToken
.new("Unexpected newline")
.with_label(ReportLabel::new(self.current.span.clone()))
.into()
}
_ => ReportKind::UnexpectedToken
.new(format!("Unexpected token: {}", self.current.text))
.new(format!("Unexpected token: {:?}", self.current.kind))
.with_label(ReportLabel::new(self.current.span.clone()))
.with_note("Acceptable tokens are: `[` `{` or an intager")
.with_note("HINT: We expect literally any type... and you still messed it up")
.into(),
}.and_then(|t| {
if self.peek(1).kind == TokenKind::Semicolon {
Expand Down
2 changes: 2 additions & 0 deletions src/report.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub enum ReportKind {
UnexpectedEOF,
InvalidEscapeSequence,
DuplicateAttribute,
RegisterWithinHeap,

// General
IOError,
Expand Down Expand Up @@ -70,6 +71,7 @@ impl ReportKind {
| Self::UnexpectedEOF
| Self::DuplicateAttribute
| Self::InvalidEscapeSequence
| Self::RegisterWithinHeap

// General
| Self::IOError | Self::SyntaxError => Level::Error,
Expand Down

0 comments on commit bfd47cc

Please sign in to comment.