From 2356d6b51ea8e9e15c2b36c700ac528ef6af51de Mon Sep 17 00:00:00 2001 From: Andrew Weiss Date: Mon, 15 Apr 2019 12:04:27 -0400 Subject: [PATCH] initial commit --- .gitignore | 3 + .rustfmt.toml | 1 + Cargo.toml | 8 ++ src/ast.rs | 173 +++++++++++++++++++++++ src/bin/repl.rs | 16 +++ src/lexer.rs | 360 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 5 + src/parser.rs | 336 ++++++++++++++++++++++++++++++++++++++++++++ src/repl.rs | 24 ++++ src/token.rs | 239 ++++++++++++++++++++++++++++++++ 10 files changed, 1165 insertions(+) create mode 100644 .gitignore create mode 100644 .rustfmt.toml create mode 100644 Cargo.toml create mode 100644 src/ast.rs create mode 100644 src/bin/repl.rs create mode 100644 src/lexer.rs create mode 100644 src/lib.rs create mode 100644 src/parser.rs create mode 100644 src/repl.rs create mode 100644 src/token.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..69369904 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +**/*.rs.bk +Cargo.lock diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 00000000..b196eaa2 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1 @@ +tab_spaces = 2 diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..2fc70371 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "cddl" +version = "0.1.0" +authors = ["Andrew Weiss "] +edition = "2018" + +[dependencies] +whoami = "0.4.1" \ No newline at end of file diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 00000000..3e254ffd --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,173 @@ +use super::token::Token; +use std::fmt; +use std::string::ToString; + +pub trait Node { + fn token_literal(&self) -> Option; +} + +#[derive(Default, Debug)] +pub struct CDDL { + pub rules: Vec, +} + +impl Node for CDDL { + fn token_literal(&self) -> Option { + if self.rules.len() > 0 { + return self.rules[0].token_literal(); + } + + None + } +} + +#[derive(Debug)] +pub struct Identifier(pub Token); + +impl Node for Identifier { + fn token_literal(&self) -> Option { + Some(format!("{:?}", self.0)) + } +} + +impl From for Identifier { + fn from(s: String) -> Self { + Identifier(Token::IDENT(s)) + } +} + +impl ToString for Identifier { + fn to_string(&self) -> String { + format!("{}", self.0.to_string()) + } +} + +#[derive(Debug)] +pub enum Rule { + Type(TypeRule), + Group(GroupRule), +} + +impl Node for Rule { + fn token_literal(&self) -> Option { + match self { + Rule::Type(tr) => tr.token_literal(), + Rule::Group(gr) => gr.token_literal(), + } + } +} + +#[derive(Debug)] +pub struct TypeRule { + pub name: Identifier, + pub generic_param: Option, + pub is_type_choice_alternate: bool, + pub value: Type, +} + +impl TypeRule { + pub fn token_literal(&self) -> Option { + self.name.token_literal() + } +} + +#[derive(Debug)] +pub struct GroupRule { + pub name: Identifier, + pub generic_para: Option, + pub is_group_choice_alternate: bool, + pub entry: GroupEntry, +} + +impl Node for GroupRule { + fn token_literal(&self) -> Option { + Some("".into()) + } +} + +#[derive(Default, Debug)] +pub struct GenericParm(pub Vec); + +#[derive(Debug)] +pub struct GenericArg(pub Vec); + +#[derive(Debug)] +pub struct Type(pub Vec); + +#[derive(Debug)] +pub struct Type1 { + pub type2: Type2, + pub operator: Option<(RangeCtlOp, Type2)>, +} + +#[derive(Debug)] +pub enum RangeCtlOp { + RangeOp(bool), + CtlOp(String), +} + +#[derive(Debug)] +pub enum Type2 { + Value(Identifier), + Typename((Identifier, Option)), + Group(Type), + Map(Group), + Array(Group), + Unwrap((Identifier, Option)), + ChoiceFromInlineGroup(Group), + ChoiceFromGroup((Identifier, Option)), + TaggedData(String), + TaggedDataMajorType(String), + Any, +} + +impl<'a> fmt::Display for Type2 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Type2::Typename((tn, _)) => write!(f, "{}", tn.0), + _ => write!(f, ""), + } + } +} + +#[derive(Debug)] +pub struct Group(Vec); + +#[derive(Debug)] +pub struct GroupChoice(Vec); + +#[derive(Debug)] +pub enum GroupEntry { + MemberKey(MemberKeyEntry), + Groupname(GroupnameEntry), + InlineGroup((Option, Group)), +} + +#[derive(Debug)] +pub struct MemberKeyEntry { + pub occur: Option, + pub member_key: Option, + pub entry_type: Type, +} + +#[derive(Debug)] +pub struct GroupnameEntry { + pub occur: Option, + pub name: Identifier, + pub generic_arg: Option, +} + +#[derive(Debug)] +pub enum MemberKey { + // if true, cut is present + Type1((Type1, bool)), + Bareword(Identifier), + Value(String) +} + +#[derive(Debug)] +pub enum Occur { + Exact((usize, usize)), + OneOrMore, + Optional, +} \ No newline at end of file diff --git a/src/bin/repl.rs b/src/bin/repl.rs new file mode 100644 index 00000000..480d6c4c --- /dev/null +++ b/src/bin/repl.rs @@ -0,0 +1,16 @@ +use cddl::repl; +use std::{error::Error, io}; +use whoami; + +fn main() -> Result<(), Box> { + let username = whoami::username(); + + println!("Hello {}! This is the CDDL language!", username); + + println!("Feel free to type in commands"); + + let input = io::stdin(); + let output = io::stdout(); + repl::start(input.lock(), output.lock())?; + Ok(()) +} diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 00000000..2406a631 --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,360 @@ +use super::token; +use super::token::Token; +use std::error::Error; +use std::iter::Peekable; +use std::str::Chars; + +// pub struct Lexer<'a> { +// input: &'a str, +// position: usize, +// read_position: usize, +// ch: char, +// } + +pub struct Lexer<'a> { + input: Peekable>, +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str) -> Lexer<'a> { + Lexer { input: input.chars().peekable() } + } + + fn read_char(&mut self) -> Result> { + self.input.next().ok_or("Unable to advance to the next token".into()) + } + + pub fn next_token(&mut self) -> Result> { + self.skip_whitespace(); + + if let Ok(c) = self.read_char() { + match c { + '=' => match self.peek_char() { + Some(&c) if c == '>' => { + let _ = self.read_char()?; + Ok(Token::ARROWMAP) + } + _ => Ok(Token::ASSIGN), + } + '+' => Ok(Token::PLUS), + '?' => Ok(Token::OPTIONAL), + '*' => Ok(Token::ASTERISK), + '(' => Ok(Token::LPAREN), + ')' => Ok(Token::RPAREN), + '<' => Ok(Token::LANGLEBRACKET), + '"' => Ok(Token::DQUOTE), + '{' => Ok(Token::LBRACE), + '}' => Ok(Token::RBRACE), + ',' => Ok(Token::COMMA), + ';' => Ok(Token::SEMICOLON), + ':' => Ok(Token::COLON), + '^' => Ok(Token::CUT), + '&' => Ok(Token::GTOCHOICE), + '>' => Ok(Token::RANGLEBRACKET), + '$' => match self.peek_char() { + Some(&c) if c == '$' => { + let _ = self.read_char()?; + + Ok(Token::GSOCKET) + } + _ => Ok(Token::TSOCKET), + }, + '/' => match self.peek_char() { + Some(&c) if c == '/' => { + let _ = self.read_char()?; + + match self.peek_char() { + Some(&c) if c == '=' => { + let _ = self.read_char()?; + Ok(Token::GCHOICEALT) + } + _ => Ok(Token::GCHOICE), + } + } + Some(&c) if c == '=' => { + let _ = self.read_char()?; + Ok(Token::TCHOICEALT) + } + _ => Ok(Token::TCHOICE), + }, + '#' => match self.peek_char() { + Some(&c) if c == '6' => { + let _ = self.read_char()?; + Ok(Token::TAG(self.read_tag()?)) + } + None => Ok(Token::ANY), + _ => Ok(Token::ILLEGAL), // Temporary ... need to lex Some(c) + } + '.' => { + let ch = self.read_char()?; + + Ok(token::lookup_control(&*self.read_identifier(ch)?)) + } + _ => { + if is_ealpha(c) { + let ident = token::lookup_ident(&*self.read_identifier(c)?); + + // Range detected + match self.peek_char() { + Some(&c) if c == '.' => { + let _ = self.read_char()?; + + return self.read_range(ident); + } + _ => return Ok(ident), + } + } else if is_digit(c) { + let number = self.read_int_or_float()?; + + // Range detected + match self.read_char() { + Ok(c) if c == '.' => return self.read_range(number), + _ => return Ok(number), + } + } + + Ok(Token::ILLEGAL) + } + } + } else { + Ok(Token::EOF) + } + } + + fn read_identifier(&mut self, c: char) -> Result> { + let mut ident = String::new(); + ident.push(c); + + let mut special_char_count = 0; + + while let Some(&c) = self.peek_char() { + if is_ealpha(c) || is_digit(c) || c == '.' || c == '-' { + // Illegal to have multiple "."'s or "-"'s in an identifier + if c == '.' || c == '-' { + if special_char_count > 1 { + return Err("Invalid identifier".into()); + } + + special_char_count += 1; + } + + ident.push(self.read_char()?); + } else { + break; + } + } + Ok(ident) + } + + fn skip_whitespace(&mut self) { + while let Some(&c) = self.peek_char() { + if c.is_whitespace() { + let _ = self.read_char(); + } else { + break; + } + } + } + + fn read_int_or_float(&mut self) -> Result> { + let ch = self.read_char()?; + + let i = self.read_number(ch)?; + + if let Some(&c) = self.peek_char() { + if c == '.' { + let _ = self.read_char()?; + + if let Some(&c) = self.peek_char() { + if is_digit(c) { + return Ok(Token::FLOATLITERAL( + format!("{}.{}", i, self.read_number(c)?).parse::()?, + )); + } + } + } + } + + Ok(Token::INTLITERAL(i)) + } + + fn read_number(&mut self, c: char) -> Result> { + let mut number = String::new(); + number.push(c); + + while let Some(&c) = self.peek_char() { + if is_digit(c) { + number.push(self.read_char()?); + } else { + break; + } + } + + Ok(number.parse::()?) + } + + fn peek_char(&mut self) -> Option<&char> { + self.input.peek() + } + + fn read_tag(&mut self) -> Result<(usize, String), Box> { + if let Ok(c) = self.read_char() { + if c == '.' { + let ch = self.read_char()?; + + let t = self.read_number(ch)?; + + if let Ok(c) = self.read_char() { + if c == '(' { + let ch = self.read_char()?; + + return Ok((t, self.read_identifier(ch)?)); + } + } + + return Ok((t, String::default())); + } + } + + Ok((0, String::default())) + } + + fn read_range(&mut self, lower: Token) -> Result> { + let mut is_inclusive = true; + let mut t = Token::ILLEGAL; + + if let Ok(c) = self.read_char() { + if c == '.' { + is_inclusive = false; + } + + if is_digit(c) { + t = Token::RANGE(( + lower.to_string(), + self.read_int_or_float()?.to_string(), + is_inclusive, + )); + } else if is_ealpha(c) { + t = Token::RANGE(( + lower.to_string(), + self.read_identifier(c)?, + is_inclusive, + )); + } + } + + Ok(t) + } +} + +fn is_ealpha(ch: char) -> bool { + ch.is_alphabetic() || ch == '@' || ch == '_' || ch == '$' +} + +fn is_digit(ch: char) -> bool { + ch.is_digit(10) +} + +#[cfg(test)] +mod tests { + use super::super::token::Token::*; + use super::*; + + #[test] + fn verify_next_token() { + let input = r#"myfirstrule = myotherrule + +mysecondrule = mythirdrule + +@terminal-color = basecolors / othercolors + +messages = message<"reboot", "now"> + +address = { delivery } + +delivery = ( + street: tstr, ? number ^ => uint, city // + po-box: uint, city // + per-pickup: true +) + +city = ( + name: tstr, + zip-code: uint +)"#; + + let expected_tok = [ + (IDENT("myfirstrule".into()), "myfirstrule"), + (ASSIGN, "="), + (IDENT("myotherrule".into()), "myotherrule"), + (IDENT("mysecondrule".into()), "mysecondrule"), + (ASSIGN, "="), + (IDENT("mythirdrule".into()), "mythirdrule"), + (IDENT("@terminal-color".into()), "@terminal-color"), + (ASSIGN, "="), + (IDENT("basecolors".into()), "basecolors"), + (TCHOICE, "/"), + (IDENT("othercolors".into()), "othercolors"), + (IDENT("messages".into()), "messages"), + (ASSIGN, "="), + (IDENT("message".into()), "message"), + (LANGLEBRACKET, "<"), + (DQUOTE, "\""), + (IDENT("reboot".into()), "reboot"), + (DQUOTE, "\""), + (COMMA, ","), + (DQUOTE, "\""), + (IDENT("now".into()), "now"), + (DQUOTE, "\""), + (RANGLEBRACKET, ">"), + (IDENT("address".into()), "address"), + (ASSIGN, "="), + (LBRACE, "{"), + (IDENT("delivery".into()), "delivery"), + (RBRACE, "}"), + (IDENT("delivery".into()), "delivery"), + (ASSIGN, "="), + (LPAREN, "("), + (IDENT("street".into()), "street"), + (COLON, ":"), + (TSTR, "tstr"), + (COMMA, ","), + (OPTIONAL, "?"), + (NUMBER, "number"), + (CUT, "^"), + (ARROWMAP, "=>"), + (UINT, "uint"), + (COMMA, ","), + (IDENT("city".into()), "city"), + (GCHOICE, "//"), + (IDENT("po-box".into()), "po-box"), + (COLON, ":"), + (UINT, "uint"), + (COMMA, ","), + (IDENT("city".into()), "city"), + (GCHOICE, "//"), + (IDENT("per-pickup".into()), "per-pickup"), + (COLON, ":"), + (TRUE, "true"), + (RPAREN, ")"), + (IDENT("city".into()), "city"), + (ASSIGN, "="), + (LPAREN, "("), + (IDENT("name".into()), "name"), + (COLON, ":"), + (TSTR, "tstr"), + (COMMA, ","), + (IDENT("zip-code".into()), "zip-code"), + (COLON, ":"), + (UINT, "uint"), + (RPAREN, ")"), + ]; + + let mut l = Lexer::new(input); + + for (expected_tok, literal) in expected_tok.iter() { + let tok = l.next_token().unwrap(); + assert_eq!((expected_tok, *literal), (&tok, &*tok.to_string())) + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 00000000..010740cc --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,5 @@ +mod ast; +mod lexer; +pub mod repl; +mod token; +pub mod parser; \ No newline at end of file diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 00000000..3c3d1fd1 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,336 @@ +use super::ast::{GenericParm, Identifier, Node, Rule, Type, Type1, Type2, TypeRule, CDDL, RangeCtlOp}; +use super::lexer::Lexer; +use super::token::Token; +use std::error::Error; +use std::mem; + +struct Parser<'a> { + l: &'a mut Lexer<'a>, + cur_token: Token, + peek_token: Token, + errors: Vec>, +} + +impl<'a> Parser<'a> { + fn new(l: &'a mut Lexer<'a>) -> Result> { + let mut p = Parser { + l: l, + cur_token: Token::EOF, + peek_token: Token::EOF, + errors: Vec::default(), + }; + + p.next_token()?; + p.next_token()?; + + Ok(p) + } + + fn next_token(&mut self) -> Result<(), Box> { + mem::swap(&mut self.cur_token, &mut self.peek_token); + self.peek_token = self.l.next_token()?; + Ok(()) + } + + fn parse_cddl(&mut self) -> Result> { + let mut c = CDDL::default(); + + while self.cur_token != Token::EOF { + c.rules.push(self.parse_rule()?); + // self.next_token()?; + + } + + Ok(c) + } + + fn parse_rule(&mut self) -> Result> { + let name = match &self.cur_token { + Token::IDENT(i) => Token::IDENT(i.to_string()), + _ => return Err("expected IDENT".into()), + }; + + let mut gp: Option; + + if self.peek_token_is(&Token::LANGLEBRACKET) { + gp = Some(self.parse_genericparm()?); + } else { + gp = None; + } + + if !self.expect_peek(&Token::ASSIGN) + && !self.expect_peek(&Token::TCHOICEALT) + && !self.expect_peek(&Token::GCHOICEALT) + { + return Err("Expected ASSIGN".into()); + } + + let mut is_type_choice_alternate = false; + let mut is_group_choice_alternate = false; + + if self.cur_token_is(Token::TCHOICEALT) { + is_type_choice_alternate = true; + } else if self.cur_token_is(Token::GCHOICEALT) { + is_group_choice_alternate = true; + } + + + + self.next_token()?; + + let mut t: Type; + + // Parse grpent + if self.cur_token_is(Token::LPAREN) { + unimplemented!(); + } else { + t = self.parse_type()?; + } + + let tr = TypeRule { + name: Identifier(name), + generic_param: gp, + is_type_choice_alternate: is_type_choice_alternate, + value: t, + }; + + println!("rule value: {:?}", tr); + + Ok(Rule::Type(tr)) + } + + fn parse_genericparm(&mut self) -> Result> { + self.next_token()?; + + let mut generic_params = GenericParm(Vec::new()); + + while !self.cur_token_is(Token::RANGLEBRACKET) { + match &self.cur_token { + Token::IDENT(i) => { + generic_params.0.push(Identifier::from(i.to_string())); + self.next_token()?; + } + Token::COMMA => self.next_token()?, + _ => return Err("Illegal token".into()), + } + } + + self.next_token()?; + + Ok(generic_params) + } + + fn parse_type(&mut self) -> Result> { + let mut t = Type(Vec::new()); + + t.0.push(self.parse_type1()?); + + while self.cur_token_is(Token::TCHOICE) { + self.next_token()?; + t.0.push(self.parse_type1()?); + } + + Ok(t) + } + + fn parse_type1(&mut self) -> Result> { + match &self.cur_token { + Token::RANGE((l, u, i)) => { + Ok(Type1 { + type2: Type2::Value(l.to_string().into()), + operator: Some((RangeCtlOp::RangeOp(*i), Type2::Value(u.to_string().into()))), + }) + } + Token::IDENT(_) => { + Ok(Type1{ + type2: self.parse_type2()?, + operator: None, + }) + } + _ => Err("Unknown".into()), + } + } + + fn parse_type2(&mut self) -> Result> { + let t2 = match &self.cur_token { + // value + Token::DQUOTE => { + if !self.expect_peek(&Token::IDENT("".into())) { + return Err("Expecting \"IDENT(String)\"".into()); + } + + match &self.cur_token { + Token::IDENT(ident) => Ok(Type2::Value(ident.to_string().into())), + _ => Err("Expecting \"IDENT(String)\"".into()), + } + } + // typename [genericarg] + Token::IDENT(ident) => { + // optional genericarg detected + // if self.peek_token_is(&Token::LANGLEBRACKET) { + + // } + + Ok(Type2::Typename((ident.to_string().into(), None))) + } + _ => return Err("Unknown".into()), + }; + + self.next_token()?; + + t2 + } + + fn cur_token_is(&self, t: Token) -> bool { + mem::discriminant(&self.cur_token) == mem::discriminant(&t) + } + + fn peek_token_is(&self, t: &Token) -> bool { + mem::discriminant(&self.peek_token) == mem::discriminant(&t) + } + + fn expect_peek(&mut self, t: &Token) -> bool { + if self.peek_token_is(t) { + return self.next_token().is_ok(); + } + + self.peek_error(t); + + false + } + + fn peek_error(&mut self, t: &Token) { + self.errors.push( + format!( + "expected next token to be {:?}, got {:?} instead", + t, self.peek_token + ) + .into(), + ) + } +} + +#[cfg(test)] +mod tests { + use super::super::{ast, lexer::Lexer}; + use super::*; + + #[test] + fn verify_rule() -> Result<(), Box> { + let input = r#"myrule = myotherrule + +secondrule = thirdrule"#; + + let mut l = Lexer::new(input); + let mut p = Parser::new(&mut l)?; + + let cddl = p.parse_cddl()?; + check_parser_errors(&p)?; + + if cddl.rules.len() != 2 { + eprintln!( + "cddl.rules does not contain 2 statements. got='{}'", + cddl.rules.len() + ); + } + + let expected_identifiers = ["myrule", "secondrule"]; + + for (idx, expected_identifier) in expected_identifiers.iter().enumerate() { + let rule = &cddl.rules[idx]; + assert!(test_rule(rule, expected_identifier)); + } + + Ok(()) + } + + fn test_rule(r: &Rule, name: &str) -> bool { + match r { + Rule::Type(tr) => { + if tr.name.0.to_string() != name { + eprintln!( + "rule.name.value not '{}'. got={}", + name, + tr.name.0.to_string() + ); + return false; + } + + if tr.name.token_literal().unwrap() != format!("{:?}", Token::IDENT(name.into())) { + eprintln!( + "rule.value not '{}'. got={}", + name, + tr.name.token_literal().unwrap() + ); + return false; + } + + true + } + _ => false, + } + } + + #[test] + fn verify_type() -> Result<(), Box> { + let input = r#"tchoice1 / tchoice2"#; + + let mut l = Lexer::new(input); + let mut p = Parser::new(&mut l)?; + + let t = p.parse_type()?; + check_parser_errors(&p)?; + + if t.0.len() != 2 { + eprintln!( + "type.0 does not contain 2 type choices. got='{}'", + t.0.len() + ); + } + + let expected_t1_identifiers = ["tchoice1", "tchoice2"]; + + for (idx, expected_t1_identifier) in expected_t1_identifiers.iter().enumerate() { + let t_choice = &t.0[idx]; + assert_eq!(t_choice.type2.to_string(), *expected_t1_identifier); + } + + Ok(()) + } + + #[test] + fn verify_genericparm() -> Result<(), Box> { + let input = r#""#; + + let mut l = Lexer::new(input); + let mut p = Parser::new(&mut l)?; + + let gps = p.parse_genericparm()?; + check_parser_errors(&p)?; + + if gps.0.len() != 2 { + eprintln!("GenericParm does not contain 2 generic parameters. got='{}'", gps.0.len()); + } + + let expected_generic_params = ["t", "v"]; + + for (idx, expected_generic_param) in expected_generic_params.iter().enumerate() { + let gp = &gps.0[idx]; + assert_eq!(gp.to_string(), *expected_generic_param); + } + + Ok(()) + } + + fn check_parser_errors(p: &Parser) -> Result<(), Box> { + if p.errors.len() == 0 { + return Ok(()); + } + + for err in p.errors.iter() { + eprintln!("parser error: {}", err.to_string()); + } + + Err("Parser has errors".into()) + } +} diff --git a/src/repl.rs b/src/repl.rs new file mode 100644 index 00000000..5f1b8661 --- /dev/null +++ b/src/repl.rs @@ -0,0 +1,24 @@ +use super::lexer::Lexer; +use super::token::Token; +use std::error; +use std::io::{BufRead, Write}; + +const PROMPT: &[u8] = b">> "; + +pub fn start(mut reader: R, mut writer: W) -> Result<(), Box> { + loop { + writer.write(PROMPT)?; + writer.flush()?; + + let mut line = String::new(); + reader.read_line(&mut line)?; + + let mut l = Lexer::new(&*line); + let mut tok = l.next_token()?; + while tok != Token::EOF { + writer.write(format!("{:?} [literal: \"{}\"]\n", tok, tok.to_string()).as_bytes())?; + writer.flush()?; + tok = l.next_token()?; + } + } +} diff --git a/src/token.rs b/src/token.rs new file mode 100644 index 00000000..ecf90e7e --- /dev/null +++ b/src/token.rs @@ -0,0 +1,239 @@ +use std::fmt; + +#[derive(PartialEq, Debug)] +pub enum Token { + ILLEGAL, + EOF, + + IDENT(String), + INTLITERAL(usize), + FLOATLITERAL(f64), + TAG((usize, String)), + + // Operators + ASSIGN, + OPTIONAL, + ASTERISK, + OCCURENCE((usize, usize)), + PLUS, + UNWRAP, + CONTROL(String), + + // Delimiters + COMMA, + COLON, + SEMICOLON, + + TCHOICE, + GCHOICE, + TCHOICEALT, + GCHOICEALT, + ARROWMAP, + CUT, + TSOCKET, + GSOCKET, + + RANGE((String, String, bool)), + + LPAREN, + RPAREN, + LBRACE, + RBRACE, + LBRACKET, + RBRACKET, + LANGLEBRACKET, + RANGLEBRACKET, + DQUOTE, + + // Control operators + SIZE, + BITS, + REGEXP, + CBOR, + CBORSEQ, + WITHIN, + AND, + LT, + LE, + GT, + GE, + EQ, + NE, + DEFAULT, + + GTOCHOICE, + + // Standard prelude + FALSE, + TRUE, + BOOL, + NIL, + NULL, + UINT, + NINT, + INT, + FLOAT16, + FLOAT32, + FLOAT64, + FLOAT1632, + FLOAT3264, + FLOAT, + BSTR, + TSTR, + ANY, + BYTES, + TEXT, + TDATE, + TIME, + NUMBER, + BIGUINT, + BIGNINT, + INTEGER, + UNSIGNED, + DECFRAC, + BIGFLOAT, + EB64URL, + EB64LEGACY, + EB16, + ENCODEDCBOR, + URI, + B64URL, + B64LEGACY, + TREGEXP, + MIMEMESSAGE, + CBORANY, + UNDEFINED, +} + +impl<'a> fmt::Display for Token { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Token::IDENT(ident) => write!(f, "{}", ident), + Token::ILLEGAL => write!(f, ""), + Token::ASSIGN => write!(f, "="), + Token::PLUS => write!(f, "+"), + Token::OPTIONAL => write!(f, "?"), + Token::ASTERISK => write!(f, "*"), + Token::LPAREN => write!(f, "("), + Token::RPAREN => write!(f, ")"), + Token::LBRACE => write!(f, "{{"), + Token::RBRACE => write!(f, "}}"), + Token::TCHOICE => write!(f, "/"), + Token::TCHOICEALT => write!(f, "/="), + Token::GCHOICEALT => write!(f, "//="), + Token::COMMA => write!(f, ","), + Token::SEMICOLON => write!(f, ";"), + Token::COLON => write!(f, ":"), + Token::CUT => write!(f, "^"), + Token::EOF => write!(f, ""), + Token::TSTR => write!(f, "tstr"), + Token::LANGLEBRACKET => write!(f, "<"), + Token::RANGLEBRACKET => write!(f, ">"), + Token::DQUOTE => write!(f, "\""), + Token::INT => write!(f, "int"), + Token::UINT => write!(f, "uint"), + Token::INTLITERAL(i) => write!(f, "{}", i), + Token::FLOATLITERAL(fl) => write!(f, "{}", fl), + Token::ARROWMAP => write!(f, "=>"), + Token::SIZE => write!(f, ".size"), + Token::BITS => write!(f, ".bits"), + Token::REGEXP => write!(f, ".regexp"), + Token::CBOR => write!(f, ".cbor"), + Token::CBORSEQ => write!(f, ".cborseq"), + Token::WITHIN => write!(f, ".within"), + Token::AND => write!(f, ".and"), + Token::LT => write!(f, ".lt"), + Token::LE => write!(f, ".le"), + Token::GT => write!(f, ".gt"), + Token::GE => write!(f, ".ge"), + Token::EQ => write!(f, ".eq"), + Token::NE => write!(f, ".ne"), + Token::DEFAULT => write!(f, ".default"), + Token::NUMBER => write!(f, "number"), + Token::BSTR => write!(f, "bstr"), + Token::GCHOICE => write!(f, "//"), + Token::TRUE => write!(f, "true"), + Token::GTOCHOICE => write!(f, "&"), + Token::RANGE((l, u, i)) => { + if *i { + return write!(f, "{}..{}", l, u); + } + + write!(f, "{}...{}", l, u) + } + Token::TAG((tn, tt)) => { + if tt != "" { + return write!(f, "#6.{}({})", tn, tt); + } + + write!(f, "#6.{}", tn) + } + _ => write!(f, ""), + } + } +} + +pub fn lookup_control(ident: &str) -> Token { + match ident { + "size" => Token::SIZE, + "bits" => Token::BITS, + "regexp" => Token::REGEXP, + "cbor" => Token::CBOR, + "cborseq" => Token::CBORSEQ, + "within" => Token::WITHIN, + "and" => Token::AND, + "lt" => Token::LT, + "le" => Token::LE, + "gt" => Token::GT, + "ge" => Token::GE, + "eq" => Token::EQ, + "ne" => Token::NE, + "default" => Token::DEFAULT, + _ => Token::ILLEGAL, + } +} + +pub fn lookup_ident(ident: &str) -> Token { + match ident { + "false" => Token::FALSE, + "true" => Token::TRUE, + "bool" => Token::BOOL, + "nil" => Token::NIL, + "null" => Token::NULL, + "uint" => Token::UINT, + "nint" => Token::NINT, + "int" => Token::INT, + "float16" => Token::FLOAT16, + "float32" => Token::FLOAT32, + "float64" => Token::FLOAT64, + "float16-32" => Token::FLOAT1632, + "float32-64" => Token::FLOAT3264, + "float" => Token::FLOAT, + "bstr" => Token::BSTR, + "tstr" => Token::TSTR, + "any" => Token::ANY, + "bytes" => Token::BYTES, + "text" => Token::TEXT, + "tdate" => Token::TDATE, + "time" => Token::TIME, + "number" => Token::NUMBER, + "biguint" => Token::BIGUINT, + "bignint" => Token::BIGNINT, + "integer" => Token::INTEGER, + "unsigned" => Token::UNSIGNED, + "decfrac" => Token::DECFRAC, + "bigfloat" => Token::BIGFLOAT, + "eb64url" => Token::EB64URL, + "eb64legacy" => Token::EB64LEGACY, + "eb16" => Token::EB16, + "encoded-cbor" => Token::ENCODEDCBOR, + "uri" => Token::URI, + "b64url" => Token::B64URL, + "b64legacy" => Token::B64LEGACY, + "regexp" => Token::TREGEXP, + "mime-message" => Token::MIMEMESSAGE, + "cbor-any" => Token::CBORANY, + "undefined" => Token::UNDEFINED, + _ => Token::IDENT(ident.into()), + } +}