From 2356d6b51ea8e9e15c2b36c700ac528ef6af51de Mon Sep 17 00:00:00 2001
From: Andrew Weiss <anweiss@docker.com>
Date: Mon, 15 Apr 2019 12:04:27 -0400
Subject: [PATCH] initial commit

---
 .gitignore      |   3 +
 .rustfmt.toml   |   1 +
 Cargo.toml      |   8 ++
 src/ast.rs      | 173 +++++++++++++++++++++++
 src/bin/repl.rs |  16 +++
 src/lexer.rs    | 360 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs      |   5 +
 src/parser.rs   | 336 ++++++++++++++++++++++++++++++++++++++++++++
 src/repl.rs     |  24 ++++
 src/token.rs    | 239 ++++++++++++++++++++++++++++++++
 10 files changed, 1165 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .rustfmt.toml
 create mode 100644 Cargo.toml
 create mode 100644 src/ast.rs
 create mode 100644 src/bin/repl.rs
 create mode 100644 src/lexer.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/parser.rs
 create mode 100644 src/repl.rs
 create mode 100644 src/token.rs
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..69369904
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/target
+**/*.rs.bk
+Cargo.lock
diff --git a/.rustfmt.toml b/.rustfmt.toml
new file mode 100644
index 00000000..b196eaa2
--- /dev/null
+++ b/.rustfmt.toml
@@ -0,0 +1 @@
+tab_spaces = 2
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 00000000..2fc70371
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "cddl"
+version = "0.1.0"
+authors = ["Andrew Weiss <andrew.weiss@outlook.com>"]
+edition = "2018"
+
+[dependencies]
+whoami = "0.4.1"
\ No newline at end of file
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 00000000..3e254ffd
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,173 @@
+use super::token::Token;
+use std::fmt;
+use std::string::ToString;
+
+pub trait Node {
+  fn token_literal(&self) -> Option<String>;
+}
+
+#[derive(Default, Debug)]
+pub struct CDDL {
+  pub rules: Vec<Rule>,
+}
+
+impl Node for CDDL {
+  // The literal of a whole CDDL document is the literal of its first rule
+  fn token_literal(&self) -> Option<String> {
+    match self.rules.first() {
+      Some(first_rule) => first_rule.token_literal(),
+      None => None,
+    }
+  }
+}
+
+#[derive(Debug)]
+pub struct Identifier(pub Token);
+
+impl Node for Identifier {
+  fn token_literal(&self) -> Option<String> {
+    Some(format!("{:?}", self.0))
+  }
+}
+
+impl From<String> for Identifier {
+  fn from(s: String) -> Self {
+    Identifier(Token::IDENT(s))
+  }
+}
+
+impl ToString for Identifier {
+  fn to_string(&self) -> String {
+    self.0.to_string() // text of the wrapped token, as produced by Token's Display
+  }
+}
+
+#[derive(Debug)]
+pub enum Rule {
+  Type(TypeRule),
+  Group(GroupRule),
+}
+
+impl Node for Rule {
+  fn token_literal(&self) -> Option<String> {
+    match self {
+      Rule::Type(tr) => tr.token_literal(),
+      Rule::Group(gr) => gr.token_literal(),
+    }
+  }
+}
+
+#[derive(Debug)]
+pub struct TypeRule {
+  pub name: Identifier,
+  pub generic_param: Option<GenericParm>,
+  pub is_type_choice_alternate: bool,
+  pub value: Type,
+}
+
+impl TypeRule {
+  pub fn token_literal(&self) -> Option<String> {
+    self.name.token_literal()
+  }
+}
+
+#[derive(Debug)]
+pub struct GroupRule {
+  pub name: Identifier,
+  pub generic_para: Option<GenericParm>,
+  pub is_group_choice_alternate: bool,
+  pub entry: GroupEntry,
+}
+
+impl Node for GroupRule {
+  fn token_literal(&self) -> Option<String> {
+    self.name.token_literal() // same as TypeRule: the literal of the rule name's token
+  }
+}
+
+#[derive(Default, Debug)]
+pub struct GenericParm(pub Vec<Identifier>);
+
+#[derive(Debug)]
+pub struct GenericArg(pub Vec<Type1>);
+
+#[derive(Debug)]
+pub struct Type(pub Vec<Type1>);
+
+#[derive(Debug)]
+pub struct Type1 {
+  pub type2: Type2,
+  pub operator: Option<(RangeCtlOp, Type2)>,
+}
+
+#[derive(Debug)]
+pub enum RangeCtlOp {
+  RangeOp(bool), // the parser stores the RANGE token's inclusive flag here
+  CtlOp(String), // NOTE(review): presumably the control operator name; never constructed in the code in view
+}
+
+#[derive(Debug)]
+pub enum Type2 {
+  Value(Identifier),
+  Typename((Identifier, Option<GenericArg>)),
+  Group(Type),
+  Map(Group),
+  Array(Group),
+  Unwrap((Identifier, Option<GenericArg>)),
+  ChoiceFromInlineGroup(Group),
+  ChoiceFromGroup((Identifier, Option<GenericArg>)),
+  TaggedData(String),
+  TaggedDataMajorType(String),
+  Any,
+}
+
+impl fmt::Display for Type2 {
+  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    if let Type2::Typename((tn, _)) = self {
+      return write!(f, "{}", tn.0);
+    }
+    write!(f, "") // every variant other than Typename currently renders as an empty string
+  }
+}
+
+#[derive(Debug)]
+pub struct Group(Vec<GroupChoice>);
+
+#[derive(Debug)]
+pub struct GroupChoice(Vec<GroupEntry>);
+
+#[derive(Debug)]
+pub enum GroupEntry {
+  MemberKey(MemberKeyEntry),
+  Groupname(GroupnameEntry),
+  InlineGroup((Option<Occur>, Group)),
+}
+
+#[derive(Debug)]
+pub struct MemberKeyEntry {
+  pub occur: Option<Occur>,
+  pub member_key: Option<MemberKey>,
+  pub entry_type: Type, 
+}
+
+#[derive(Debug)]
+pub struct GroupnameEntry {
+  pub occur: Option<Occur>,
+  pub name: Identifier,
+  pub generic_arg: Option<GenericArg>,
+}
+
+#[derive(Debug)]
+pub enum MemberKey {
+  // if true, cut is present
+  Type1((Type1, bool)),
+  Bareword(Identifier),
+  Value(String)
+}
+
+#[derive(Debug)]
+pub enum Occur {
+  Exact((usize, usize)),
+  OneOrMore,
+  Optional,
+}
\ No newline at end of file
diff --git a/src/bin/repl.rs b/src/bin/repl.rs
new file mode 100644
index 00000000..480d6c4c
--- /dev/null
+++ b/src/bin/repl.rs
@@ -0,0 +1,16 @@
+use cddl::repl;
+use std::{error::Error, io};
+use whoami;
+
+// Greets the current user, then runs the token-printing REPL on stdin/stdout.
+fn main() -> Result<(), Box<dyn Error>> {
+  let username = whoami::username();
+
+  println!("Hello {}! This is the CDDL language!", username);
+  println!("Feel free to type in commands");
+
+  let input = io::stdin();
+  let output = io::stdout();
+  repl::start(input.lock(), output.lock())?;
+  Ok(())
+}
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 00000000..2406a631
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,360 @@
+use super::token;
+use super::token::Token;
+use std::error::Error;
+use std::iter::Peekable;
+use std::str::Chars;
+
+// pub struct Lexer<'a> {
+//   input: &'a str,
+//   position: usize,
+//   read_position: usize,
+//   ch: char,
+// }
+
+pub struct Lexer<'a> {
+  input: Peekable<Chars<'a>>, // source characters with one character of lookahead
+}
+
+impl<'a> Lexer<'a> {
+  /// Creates a lexer over `input`.
+  pub fn new(input: &'a str) -> Lexer<'a> {
+    Lexer { input: input.chars().peekable() }
+  }
+
+  /// Consumes and returns the next character; errors at end of input.
+  fn read_char(&mut self) -> Result<char, Box<dyn Error>> {
+    self.input.next().ok_or_else(|| "Unable to advance to the next token".into())
+  }
+
+  /// Skips leading whitespace and lexes the next token. Returns `Token::EOF`
+  /// at end of input and `Token::ILLEGAL` for characters it cannot lex.
+  pub fn next_token(&mut self) -> Result<Token, Box<dyn Error>> {
+    self.skip_whitespace();
+
+    if let Ok(c) = self.read_char() {
+      match c {
+        '=' => match self.peek_char() {
+          Some(&c) if c == '>' => {
+            let _ = self.read_char()?;
+            Ok(Token::ARROWMAP)
+          }
+          _ => Ok(Token::ASSIGN),
+        },
+        '+' => Ok(Token::PLUS),
+        '?' => Ok(Token::OPTIONAL),
+        '*' => Ok(Token::ASTERISK),
+        '(' => Ok(Token::LPAREN),
+        ')' => Ok(Token::RPAREN),
+        '<' => Ok(Token::LANGLEBRACKET),
+        '"' => Ok(Token::DQUOTE),
+        '{' => Ok(Token::LBRACE),
+        '}' => Ok(Token::RBRACE),
+        ',' => Ok(Token::COMMA),
+        ';' => Ok(Token::SEMICOLON),
+        ':' => Ok(Token::COLON),
+        '^' => Ok(Token::CUT),
+        '&' => Ok(Token::GTOCHOICE),
+        '>' => Ok(Token::RANGLEBRACKET),
+        '$' => match self.peek_char() {
+          Some(&c) if c == '$' => {
+            let _ = self.read_char()?;
+            Ok(Token::GSOCKET)
+          }
+          _ => Ok(Token::TSOCKET),
+        },
+        '/' => match self.peek_char() {
+          Some(&c) if c == '/' => {
+            let _ = self.read_char()?;
+            match self.peek_char() {
+              Some(&c) if c == '=' => {
+                let _ = self.read_char()?;
+                Ok(Token::GCHOICEALT)
+              }
+              _ => Ok(Token::GCHOICE),
+            }
+          }
+          Some(&c) if c == '=' => {
+            let _ = self.read_char()?;
+            Ok(Token::TCHOICEALT)
+          }
+          _ => Ok(Token::TCHOICE),
+        },
+        '#' => match self.peek_char() {
+          Some(&c) if c == '6' => {
+            let _ = self.read_char()?;
+            Ok(Token::TAG(self.read_tag()?))
+          }
+          None => Ok(Token::ANY),
+          _ => Ok(Token::ILLEGAL), // Temporary ... need to lex Some(c)
+        },
+        '.' => {
+          let ch = self.read_char()?;
+          Ok(token::lookup_control(&*self.read_identifier(ch)?))
+        }
+        _ => {
+          if is_ealpha(c) {
+            let ident = token::lookup_ident(&*self.read_identifier(c)?);
+            // Range detected
+            match self.peek_char() {
+              Some(&c) if c == '.' => {
+                let _ = self.read_char()?;
+                return self.read_range(ident);
+              }
+              _ => return Ok(ident),
+            }
+          } else if is_digit(c) {
+            let number = self.read_int_or_float(c)?;
+            // Range detected; peek so a character that is not '.' stays in the input
+            match self.peek_char() {
+              Some(&c) if c == '.' => {
+                let _ = self.read_char()?;
+                return self.read_range(number);
+              }
+              _ => return Ok(number),
+            }
+          }
+
+          Ok(Token::ILLEGAL)
+        }
+      }
+    } else {
+      Ok(Token::EOF)
+    }
+  }
+
+  /// Reads the rest of an identifier whose first character `c` is already consumed.
+  fn read_identifier(&mut self, c: char) -> Result<String, Box<dyn Error>> {
+    let mut ident = String::new();
+    ident.push(c);
+    let mut special_char_count = 0;
+    while let Some(&c) = self.peek_char() {
+      if is_ealpha(c) || is_digit(c) || c == '.' || c == '-' {
+        // Up to two "."/"-" characters are accepted; a third one is an error
+        if c == '.' || c == '-' {
+          if special_char_count > 1 {
+            return Err("Invalid identifier".into());
+          }
+          special_char_count += 1;
+        }
+        ident.push(self.read_char()?);
+      } else {
+        break;
+      }
+    }
+    Ok(ident)
+  }
+
+  /// Consumes characters up to the next non-whitespace character.
+  fn skip_whitespace(&mut self) {
+    while let Some(&c) = self.peek_char() {
+      if c.is_whitespace() {
+        let _ = self.read_char();
+      } else {
+        break;
+      }
+    }
+  }
+
+  /// Lexes an integer or float literal whose first digit `c` is already consumed.
+  fn read_int_or_float(&mut self, c: char) -> Result<Token, Box<dyn Error>> {
+    let int_part = self.read_digits(c)?;
+    // A '.' belongs to this literal only when a digit follows it; otherwise
+    // it starts a range operator and is left in the input for the caller.
+    let mut lookahead = self.input.clone();
+    let is_float = lookahead.next() == Some('.') && lookahead.next().map_or(false, is_digit);
+    if !is_float {
+      return Ok(Token::INTLITERAL(int_part.parse::<usize>()?));
+    }
+    let _ = self.read_char()?; // the decimal point
+    let first_frac_digit = self.read_char()?;
+    // The fraction stays textual so that leading zeros ("1.05") survive
+    let frac_part = self.read_digits(first_frac_digit)?;
+    Ok(Token::FLOATLITERAL(format!("{}.{}", int_part, frac_part).parse::<f64>()?))
+  }
+
+  /// Collects `c` plus every directly following decimal digit.
+  fn read_digits(&mut self, c: char) -> Result<String, Box<dyn Error>> {
+    let mut digits = String::new();
+    digits.push(c);
+    while let Some(&c) = self.peek_char() {
+      if !is_digit(c) {
+        break;
+      }
+      digits.push(self.read_char()?);
+    }
+    Ok(digits)
+  }
+
+  /// Parses `c` plus the digits that follow it as an unsigned integer.
+  fn read_number(&mut self, c: char) -> Result<usize, Box<dyn Error>> {
+    Ok(self.read_digits(c)?.parse::<usize>()?)
+  }
+
+  /// Returns the next character without consuming it.
+  fn peek_char(&mut self) -> Option<&char> {
+    self.input.peek()
+  }
+
+  /// Reads what follows `#6`: an optional `.<number>` and an optional
+  /// parenthesised identifier. Yields `(0, "")` when no `.` follows.
+  fn read_tag(&mut self) -> Result<(usize, String), Box<dyn Error>> {
+    // Peek rather than read so that unrelated characters are never swallowed
+    match self.peek_char() {
+      Some(&c) if c == '.' => {
+        let _ = self.read_char()?;
+      }
+      _ => return Ok((0, String::default())),
+    }
+    let ch = self.read_char()?;
+    let t = self.read_number(ch)?;
+    match self.peek_char() {
+      Some(&c) if c == '(' => {
+        let _ = self.read_char()?;
+      }
+      _ => return Ok((t, String::default())),
+    }
+    let ch = self.read_char()?;
+    let tagged_type = self.read_identifier(ch)?;
+    // The closing paren is part of the tag and must not become its own token
+    if let Some(&')') = self.peek_char() {
+      let _ = self.read_char()?;
+    }
+    Ok((t, tagged_type))
+  }
+
+  /// Lexes the rest of a range after its first '.' has been consumed: `..` is
+  /// inclusive, `...` is not. Malformed input yields `Token::ILLEGAL`; end of input errors.
+  fn read_range(&mut self, lower: Token) -> Result<Token, Box<dyn Error>> {
+    if self.read_char()? != '.' {
+      return Ok(Token::ILLEGAL);
+    }
+
+    let mut c = self.read_char()?;
+    let is_inclusive = c != '.';
+    if !is_inclusive {
+      c = self.read_char()?;
+    }
+
+    let upper = if is_digit(c) {
+      self.read_int_or_float(c)?.to_string()
+    } else if is_ealpha(c) {
+      self.read_identifier(c)?
+    } else {
+      return Ok(Token::ILLEGAL);
+    };
+
+    Ok(Token::RANGE((lower.to_string(), upper, is_inclusive)))
+  }
+}
+
+fn is_ealpha(ch: char) -> bool {
+  ch == '@' || ch == '_' || ch == '$' || ch.is_alphabetic() // letter, '@', '_' or '$'
+}
+
+fn is_digit(ch: char) -> bool {
+  ch.to_digit(10).is_some() // true for the decimal digits '0' through '9'
+}
+
+#[cfg(test)]
+mod tests {
+  use super::super::token::Token::*;
+  use super::*;
+
+  #[test]
+  fn verify_next_token() {
+    let input = r#"myfirstrule = myotherrule
+
+mysecondrule = mythirdrule
+
+@terminal-color = basecolors / othercolors
+    
+messages = message<"reboot", "now">
+
+address = { delivery }
+
+delivery = (
+  street: tstr, ? number ^ => uint, city //
+  po-box: uint, city //
+  per-pickup: true
+)
+
+city = (
+  name: tstr,
+  zip-code: uint
+)"#;
+
+    let expected_tok = [
+      (IDENT("myfirstrule".into()), "myfirstrule"),
+      (ASSIGN, "="),
+      (IDENT("myotherrule".into()), "myotherrule"),
+      (IDENT("mysecondrule".into()), "mysecondrule"),
+      (ASSIGN, "="),
+      (IDENT("mythirdrule".into()), "mythirdrule"),
+      (IDENT("@terminal-color".into()), "@terminal-color"),
+      (ASSIGN, "="),
+      (IDENT("basecolors".into()), "basecolors"),
+      (TCHOICE, "/"),
+      (IDENT("othercolors".into()), "othercolors"),
+      (IDENT("messages".into()), "messages"),
+      (ASSIGN, "="),
+      (IDENT("message".into()), "message"),
+      (LANGLEBRACKET, "<"),
+      (DQUOTE, "\""),
+      (IDENT("reboot".into()), "reboot"),
+      (DQUOTE, "\""),
+      (COMMA, ","),
+      (DQUOTE, "\""),
+      (IDENT("now".into()), "now"),
+      (DQUOTE, "\""),
+      (RANGLEBRACKET, ">"),
+      (IDENT("address".into()), "address"),
+      (ASSIGN, "="),
+      (LBRACE, "{"),
+      (IDENT("delivery".into()), "delivery"),
+      (RBRACE, "}"),
+      (IDENT("delivery".into()), "delivery"),
+      (ASSIGN, "="),
+      (LPAREN, "("),
+      (IDENT("street".into()), "street"),
+      (COLON, ":"),
+      (TSTR, "tstr"),
+      (COMMA, ","),
+      (OPTIONAL, "?"),
+      (NUMBER, "number"),
+      (CUT, "^"),
+      (ARROWMAP, "=>"),
+      (UINT, "uint"),
+      (COMMA, ","),
+      (IDENT("city".into()), "city"),
+      (GCHOICE, "//"),
+      (IDENT("po-box".into()), "po-box"),
+      (COLON, ":"),
+      (UINT, "uint"),
+      (COMMA, ","),
+      (IDENT("city".into()), "city"),
+      (GCHOICE, "//"),
+      (IDENT("per-pickup".into()), "per-pickup"),
+      (COLON, ":"),
+      (TRUE, "true"),
+      (RPAREN, ")"),
+      (IDENT("city".into()), "city"),
+      (ASSIGN, "="),
+      (LPAREN, "("),
+      (IDENT("name".into()), "name"),
+      (COLON, ":"),
+      (TSTR, "tstr"),
+      (COMMA, ","),
+      (IDENT("zip-code".into()), "zip-code"),
+      (COLON, ":"),
+      (UINT, "uint"),
+      (RPAREN, ")"),
+    ];
+
+    let mut l = Lexer::new(input);
+
+    for (expected_tok, literal) in expected_tok.iter() {
+      let tok = l.next_token().unwrap();
+      assert_eq!((expected_tok, *literal), (&tok, &*tok.to_string()))
+    }
+  }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 00000000..010740cc
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,5 @@
+mod ast;
+mod lexer;
+pub mod repl;
+mod token;
+pub mod parser;
\ No newline at end of file
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 00000000..3c3d1fd1
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,336 @@
+use super::ast::{GenericParm, Identifier, Node, Rule, Type, Type1, Type2, TypeRule, CDDL, RangeCtlOp};
+use super::lexer::Lexer;
+use super::token::Token;
+use std::error::Error;
+use std::mem;
+
+struct Parser<'a> {
+  l: &'a mut Lexer<'a>, // token source
+  cur_token: Token, // token currently being parsed
+  peek_token: Token, // one-token lookahead, refilled by next_token()
+  errors: Vec<Box<Error>>, // messages recorded by peek_error()
+}
+
+impl<'a> Parser<'a> {
+  /// Creates a parser over `l` and primes `cur_token`/`peek_token` with the
+  /// first two tokens of the input.
+  fn new(l: &'a mut Lexer<'a>) -> Result<Parser<'a>, Box<dyn Error>> {
+    let mut p = Parser {
+      l,
+      cur_token: Token::EOF,
+      peek_token: Token::EOF,
+      errors: Vec::default(),
+    };
+
+    p.next_token()?;
+    p.next_token()?;
+
+    Ok(p)
+  }
+
+  /// Moves `peek_token` into `cur_token` and pulls a new `peek_token` from the lexer.
+  fn next_token(&mut self) -> Result<(), Box<dyn Error>> {
+    mem::swap(&mut self.cur_token, &mut self.peek_token);
+    self.peek_token = self.l.next_token()?;
+    Ok(())
+  }
+
+  /// Parses rules until `cur_token` is `Token::EOF`.
+  fn parse_cddl(&mut self) -> Result<CDDL, Box<dyn Error>> {
+    let mut c = CDDL::default();
+
+    while self.cur_token != Token::EOF {
+      c.rules.push(self.parse_rule()?);
+    }
+
+    Ok(c)
+  }
+
+  /// Parses `name [genericparm] (= | /= | //=) type`, starting on the rule name.
+  /// Group entries (a leading `(`) are not implemented yet and panic.
+  fn parse_rule(&mut self) -> Result<Rule, Box<dyn Error>> {
+    let name = match &self.cur_token {
+      Token::IDENT(i) => Token::IDENT(i.to_string()),
+      _ => return Err("expected IDENT".into()),
+    };
+
+    // Step onto the token after the name. parse_genericparm starts on '<' and
+    // returns with cur_token on the token that follows '>'.
+    self.next_token()?;
+    let gp = if self.cur_token_is(Token::LANGLEBRACKET) {
+      Some(self.parse_genericparm()?)
+    } else {
+      None
+    };
+
+    // Inspect the assignment token once, so a valid `/=` or `//=` rule does not
+    // leave "expected ASSIGN" entries in `errors`. `//=` still yields a type rule.
+    let is_type_choice_alternate = match self.cur_token {
+      Token::ASSIGN | Token::GCHOICEALT => false,
+      Token::TCHOICEALT => true,
+      _ => return Err("Expected ASSIGN".into()),
+    };
+
+    self.next_token()?;
+
+    // Parse grpent
+    let t = if self.cur_token_is(Token::LPAREN) {
+      unimplemented!()
+    } else {
+      self.parse_type()?
+    };
+
+    Ok(Rule::Type(TypeRule {
+      name: Identifier(name),
+      generic_param: gp,
+      is_type_choice_alternate,
+      value: t,
+    }))
+  }
+
+  /// Parses `<id, id, ...>`. Skips the current token (expected to be `<`) and
+  /// returns with `cur_token` on the token after the closing `>`.
+  fn parse_genericparm(&mut self) -> Result<GenericParm, Box<dyn Error>> {
+    self.next_token()?;
+
+    let mut generic_params = GenericParm(Vec::new());
+
+    while !self.cur_token_is(Token::RANGLEBRACKET) {
+      match &self.cur_token {
+        Token::IDENT(i) => {
+          generic_params.0.push(Identifier::from(i.to_string()));
+          self.next_token()?;
+        }
+        Token::COMMA => self.next_token()?,
+        _ => return Err("Illegal token".into()),
+      }
+    }
+
+    self.next_token()?;
+
+    Ok(generic_params)
+  }
+
+  /// Parses one or more type choices separated by `/`.
+  fn parse_type(&mut self) -> Result<Type, Box<dyn Error>> {
+    let mut t = Type(Vec::new());
+
+    t.0.push(self.parse_type1()?);
+
+    while self.cur_token_is(Token::TCHOICE) {
+      self.next_token()?;
+      t.0.push(self.parse_type1()?);
+    }
+
+    Ok(t)
+  }
+
+  /// Parses a single type choice: a lexed range token or a type name.
+  fn parse_type1(&mut self) -> Result<Type1, Box<dyn Error>> {
+    let t1 = match &self.cur_token {
+      Token::RANGE((l, u, i)) => Type1 {
+        type2: Type2::Value(l.to_string().into()),
+        operator: Some((RangeCtlOp::RangeOp(*i), Type2::Value(u.to_string().into()))),
+      },
+      // parse_type2 advances past the token it consumes
+      Token::IDENT(_) => {
+        return Ok(Type1 {
+          type2: self.parse_type2()?,
+          operator: None,
+        });
+      }
+      _ => return Err("Unknown".into()),
+    };
+
+    // Step past the range token; otherwise the caller would see it again
+    self.next_token()?;
+
+    Ok(t1)
+  }
+
+  /// Parses a quoted value or a type name and advances one token past it.
+  /// NOTE: for a quoted value this stops on the closing quote, which is not consumed.
+  fn parse_type2(&mut self) -> Result<Type2, Box<dyn Error>> {
+    let t2 = match &self.cur_token {
+      // value
+      Token::DQUOTE => {
+        if !self.expect_peek(&Token::IDENT("".into())) {
+          return Err("Expecting \"IDENT(String)\"".into());
+        }
+
+        match &self.cur_token {
+          Token::IDENT(ident) => Ok(Type2::Value(ident.to_string().into())),
+          _ => Err("Expecting \"IDENT(String)\"".into()),
+        }
+      }
+      // typename [genericarg]
+      Token::IDENT(ident) => {
+        // The optional genericarg is not parsed yet
+        Ok(Type2::Typename((ident.to_string().into(), None)))
+      }
+      _ => return Err("Unknown".into()),
+    };
+
+    self.next_token()?;
+
+    t2
+  }
+
+  /// True when `cur_token` is the same variant as `t`; payloads are ignored.
+  fn cur_token_is(&self, t: Token) -> bool {
+    mem::discriminant(&self.cur_token) == mem::discriminant(&t)
+  }
+
+  /// True when `peek_token` is the same variant as `t`; payloads are ignored.
+  fn peek_token_is(&self, t: &Token) -> bool {
+    mem::discriminant(&self.peek_token) == mem::discriminant(t)
+  }
+
+  /// Advances when `peek_token` matches `t`; otherwise records an error and returns false.
+  fn expect_peek(&mut self, t: &Token) -> bool {
+    if self.peek_token_is(t) {
+      return self.next_token().is_ok();
+    }
+
+    self.peek_error(t);
+
+    false
+  }
+
+  /// Records an "expected X, got Y" message for the current `peek_token`.
+  fn peek_error(&mut self, t: &Token) {
+    self.errors.push(
+      format!(
+        "expected next token to be {:?}, got {:?} instead",
+        t, self.peek_token
+      )
+      .into(),
+    )
+  }
+}
+
+#[cfg(test)]
+mod tests {
+  use super::super::{ast, lexer::Lexer};
+  use super::*;
+
+  #[test]
+  fn verify_rule() -> Result<(), Box<Error>> {
+    let input = r#"myrule = myotherrule
+
+secondrule = thirdrule"#;
+
+    let mut l = Lexer::new(input);
+    let mut p = Parser::new(&mut l)?;
+
+    let cddl = p.parse_cddl()?;
+    check_parser_errors(&p)?;
+
+    if cddl.rules.len() != 2 {
+      eprintln!(
+        "cddl.rules does not contain 2 statements. got='{}'",
+        cddl.rules.len()
+      );
+    }
+
+    let expected_identifiers = ["myrule", "secondrule"];
+
+    for (idx, expected_identifier) in expected_identifiers.iter().enumerate() {
+      let rule = &cddl.rules[idx];
+      assert!(test_rule(rule, expected_identifier));
+    }
+
+    Ok(())
+  }
+
+  fn test_rule(r: &Rule, name: &str) -> bool {
+    match r {
+      Rule::Type(tr) => {
+        if tr.name.0.to_string() != name {
+          eprintln!(
+            "rule.name.value not '{}'. got={}",
+            name,
+            tr.name.0.to_string()
+          );
+          return false;
+        }
+
+        if tr.name.token_literal().unwrap() != format!("{:?}", Token::IDENT(name.into())) {
+          eprintln!(
+            "rule.value not '{}'. got={}",
+            name,
+            tr.name.token_literal().unwrap()
+          );
+          return false;
+        }
+
+        true
+      }
+      _ => false,
+    }
+  }
+
+  #[test]
+  fn verify_type() -> Result<(), Box<Error>> {
+    let input = r#"tchoice1 / tchoice2"#;
+
+    let mut l = Lexer::new(input);
+    let mut p = Parser::new(&mut l)?;
+
+    let t = p.parse_type()?;
+    check_parser_errors(&p)?;
+
+    if t.0.len() != 2 {
+      eprintln!(
+        "type.0 does not contain 2 type choices. got='{}'",
+        t.0.len()
+      );
+    }
+
+    let expected_t1_identifiers = ["tchoice1", "tchoice2"];
+
+    for (idx, expected_t1_identifier) in expected_t1_identifiers.iter().enumerate() {
+      let t_choice = &t.0[idx];
+      assert_eq!(t_choice.type2.to_string(), *expected_t1_identifier);
+    }
+
+    Ok(())
+  }
+
+  #[test]
+  fn verify_genericparm() -> Result<(), Box<Error>> {
+    let input = r#"<t, v>"#;
+
+    let mut l = Lexer::new(input);
+    let mut p = Parser::new(&mut l)?;
+
+    let gps = p.parse_genericparm()?;
+    check_parser_errors(&p)?;
+
+    if gps.0.len() != 2 {
+      eprintln!("GenericParm does not contain 2 generic parameters. got='{}'", gps.0.len());
+    }
+
+    let expected_generic_params = ["t", "v"];
+
+    for (idx, expected_generic_param) in expected_generic_params.iter().enumerate() {
+      let gp = &gps.0[idx];
+      assert_eq!(gp.to_string(), *expected_generic_param);
+    }
+
+    Ok(())
+  }
+
+  fn check_parser_errors(p: &Parser) -> Result<(), Box<Error>> {
+    if p.errors.len() == 0 {
+      return Ok(());
+    }
+
+    for err in p.errors.iter() {
+      eprintln!("parser error: {}", err.to_string());
+    }
+
+    Err("Parser has errors".into())
+  }
+}
diff --git a/src/repl.rs b/src/repl.rs
new file mode 100644
index 00000000..5f1b8661
--- /dev/null
+++ b/src/repl.rs
@@ -0,0 +1,24 @@
+use super::lexer::Lexer;
+use super::token::Token;
+use std::error;
+use std::io::{BufRead, Write};
+
+const PROMPT: &[u8] = b">> ";
+
+/// Prompts on `writer`, lexes each line read from `reader` and prints its tokens; returns at end of input.
+pub fn start<R: BufRead, W: Write>(mut reader: R, mut writer: W) -> Result<(), Box<dyn error::Error>> {
+  loop {
+    writer.write_all(PROMPT)?;
+    writer.flush()?; // also pushes out the tokens printed for the previous line
+    let mut line = String::new();
+    if reader.read_line(&mut line)? == 0 {
+      return Ok(()); // read_line yields 0 only at end of input; looping on would prompt forever
+    }
+    let mut l = Lexer::new(&*line);
+    let mut tok = l.next_token()?;
+    while tok != Token::EOF {
+      writer.write_all(format!("{:?} [literal: \"{}\"]\n", tok, tok.to_string()).as_bytes())?;
+      tok = l.next_token()?;
+    }
+  }
+}
diff --git a/src/token.rs b/src/token.rs
new file mode 100644
index 00000000..ecf90e7e
--- /dev/null
+++ b/src/token.rs
@@ -0,0 +1,239 @@
+use std::fmt;
+
+#[derive(PartialEq, Debug)]
+pub enum Token {
+  ILLEGAL,
+  EOF,
+
+  IDENT(String),
+  INTLITERAL(usize),
+  FLOATLITERAL(f64),
+  TAG((usize, String)),
+
+  // Operators
+  ASSIGN,
+  OPTIONAL,
+  ASTERISK,
+  OCCURENCE((usize, usize)),
+  PLUS,
+  UNWRAP,
+  CONTROL(String),
+
+  // Delimiters
+  COMMA,
+  COLON,
+  SEMICOLON,
+
+  TCHOICE,
+  GCHOICE,
+  TCHOICEALT,
+  GCHOICEALT,
+  ARROWMAP,
+  CUT,
+  TSOCKET,
+  GSOCKET,
+
+  RANGE((String, String, bool)),
+
+  LPAREN,
+  RPAREN,
+  LBRACE,
+  RBRACE,
+  LBRACKET,
+  RBRACKET,
+  LANGLEBRACKET,
+  RANGLEBRACKET,
+  DQUOTE,
+
+  // Control operators
+  SIZE,
+  BITS,
+  REGEXP,
+  CBOR,
+  CBORSEQ,
+  WITHIN,
+  AND,
+  LT,
+  LE,
+  GT,
+  GE,
+  EQ,
+  NE,
+  DEFAULT,
+
+  GTOCHOICE,
+
+  // Standard prelude
+  FALSE,
+  TRUE,
+  BOOL,
+  NIL,
+  NULL,
+  UINT,
+  NINT,
+  INT,
+  FLOAT16,
+  FLOAT32,
+  FLOAT64,
+  FLOAT1632,
+  FLOAT3264,
+  FLOAT,
+  BSTR,
+  TSTR,
+  ANY,
+  BYTES,
+  TEXT,
+  TDATE,
+  TIME,
+  NUMBER,
+  BIGUINT,
+  BIGNINT,
+  INTEGER,
+  UNSIGNED,
+  DECFRAC,
+  BIGFLOAT,
+  EB64URL,
+  EB64LEGACY,
+  EB16,
+  ENCODEDCBOR,
+  URI,
+  B64URL,
+  B64LEGACY,
+  TREGEXP,
+  MIMEMESSAGE,
+  CBORANY,
+  UNDEFINED,
+}
+
+impl<'a> fmt::Display for Token {
+  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    match self {
+      Token::IDENT(ident) => write!(f, "{}", ident),
+      Token::ILLEGAL => write!(f, ""),
+      Token::ASSIGN => write!(f, "="),
+      Token::PLUS => write!(f, "+"),
+      Token::OPTIONAL => write!(f, "?"),
+      Token::ASTERISK => write!(f, "*"),
+      Token::LPAREN => write!(f, "("),
+      Token::RPAREN => write!(f, ")"),
+      Token::LBRACE => write!(f, "{{"),
+      Token::RBRACE => write!(f, "}}"),
+      Token::TCHOICE => write!(f, "/"),
+      Token::TCHOICEALT => write!(f, "/="),
+      Token::GCHOICEALT => write!(f, "//="),
+      Token::COMMA => write!(f, ","),
+      Token::SEMICOLON => write!(f, ";"),
+      Token::COLON => write!(f, ":"),
+      Token::CUT => write!(f, "^"),
+      Token::EOF => write!(f, ""),
+      Token::TSTR => write!(f, "tstr"),
+      Token::LANGLEBRACKET => write!(f, "<"),
+      Token::RANGLEBRACKET => write!(f, ">"),
+      Token::DQUOTE => write!(f, "\""),
+      Token::INT => write!(f, "int"),
+      Token::UINT => write!(f, "uint"),
+      Token::INTLITERAL(i) => write!(f, "{}", i),
+      Token::FLOATLITERAL(fl) => write!(f, "{}", fl),
+      Token::ARROWMAP => write!(f, "=>"),
+      Token::SIZE => write!(f, ".size"),
+      Token::BITS => write!(f, ".bits"),
+      Token::REGEXP => write!(f, ".regexp"),
+      Token::CBOR => write!(f, ".cbor"),
+      Token::CBORSEQ => write!(f, ".cborseq"),
+      Token::WITHIN => write!(f, ".within"),
+      Token::AND => write!(f, ".and"),
+      Token::LT => write!(f, ".lt"),
+      Token::LE => write!(f, ".le"),
+      Token::GT => write!(f, ".gt"),
+      Token::GE => write!(f, ".ge"),
+      Token::EQ => write!(f, ".eq"),
+      Token::NE => write!(f, ".ne"),
+      Token::DEFAULT => write!(f, ".default"),
+      Token::NUMBER => write!(f, "number"),
+      Token::BSTR => write!(f, "bstr"),
+      Token::GCHOICE => write!(f, "//"),
+      Token::TRUE => write!(f, "true"),
+      Token::GTOCHOICE => write!(f, "&"),
+      Token::RANGE((l, u, i)) => {
+        if *i {
+          return write!(f, "{}..{}", l, u);
+        }
+
+        write!(f, "{}...{}", l, u)
+      }
+      Token::TAG((tn, tt)) => {
+        if tt != "" {
+          return write!(f, "#6.{}({})", tn, tt);
+        }
+
+        write!(f, "#6.{}", tn)
+      }
+      _ => write!(f, ""),
+    }
+  }
+}
+
+pub fn lookup_control(ident: &str) -> Token { // maps a control operator name to its token; the lexer passes the name without its leading '.'
+  match ident {
+    "size" => Token::SIZE,
+    "bits" => Token::BITS,
+    "regexp" => Token::REGEXP,
+    "cbor" => Token::CBOR,
+    "cborseq" => Token::CBORSEQ,
+    "within" => Token::WITHIN,
+    "and" => Token::AND,
+    "lt" => Token::LT,
+    "le" => Token::LE,
+    "gt" => Token::GT,
+    "ge" => Token::GE,
+    "eq" => Token::EQ,
+    "ne" => Token::NE,
+    "default" => Token::DEFAULT,
+    _ => Token::ILLEGAL, // unknown names are reported as ILLEGAL rather than as an error
+  }
+}
+
+pub fn lookup_ident(ident: &str) -> Token { // maps the names listed below to their dedicated tokens
+  match ident {
+    "false" => Token::FALSE,
+    "true" => Token::TRUE,
+    "bool" => Token::BOOL,
+    "nil" => Token::NIL,
+    "null" => Token::NULL,
+    "uint" => Token::UINT,
+    "nint" => Token::NINT,
+    "int" => Token::INT,
+    "float16" => Token::FLOAT16,
+    "float32" => Token::FLOAT32,
+    "float64" => Token::FLOAT64,
+    "float16-32" => Token::FLOAT1632,
+    "float32-64" => Token::FLOAT3264,
+    "float" => Token::FLOAT,
+    "bstr" => Token::BSTR,
+    "tstr" => Token::TSTR,
+    "any" => Token::ANY,
+    "bytes" => Token::BYTES,
+    "text" => Token::TEXT,
+    "tdate" => Token::TDATE,
+    "time" => Token::TIME,
+    "number" => Token::NUMBER,
+    "biguint" => Token::BIGUINT,
+    "bignint" => Token::BIGNINT,
+    "integer" => Token::INTEGER,
+    "unsigned" => Token::UNSIGNED,
+    "decfrac" => Token::DECFRAC,
+    "bigfloat" => Token::BIGFLOAT,
+    "eb64url" => Token::EB64URL,
+    "eb64legacy" => Token::EB64LEGACY,
+    "eb16" => Token::EB16,
+    "encoded-cbor" => Token::ENCODEDCBOR,
+    "uri" => Token::URI,
+    "b64url" => Token::B64URL,
+    "b64legacy" => Token::B64LEGACY,
+    "regexp" => Token::TREGEXP,
+    "mime-message" => Token::MIMEMESSAGE,
+    "cbor-any" => Token::CBORANY,
+    "undefined" => Token::UNDEFINED,
+    _ => Token::IDENT(ident.into()), // any other name is an ordinary identifier
+  }
+}