Merge branch 'token-refactor'
Seasawher committed Aug 16, 2024
2 parents b9175d8 + 984f31b commit f68ac4f
Showing 3 changed files with 160 additions and 115 deletions.
48 changes: 23 additions & 25 deletions Monkey/Lexer/Lexer.lean
@@ -56,7 +56,7 @@ def readIdentifier : StateM Lexer String := do
|>.drop position

/-- Keep reading, updating the Lexer, until a character that is not part of a number appears -/
def readNumber : StateM Lexer String := do
def readNumber : StateM Lexer Int := do
let mut l ← get
let position := l.position
while l.ch.isDigit do
@@ -65,8 +65,9 @@ def readNumber : StateM Lexer String := do
return l.input
|>.take l.position
|>.drop position
|>.toInt!

open TokenType
open Token

-- Function that converts a Char to a String
#check String.singleton
@@ -83,34 +84,31 @@ def skipWhitespace : StateM Lexer Unit := do
def nextToken : StateM Lexer Token := do
skipWhitespace
let mut l ← get
let ch := String.singleton l.ch
let mut tok := match l.ch with
| '=' => ⟨.ASSIGN, ch⟩
| '+' => ⟨.PLUS, ch⟩
| '-' => ⟨.MINUS, ch⟩
| '!' => ⟨.BANG, ch⟩
| '/' => ⟨.SLASH, ch⟩
| '*' => ⟨.ASTERISK, ch⟩
| '<' => ⟨.LT, ch⟩
| '>' => ⟨.GT, ch⟩
| '(' => ⟨.LPAREN, ch⟩
| ')' => ⟨.RPAREN, ch⟩
| '{' => ⟨.LBRACE, ch⟩
| '}' => ⟨.RBRACE, ch⟩
| ',' => ⟨.COMMA, ch⟩
| ';' => ⟨.SEMICOLON, ch⟩
| '\x00' => ⟨.EOF, ""⟩
| _ => ⟨.ILLEGAL, ch⟩
| '=' => ASSIGN
| '+' => PLUS
| '-' => MINUS
| '!' => BANG
| '/' => SLASH
| '*' => ASTERISK
| '<' => LT
| '>' => GT
| '(' => LPAREN
| ')' => RPAREN
| '{' => LBRACE
| '}' => RBRACE
| ',' => COMMA
| ';' => SEMICOLON
| '\x00' => EOF
| _ => ILLEGAL
if l.ch.isLetter then
let literal ← readIdentifier
let tokenType := LookupIdent literal
tok := ⟨tokenType, literal⟩
return tok
let token := LookupIdent literal
return token
else if l.ch.isDigit then
let literal ← readNumber
let tokenType := INT
tok := ⟨tokenType, literal⟩
return tok
let token : Token := INT literal
return token
readChar
return tok
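
To make the effect of this change concrete, here is a small sketch of my own (not part of the diff), assuming the refactored Token type from Monkey.Token.Token: the old lexer paired a TokenType with its literal string, e.g. ⟨.ASSIGN, ch⟩ or ⟨.INT, "5"⟩, while the new lexer returns constructors of the inductive Token directly, carrying a payload only where one is needed.

import Monkey.Token.Token

open Token

-- Tokens without a payload are bare constructors.
example : Token := ASSIGN
example : Token := SEMICOLON

-- Identifiers and integer literals carry their data inside the constructor,
-- replacing the old (TokenType, literal string) pairing.
example : Token := IDENT "five"
example : Token := INT 5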

154 changes: 91 additions & 63 deletions Monkey/Lexer/LexerTest.lean
@@ -1,33 +1,32 @@
import Monkey.Lexer.Lexer
import Monkey.Token.Token

open TokenType Lexer
open Token Lexer

/-- Test the nextToken function -/
def testNextToken (input : String) (expected : Array (TokenType × String)) : IO Unit := do
def testNextToken (input : String) (expected : Array Token) : IO Unit := do
let mut l : Lexer := Lexer.new input
for tt in expected do
for expTok in expected do
let ⟨tok, l'⟩ := l.nextToken |>.run
l := l'
if tok.type ≠ tt.fst then
throw <| .userError s!"tests failed: - tokentype wrong at \"{tt.snd}\". expected={tt.fst}, got={tok.type}"
if tok.literal ≠ tt.snd then
throw <| .userError s!"tests failed: - literal wrong. expected={tt.snd}, got={tok.literal}"
if tok ≠ expTok then
throw <| .userError s!"tests failed: - token wrong at \"{expTok}\". expected={expTok}, got={tok}"

IO.println s!"ok!"

#eval testNextToken
(input := "=+(){},;")
(expected := #[
(ASSIGN, "="),
(PLUS, "+"),
(LPAREN, "("),
(RPAREN, ")"),
(LBRACE, "{"),
(RBRACE, "}"),
(COMMA, ","),
(SEMICOLON, ";"),
(EOF, "")
ASSIGN,
PLUS,
LPAREN,
RPAREN,
LBRACE,
RBRACE,
COMMA,
SEMICOLON,
EOF
])

#eval testNextToken
@@ -39,58 +38,87 @@ def testNextToken (input : String) (expected : Array (TokenType × String)) : IO Unit := do
let result = add(five, ten);
")
(expected := #[
(LET, "let"),
(IDENT, "five"),
(ASSIGN, "="),
(INT, "5"),
(SEMICOLON, ";"),
(LET, "let"),
(IDENT, "ten"),
(ASSIGN, "="),
(INT, "10"),
(SEMICOLON, ";"),
(LET, "let"),
(IDENT, "add"),
(ASSIGN, "="),
(FUNCTION, "fn"),
(LPAREN, "("),
(IDENT, "x"),
(COMMA, ","),
(IDENT, "y"),
(RPAREN, ")"),
(LBRACE, "{"),
(IDENT, "x"),
(PLUS, "+"),
(IDENT, "y"),
(SEMICOLON, ";"),
(RBRACE, "}"),
(SEMICOLON, ";"),
(LET, "let"),
(IDENT, "result"),
(ASSIGN, "="),
(IDENT, "add"),
(LPAREN, "("),
(IDENT, "five"),
(COMMA, ","),
(IDENT, "ten"),
(RPAREN, ")"),
(SEMICOLON, ";"),
(EOF, "")
LET,
IDENT "five",
ASSIGN,
INT 5,
SEMICOLON,
LET,
IDENT "ten",
ASSIGN,
INT 10,
SEMICOLON,
LET,
IDENT "add",
ASSIGN,
FUNCTION,
LPAREN,
IDENT "x",
COMMA,
IDENT "y",
RPAREN,
LBRACE,
IDENT "x",
PLUS,
IDENT "y",
SEMICOLON,
RBRACE,
SEMICOLON,
LET,
IDENT "result",
ASSIGN,
IDENT "add",
LPAREN,
IDENT "five",
COMMA,
IDENT "ten",
RPAREN,
SEMICOLON,
EOF
])

#eval testNextToken
(input := "!-/*5;
5 < 10 > 5;")
(expected := #[
(BANG, "!"),
(MINUS, "-"),
(SLASH, "/"),
(ASTERISK, "*"),
(INT, "5"),
(SEMICOLON, ";"),
(INT, "5"),
(LT, "<"),
(INT, "10"),
(GT, ">"),
(INT, "5")
BANG,
MINUS,
SLASH,
ASTERISK,
INT 5,
SEMICOLON,
INT 5,
LT,
INT 10,
GT,
INT 5,
SEMICOLON,
EOF
])

#eval testNextToken
(input := "if (5 < 10) {
return true;
} else {
return false;
}")
(expected := #[
IF,
LPAREN,
INT 5,
LT,
INT 10,
RPAREN,
LBRACE,
RETURN,
TRUE,
SEMICOLON,
RBRACE,
ELSE,
LBRACE,
RETURN,
FALSE,
SEMICOLON,
RBRACE,
EOF
])
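
Because the refactored Token derives BEq and DecidableEq, the test above can compare whole tokens with a single check (tok ≠ expTok) instead of comparing a token type and a literal string separately. A minimal sketch of that comparison, again my own illustration under the same Token definition:

import Monkey.Token.Token

open Token

-- Equality holds only when both the constructor and its payload match.
#eval IDENT "five" == IDENT "five"  -- true
#eval IDENT "five" == IDENT "ten"   -- false
#eval INT 5 == SEMICOLON            -- false
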
73 changes: 46 additions & 27 deletions Monkey/Token/Token.lean
@@ -1,15 +1,19 @@
import Lean.Data.HashMap

/-- The kinds of Token -/
inductive TokenType where
/-- Token.
In the book, Token was defined as a structure, so it redundantly carried
literal information even when no literal was needed.
This definition fixes that. -/
inductive Token where
/-- Identifier -/
| IDENT (name : String)
/-- Numeric literal -/
| INT (value : Int)
/-- Unacceptable input (error) -/
| ILLEGAL
/-- End of file -/
| EOF
/-- Identifier -/
| IDENT
/-- Integer -/
| INT
/-- Assignment symbol "=" -/
| ASSIGN
/-- Addition symbol + -/
@@ -42,15 +46,25 @@ inductive TokenType where
| LT
/-- Greater-than ">" -/
| GT
deriving Repr, DecidableEq
/-- true : Bool -/
| TRUE
/-- false : Bool -/
| FALSE
/-- IF keyword -/
| IF
/-- ELSE keyword -/
| ELSE
/-- RETURN keyword -/
| RETURN
deriving Repr, BEq, DecidableEq

/-- Convert a TokenType to a string -/
def TokenType.toString (t : TokenType) : String :=
/-- Convert a Token to a string -/
def Token.toString (t : Token) : String :=
match t with
| .ILLEGAL => "ILLEGAL"
| .EOF => "EOF"
| .IDENT => "IDENT"
| .INT => "INT"
| .IDENT lit => lit
| .INT lit => ToString.toString lit
| .ASSIGN => "="
| .PLUS => "+"
| .COMMA => ","
Expand All @@ -67,27 +81,32 @@ def TokenType.toString (t : TokenType) : String :=
| .SLASH => "/"
| .LT => "<"
| .GT => ">"
| .TRUE => "TRUE"
| .FALSE => "FALSE"
| .IF => "IF"
| .ELSE => "ELSE"
| .RETURN => "RETURN"

instance : ToString TokenType where
toString := TokenType.toString

set_option linter.missingDocs false in

/-- Token -/
structure Token where
type : TokenType
literal : String
deriving Repr, BEq, DecidableEq
instance : ToString Token where
toString := Token.toString

open TokenType Lean
open Lean Token

/-- The language keywords -/
def keywords : HashMap String TokenType :=
let list : List (String × TokenType) := [("fn", FUNCTION), ("let", LET)]
/-- Dictionary storing the language keywords -/
def keywords : HashMap String Token :=
let list : List (String × Token) := [
("fn", FUNCTION),
("let", LET),
("true", TRUE),
("false", FALSE),
("if", IF),
("else", ELSE),
("return", RETURN),
]
HashMap.ofList list

/-- Classify whether an identifier is user-defined or a language keyword -/
def LookupIdent (ident : String) : TokenType :=
def LookupIdent (ident : String) : Token :=
match keywords.find? ident with
| some tok => tok
| none => IDENT
| none => IDENT ident
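
As a quick sanity check of the keyword lookup (my own sketch, not part of the commit): entries in the keywords map resolve to their dedicated constructors, and any other name becomes an IDENT carrying that name.

import Monkey.Token.Token

open Token

#eval LookupIdent "let" == LET                -- true: "let" is a keyword
#eval LookupIdent "return" == RETURN          -- true: "return" is a keyword
#eval LookupIdent "foobar" == IDENT "foobar"  -- true: an arbitrary, made-up name becomes an identifier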
