diff --git a/Monkey/Lexer/Lexer.lean b/Monkey/Lexer/Lexer.lean
index 3d09d5b..40949ab 100644
--- a/Monkey/Lexer/Lexer.lean
+++ b/Monkey/Lexer/Lexer.lean
@@ -56,7 +56,7 @@ def readIdentifier : StateM Lexer String := do
     |>.drop position
 
 /-- Keep reading, updating the Lexer, until a character that is not part of a number appears -/
-def readNumber : StateM Lexer String := do
+def readNumber : StateM Lexer Int := do
   let mut l ← get
   let position := l.position
   while l.ch.isDigit do
@@ -65,8 +65,9 @@
   return l.input
     |>.take l.position
     |>.drop position
+    |>.toInt!
 
-open TokenType
+open Token
 
 -- function that converts a Char into a String
 #check String.singleton
@@ -83,34 +84,31 @@ def skipWhitespace : StateM Lexer Unit := do
 def nextToken : StateM Lexer Token := do
   skipWhitespace
   let mut l ← get
-  let ch := String.singleton l.ch
   let mut tok :=
     match l.ch with
-    | '=' => ⟨.ASSIGN, ch⟩
-    | '+' => ⟨.PLUS, ch⟩
-    | '-' => ⟨.MINUS, ch⟩
-    | '!' => ⟨.BANG, ch⟩
-    | '/' => ⟨.SLASH, ch⟩
-    | '*' => ⟨.ASTERISK, ch⟩
-    | '<' => ⟨.LT, ch⟩
-    | '>' => ⟨.GT, ch⟩
-    | '(' => ⟨.LPAREN, ch⟩
-    | ')' => ⟨.RPAREN, ch⟩
-    | '{' => ⟨.LBRACE, ch⟩
-    | '}' => ⟨.RBRACE, ch⟩
-    | ',' => ⟨.COMMA, ch⟩
-    | ';' => ⟨.SEMICOLON, ch⟩
-    | '\x00' => ⟨.EOF, ""⟩
-    | _ => ⟨.ILLEGAL, ch⟩
+    | '=' => ASSIGN
+    | '+' => PLUS
+    | '-' => MINUS
+    | '!' => BANG
+    | '/' => SLASH
+    | '*' => ASTERISK
+    | '<' => LT
+    | '>' => GT
+    | '(' => LPAREN
+    | ')' => RPAREN
+    | '{' => LBRACE
+    | '}' => RBRACE
+    | ',' => COMMA
+    | ';' => SEMICOLON
+    | '\x00' => EOF
+    | _ => ILLEGAL
   if l.ch.isLetter then
     let literal ← readIdentifier
-    let tokenType := LookupIdent literal
-    tok := ⟨tokenType, literal⟩
-    return tok
+    let token := LookupIdent literal
+    return token
  else if l.ch.isDigit then
    let literal ← readNumber
-    let tokenType := INT
-    tok := ⟨tokenType, literal⟩
-    return tok
+    let token : Token := INT literal
+    return token
  readChar
  return tok
diff --git a/Monkey/Lexer/LexerTest.lean b/Monkey/Lexer/LexerTest.lean
index 6a36c16..511b7bf 100644
--- a/Monkey/Lexer/LexerTest.lean
+++ b/Monkey/Lexer/LexerTest.lean
@@ -1,33 +1,31 @@
 import Monkey.Lexer.Lexer
 import Monkey.Token.Token
 
-open TokenType Lexer
+open Token Lexer
 
 /-- Test the nextToken function -/
-def testNextToken (input : String) (expected : Array (TokenType × String)) : IO Unit := do
+def testNextToken (input : String) (expected : Array Token) : IO Unit := do
   let mut l : Lexer := Lexer.new input
-  for tt in expected do
+  for expTok in expected do
     let ⟨tok, l'⟩ := l.nextToken |>.run
     l := l'
-    if tok.type ≠ tt.fst then
-      throw <| .userError s!"tests failed: - tokentype wrong at \"{tt.snd}\". expected={tt.fst}, got={tok.type}"
-    if tok.literal ≠ tt.snd then
-      throw <| .userError s!"tests failed: - literal wrong. expected={tt.snd}, got={tok.literal}"
+    if tok ≠ expTok then
+      throw <| .userError s!"tests failed: - token wrong at \"{expTok}\". expected={expTok}, got={tok}"
   IO.println s!"ok!"
 
 #eval testNextToken
   (input := "=+(){},;")
   (expected := #[
-    (ASSIGN, "="),
-    (PLUS, "+"),
-    (LPAREN, "("),
-    (RPAREN, ")"),
-    (LBRACE, "{"),
-    (RBRACE, "}"),
-    (COMMA, ","),
-    (SEMICOLON, ";"),
-    (EOF, "")
+    ASSIGN,
+    PLUS,
+    LPAREN,
+    RPAREN,
+    LBRACE,
+    RBRACE,
+    COMMA,
+    SEMICOLON,
+    EOF
   ])
 
 #eval testNextToken
@@ -39,58 +38,87 @@ def testNextToken (input : String) (expected : Array (TokenType × String)) : IO
     let result = add(five, ten);
     ")
   (expected := #[
-    (LET, "let"),
-    (IDENT, "five"),
-    (ASSIGN, "="),
-    (INT, "5"),
-    (SEMICOLON, ";"),
-    (LET, "let"),
-    (IDENT, "ten"),
-    (ASSIGN, "="),
-    (INT, "10"),
-    (SEMICOLON, ";"),
-    (LET, "let"),
-    (IDENT, "add"),
-    (ASSIGN, "="),
-    (FUNCTION, "fn"),
-    (LPAREN, "("),
-    (IDENT, "x"),
-    (COMMA, ","),
-    (IDENT, "y"),
-    (RPAREN, ")"),
-    (LBRACE, "{"),
-    (IDENT, "x"),
-    (PLUS, "+"),
-    (IDENT, "y"),
-    (SEMICOLON, ";"),
-    (RBRACE, "}"),
-    (SEMICOLON, ";"),
-    (LET, "let"),
-    (IDENT, "result"),
-    (ASSIGN, "="),
-    (IDENT, "add"),
-    (LPAREN, "("),
-    (IDENT, "five"),
-    (COMMA, ","),
-    (IDENT, "ten"),
-    (RPAREN, ")"),
-    (SEMICOLON, ";"),
-    (EOF, "")
+    LET,
+    IDENT "five",
+    ASSIGN,
+    INT 5,
+    SEMICOLON,
+    LET,
+    IDENT "ten",
+    ASSIGN,
+    INT 10,
+    SEMICOLON,
+    LET,
+    IDENT "add",
+    ASSIGN,
+    FUNCTION,
+    LPAREN,
+    IDENT "x",
+    COMMA,
+    IDENT "y",
+    RPAREN,
+    LBRACE,
+    IDENT "x",
+    PLUS,
+    IDENT "y",
+    SEMICOLON,
+    RBRACE,
+    SEMICOLON,
+    LET,
+    IDENT "result",
+    ASSIGN,
+    IDENT "add",
+    LPAREN,
+    IDENT "five",
+    COMMA,
+    IDENT "ten",
+    RPAREN,
+    SEMICOLON,
+    EOF
   ])
 
 #eval testNextToken
   (input := "!-/*5; 5 < 10 > 5;")
   (expected := #[
-    (BANG, "!"),
-    (MINUS, "-"),
-    (SLASH, "/"),
-    (ASTERISK, "*"),
-    (INT, "5"),
-    (SEMICOLON, ";"),
-    (INT, "5"),
-    (LT, "<"),
-    (INT, "10"),
-    (GT, ">"),
-    (INT, "5")
-  ])
+    BANG,
+    MINUS,
+    SLASH,
+    ASTERISK,
+    INT 5,
+    SEMICOLON,
+    INT 5,
+    LT,
+    INT 10,
+    GT,
+    INT 5,
+    SEMICOLON,
+    EOF
+  ])
+
+#eval testNextToken
+  (input := "if (5 < 10) {
+    return true;
+  } else {
+    return false;
+  }")
+  (expected := #[
+    IF,
+    LPAREN,
+    INT 5,
+    LT,
+    INT 10,
+    RPAREN,
+    LBRACE,
+    RETURN,
+    TRUE,
+    SEMICOLON,
+    RBRACE,
+    ELSE,
+    LBRACE,
+    RETURN,
+    FALSE,
+    SEMICOLON,
+    RBRACE,
+    EOF
+  ])
diff --git a/Monkey/Token/Token.lean b/Monkey/Token/Token.lean
index dff7311..06b21d8 100644
--- a/Monkey/Token/Token.lean
+++ b/Monkey/Token/Token.lean
@@ -1,15 +1,19 @@
 import Lean.Data.HashMap
 
-/-- Kinds of Token -/
-inductive TokenType where
+/-- Token
+
+In the book, Token is defined as a structure, which was redundant:
+it carried a literal field even when no literal was needed.
+This definition fixes that. -/
+inductive Token where
+  /-- Identifier -/
+  | IDENT (name : String)
+  /-- Numeric literal -/
+  | INT (value : Int)
   /-- Error: input that cannot be accepted -/
   | ILLEGAL
   /-- End of file -/
   | EOF
-  /-- Identifier -/
-  | IDENT
-  /-- Integer -/
-  | INT
   /-- Assignment symbol "=" -/
   | ASSIGN
   /-- Addition symbol + -/
@@ -42,15 +46,25 @@ inductive TokenType where
   | LT
   /-- Greater-than ">" -/
   | GT
-deriving Repr, DecidableEq
+  /-- true : Bool -/
+  | TRUE
+  /-- false : Bool -/
+  | FALSE
+  /-- `if` keyword -/
+  | IF
+  /-- `else` keyword -/
+  | ELSE
+  /-- `return` keyword -/
+  | RETURN
+deriving Repr, BEq, DecidableEq
 
-/-- Convert a TokenType to a string -/
-def TokenType.toString (t : TokenType) : String :=
+/-- Convert a Token to a string -/
+def Token.toString (t : Token) : String :=
   match t with
   | .ILLEGAL => "ILLEGAL"
   | .EOF => "EOF"
-  | .IDENT => "IDENT"
-  | .INT => "INT"
+  | .IDENT lit => lit
+  | .INT lit => ToString.toString lit
   | .ASSIGN => "="
   | .PLUS => "+"
   | .COMMA => ","
@@ -67,27 +81,32 @@ def TokenType.toString (t : TokenType) : String :=
   | .SLASH => "/"
   | .LT => "<"
   | .GT => ">"
+  | .TRUE => "TRUE"
+  | .FALSE => "FALSE"
+  | .IF => "IF"
| .ELSE => "ELSE" + | .RETURN => "RETURN" -instance : ToString TokenType where - toString := TokenType.toString - -set_option linter.missingDocs false in - -/-- トークン -/ -structure Token where - type : TokenType - literal : String -deriving Repr, BEq, DecidableEq +instance : ToString Token where + toString := Token.toString -open TokenType Lean +open Lean Token -/-- 言語のキーワード -/ -def keywords : HashMap String TokenType := - let list : List (String × TokenType) := [("fn", FUNCTION), ("let", LET)] +/-- 言語のキーワードを格納する辞書 -/ +def keywords : HashMap String Token := + let list : List (String × Token) := [ + ("fn", FUNCTION), + ("let", LET), + ("true", TRUE), + ("false", FALSE), + ("if", IF), + ("else", ELSE), + ("return", RETURN), + ] HashMap.ofList list /-- ユーザ定義の識別子なのか、言語のキーワードなのか分類する -/ -def LookupIdent (ident : String) : TokenType := +def LookupIdent (ident : String) : Token := match keywords.find? ident with | some tok => tok - | none => IDENT + | none => IDENT ident