From 488f98a83229e4941a7b80307f94d29cb940ac9b Mon Sep 17 00:00:00 2001
From: Seasawher
Date: Fri, 26 Jul 2024 22:02:11 +0900
Subject: [PATCH] =?UTF-8?q?=E3=82=BC=E3=83=9F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section is ignored by `git am`):
- nextToken now returns right after readIdentifier. Falling through to the
  trailing readChar dropped the character that follows an identifier.
- Docstrings on added lines are in English. Hunk line counts are unchanged;
  the post-image blob ids on the `index` lines predate these edits.

 Monkey/Lexer/Lexer.lean     | 48 +++++++++++++++++++++++++++----------
 Monkey/Lexer/LexerTest.lean | 28 +++++++++++-----------
 2 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/Monkey/Lexer/Lexer.lean b/Monkey/Lexer/Lexer.lean
index 367b120..3da16c8 100644
--- a/Monkey/Lexer/Lexer.lean
+++ b/Monkey/Lexer/Lexer.lean
@@ -13,24 +13,47 @@ deriving Repr
 -- アルファベットかどうか判定する
 #check Char.isAlpha
 
+/-- Is this character allowed in an identifier?
+True for alphabetic characters and for the underscore. -/
+def Char.isLetter (ch : Char) : Bool := ch.isAlpha || ch == '_'
+
 namespace Lexer
 
-def readChar (l : Lexer) : Lexer :=
+/-- Advance the Lexer by one character. -/
+def readChar : StateM Lexer Unit := do
+  let l ← get
   let l' := if l.readPosition ≥ l.input.length
     then { l with ch := '\x00' }
     else { l with ch := l.input.get ⟨l.readPosition⟩}
-  { l' with position := l.readPosition, readPosition := l.readPosition + 1 }
+  set { l' with position := l.readPosition, readPosition := l.readPosition + 1 }
+
+/-- Variant of the constructor with default values for the positions and the current character. -/
+def mkD (input : String) (position readPosition : Nat := 0)
+    (ch : Char := '\x00') : Lexer :=
+  { input := input, position := position, readPosition := readPosition, ch := ch }
 
-def new (input : String) (position readPosition : Nat := 0) (ch : Char := '\x00') : Lexer := Id.run do
-  let mut l := { input := input, position := position, readPosition := readPosition, ch := ch }
-  l := l.readChar
-  return l
+def new (input : String) : Lexer :=
+  StateT.run readChar (Lexer.mkD input) |> Id.run |>.snd
+
+#check StateT.run readChar
+
+/-- Advance the Lexer until a non-letter character appears and return the letters that were read. -/
+def readIdentifier : StateM Lexer String := do
+  let mut l ← get
+  let position := l.position
+  while l.ch.isLetter do
+    readChar
+    l ← get
+  return l.input
+    |>.take l.position
+    |>.drop position
 
 open TokenType
 
+/-- Read the next token, updating the Lexer. Identifiers return early: `readIdentifier` has already moved past them. -/
 def nextToken : StateM Lexer Token := do
   let mut l ← get
-  let tok := match l.ch with
+  let mut tok := match l.ch with
     | '=' => Token.mk ASSIGN (String.singleton l.ch)
     | '+' => Token.mk PLUS (String.singleton l.ch)
     | '(' => Token.mk LPAREN (String.singleton l.ch)
@@ -40,12 +63,11 @@ def nextToken : StateM Lexer Token := do
     | ',' => Token.mk COMMA (String.singleton l.ch)
     | ';' => Token.mk SEMICOLON (String.singleton l.ch)
     | '\x00' => Token.mk EOF ""
-    | _ =>
-      if l.ch.isAlpha then
-        sorry
-      else
-        Token.mk ILLEGAL (String.singleton l.ch)
-  set l.readChar
+    | _ => Token.mk ILLEGAL (String.singleton l.ch)
+  if l.ch.isLetter then
+    let literal ← readIdentifier
+    return Token.mk (if literal == "let" then LET else IDENT) literal
+  readChar
   return tok
 
 end Lexer
diff --git a/Monkey/Lexer/LexerTest.lean b/Monkey/Lexer/LexerTest.lean
index 2719586..0d79c03 100644
--- a/Monkey/Lexer/LexerTest.lean
+++ b/Monkey/Lexer/LexerTest.lean
@@ -1,7 +1,7 @@
 import Monkey.Lexer.Lexer
 import Monkey.Token.Token
 
-open TokenType
+open TokenType Lexer
 
 def testNextToken (input : String) (expected : Array (TokenType × String)) : IO Unit := do
   let mut l : Lexer := Lexer.new input
@@ -15,19 +15,19 @@ def testNextToken (input : String) (expected : Array (TokenType × String)) : IO
 
   IO.println s!"ok!"
 
--- #eval testNextToken
---   (input := "=+(){},;")
---   (expected := #[
---     (ASSIGN, "="),
---     (PLUS, "+"),
---     (LPAREN, "("),
---     (RPAREN, ")"),
---     (LBRACE, "{"),
---     (RBRACE, "}"),
---     (COMMA, ","),
---     (SEMICOLON, ";"),
---     (EOF, "")
---   ])
+#eval testNextToken
+  (input := "=+(){},;")
+  (expected := #[
+    (ASSIGN, "="),
+    (PLUS, "+"),
+    (LPAREN, "("),
+    (RPAREN, ")"),
+    (LBRACE, "{"),
+    (RBRACE, "}"),
+    (COMMA, ","),
+    (SEMICOLON, ";"),
+    (EOF, "")
+  ])
 
 -- #eval testNextToken
 --   (input := "let five = 5;