From 473c25c6e533207b9ca0ba6295019c92c78b1a5e Mon Sep 17 00:00:00 2001 From: Severen Redwood Date: Thu, 30 May 2024 00:49:19 +1200 Subject: [PATCH] Refactor parser --- src/Sly/Eval.hs | 1 + src/Sly/Parser.hs | 95 ++++++++++++++++++++++------------------------- 2 files changed, 46 insertions(+), 50 deletions(-) diff --git a/src/Sly/Eval.hs b/src/Sly/Eval.hs index cb23e3a..decc43e 100644 --- a/src/Sly/Eval.hs +++ b/src/Sly/Eval.hs @@ -30,6 +30,7 @@ import Data.Set qualified as Set import Data.Text qualified as T import Sly.Parser +import Sly.Syntax -- | Type alias for a map of top-level let bindings. type Bindings = Seq (Name, Term) diff --git a/src/Sly/Parser.hs b/src/Sly/Parser.hs index 77be5da..6a4db9a 100644 --- a/src/Sly/Parser.hs +++ b/src/Sly/Parser.hs @@ -1,12 +1,7 @@ -- SPDX-FileCopyrightText: 2022 Severen Redwood -- SPDX-License-Identifier: GPL-3.0-or-later -module Sly.Parser ( - Name (..), - Term (..), - Statement (..), - parse, -) where +module Sly.Parser (parse) where import Control.Monad.Combinators.Expr (Operator (..), makeExprParser) import Data.Foldable (foldr') @@ -22,26 +17,25 @@ import qualified Data.Text as T import qualified Text.Megaparsec.Char.Lexer as L {- Grammar Notes - * Application is parsed with the highest precedence and associativity to the left, i.e. - f x y = ((f x) y). - * Abstractions are parsed with bodies extending as far to the right as possible, i.e. - λx -> λy -> x y x = (λx -> (λy -> ((x y) x))). - * Bracketing of terms can be used to override precedence and associativity when - required. - * Statements are either an assignment (let X := Y) or a λ-calculus term and end with a - full stop '.'. - - For anyone editing this file: One good test to flush out any bugs in the parser is to - check whether the following equality is reflected by the parse trees of the LHS and RHS - respectively: + * Application is parsed with the highest precedence and associativity to the + left, i.e. f x y = ((f x) y). + * Abstractions are parsed with bodies extending as far to the right as + possible, i.e. λx -> λy -> x y x = (λx -> (λy -> ((x y) x))). + * Bracketing of terms can be used to override precedence and associativity + when required. + * Statements are either an assignment (let X := Y) or a λ-calculus term and + end with a full stop '.'. + + For anyone editing this file: One good test to flush out any bugs in the + parser is to check whether the following equality is reflected by the parse + trees of the LHS and RHS, respectively: λf -> (λx -> f (x x)) (λx -> f (x x)) = (λf -> ((λx -> (f (x x))) (λx -> (f (x x))))). -} -{- | The parser monad. - - This is a Parsec type synonym to both help type inference and the compiler's - optimiser. --} +-- | The parser monad. +-- +-- This is a Parsec type synonym to both help type inference and the compiler's +-- optimiser. type Parser = Parsec Void Text -- | Words that are reserved as keywords and thus disallowed as names. @@ -70,9 +64,8 @@ symbol = L.symbol spaceConsumer punc :: Text -> Parser () punc = void . symbol -{- | Create a parser that applies the given parser to an expression between a pair of - round brackets. --} +-- | Create a parser that applies the given parser to an expression between a +-- pair of round brackets. brackets :: Parser a -> Parser a brackets = between (punc "(") (punc ")") @@ -81,24 +74,25 @@ nameStart :: Parser Char nameStart = satisfy \c -> isXIDStart c && c /= 'λ' -- | Parse a sequence of 'continue' characters in a name. -nameContinue :: Parser [Char] +nameContinue :: Parser String nameContinue = many $ satisfy \c -> isXIDContinue c || c == '\'' -- | Parse a name according to the Unicode Standard Annex #31. name :: Parser Name -name = Name <$> (lexeme . try) (p >>= check) +name = Name <$> lexeme (word >>= check) "name" where - p = T.pack <$> ((:) <$> nameStart <*> nameContinue) - check s - | s `notElem` keywords = return s - -- TODO: See if the positioning of this error message (when output) can be improved. - | otherwise = fail $ "keyword " <> T.unpack s <> " cannot be a name" + word = T.pack <$> ((:) <$> nameStart <*> nameContinue) + check w + | w `notElem` keywords = return w + -- TODO: See if the positioning of this error message (when output) can be + -- improved. + | otherwise = fail $ "keyword " <> T.unpack w <> " cannot be a name" -- | Parse a variable term. variable :: Parser Term -variable = Var <$> name "variable" +variable = Var <$> name --- | Parse a natural number. +-- | Parse a natural number literal. natural :: Parser Term natural = toChurchNat <$> (lexeme L.decimal >>= check) where @@ -107,6 +101,7 @@ natural = toChurchNat <$> (lexeme L.decimal >>= check) | otherwise = fail $ "naturals larger than " <> show maxInt <> " are disallowed" maxInt = maxBound @Int +-- | Parse a Boolean literal. boolean :: Parser Term boolean = toChurchBool <$> (single '#' *> (try true <|> false)) where @@ -116,22 +111,22 @@ boolean = toChurchBool <$> (single '#' *> (try true <|> false)) -- | Parse a λ-abstraction. abstraction :: Parser Term abstraction = do - punc "\\" <|> punc "λ" - binders <- (name "variable") `sepBy1` spaceConsumer - punc "->" <|> punc "↦" + punc "\\" <|> punc "λ" "\\" + binders <- name `sepBy1` spaceConsumer + punc "->" <|> punc "↦" "->" abstract binders <$> term where -- Expand an abstraction with multiple variables into its internal representation of -- nested single-variable abstractions. - abstract = flip $ foldr' Abs + abstract = flip (foldr' Abs) -- | Parse an application term. application :: Parser (Term -> Term -> Term) application = return App -- | Parse the initial common fragment of a let term or a let statement. -lettStart :: Parser (Name, Term) -lettStart = do +letStart :: Parser (Name, Term) +letStart = do punc "let" name' <- name "name" punc ":=" @@ -139,9 +134,9 @@ lettStart = do return (name', term') -- | Parse a let term. -lett :: Parser Term -lett = do - (x, t) <- lettStart +let_ :: Parser Term +let_ = do + (x, t) <- letStart punc "in" body <- term -- NOTE: let x := t in body is syntactic sugar for (λx -> body) t. @@ -151,16 +146,16 @@ lett = do term :: Parser Term term = makeExprParser (choice indivisibles) operatorTable where - indivisibles = [variable, natural, boolean, abstraction, lett, brackets term] - operatorTable :: [[Operator Parser Term]] operatorTable = [[InfixL application]] + indivisibles = + [try variable, natural, boolean, abstraction, let_, brackets term] -- | Parse an assignment statement. assignment :: Parser Statement -assignment = try do - (x, t) <- lettStart +assignment = do + (x, t) <- letStart notFollowedBy "in" - return $ Ass x t + return (Ass x t) -- | Parse a term statement. termS :: Parser Statement @@ -168,7 +163,7 @@ termS = Term <$> term -- | Parse a program statement. statement :: Parser Statement -statement = assignment <|> termS +statement = try assignment <|> termS -- | Parse the end of a statement. eos :: Parser ()