-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
46 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,7 @@ | ||
-- SPDX-FileCopyrightText: 2022 Severen Redwood <[email protected]> | ||
-- SPDX-License-Identifier: GPL-3.0-or-later | ||
|
||
module Sly.Parser ( | ||
Name (..), | ||
Term (..), | ||
Statement (..), | ||
parse, | ||
) where | ||
module Sly.Parser (parse) where | ||
|
||
import Control.Monad.Combinators.Expr (Operator (..), makeExprParser) | ||
import Data.Foldable (foldr') | ||
|
@@ -22,26 +17,25 @@ import qualified Data.Text as T | |
import qualified Text.Megaparsec.Char.Lexer as L | ||
|
||
{- Grammar Notes | ||
* Application is parsed with the highest precedence and associativity to the left, i.e. | ||
f x y = ((f x) y). | ||
* Abstractions are parsed with bodies extending as far to the right as possible, i.e. | ||
λx -> λy -> x y x = (λx -> (λy -> ((x y) x))). | ||
* Bracketing of terms can be used to override precedence and associativity when | ||
required. | ||
* Statements are either an assignment (let X := Y) or a λ-calculus term and end with a | ||
full stop '.'. | ||
For anyone editing this file: One good test to flush out any bugs in the parser is to | ||
check whether the following equality is reflected by the parse trees of the LHS and RHS | ||
respectively: | ||
* Application is parsed with the highest precedence and associativity to the | ||
left, i.e. f x y = ((f x) y). | ||
* Abstractions are parsed with bodies extending as far to the right as | ||
possible, i.e. λx -> λy -> x y x = (λx -> (λy -> ((x y) x))). | ||
* Bracketing of terms can be used to override precedence and associativity | ||
when required. | ||
* Statements are either an assignment (let X := Y) or a λ-calculus term and | ||
end with a full stop '.'. | ||
For anyone editing this file: One good test to flush out any bugs in the | ||
parser is to check whether the following equality is reflected by the parse | ||
trees of the LHS and RHS, respectively: | ||
λf -> (λx -> f (x x)) (λx -> f (x x)) = (λf -> ((λx -> (f (x x))) (λx -> (f (x x))))). | ||
-} | ||
|
||
{- | The parser monad. | ||
This is a Parsec type synonym to both help type inference and the compiler's | ||
optimiser. | ||
-} | ||
-- | The parser monad. | ||
-- | ||
-- This is a Parsec type synonym to both help type inference and the compiler's | ||
-- optimiser. | ||
type Parser = Parsec Void Text | ||
|
||
-- | Words that are reserved as keywords and thus disallowed as names. | ||
|
@@ -70,9 +64,8 @@ symbol = L.symbol spaceConsumer | |
punc :: Text -> Parser () | ||
punc = void . symbol | ||
|
||
{- | Create a parser that applies the given parser to an expression between a pair of | ||
round brackets. | ||
-} | ||
-- | Create a parser that applies the given parser to an expression between a | ||
-- pair of round brackets. | ||
brackets :: Parser a -> Parser a | ||
brackets = between (punc "(") (punc ")") | ||
|
||
|
@@ -81,24 +74,25 @@ nameStart :: Parser Char | |
nameStart = satisfy \c -> isXIDStart c && c /= 'λ' | ||
|
||
-- | Parse a sequence of 'continue' characters in a name. | ||
nameContinue :: Parser [Char] | ||
nameContinue :: Parser String | ||
nameContinue = many $ satisfy \c -> isXIDContinue c || c == '\'' | ||
|
||
-- | Parse a name according to the Unicode Standard Annex #31. | ||
name :: Parser Name | ||
name = Name <$> (lexeme . try) (p >>= check) | ||
name = Name <$> lexeme (word >>= check) <?> "name" | ||
where | ||
p = T.pack <$> ((:) <$> nameStart <*> nameContinue) | ||
check s | ||
| s `notElem` keywords = return s | ||
-- TODO: See if the positioning of this error message (when output) can be improved. | ||
| otherwise = fail $ "keyword " <> T.unpack s <> " cannot be a name" | ||
word = T.pack <$> ((:) <$> nameStart <*> nameContinue) | ||
check w | ||
| w `notElem` keywords = return w | ||
-- TODO: See if the positioning of this error message (when output) can be | ||
-- improved. | ||
| otherwise = fail $ "keyword " <> T.unpack w <> " cannot be a name" | ||
|
||
-- | Parse a variable term. | ||
variable :: Parser Term | ||
variable = Var <$> name <?> "variable" | ||
variable = Var <$> name | ||
|
||
-- | Parse a natural number. | ||
-- | Parse a natural number literal. | ||
natural :: Parser Term | ||
natural = toChurchNat <$> (lexeme L.decimal >>= check) | ||
where | ||
|
@@ -107,6 +101,7 @@ natural = toChurchNat <$> (lexeme L.decimal >>= check) | |
| otherwise = fail $ "naturals larger than " <> show maxInt <> " are disallowed" | ||
maxInt = maxBound @Int | ||
|
||
-- | Parse a Boolean literal. | ||
boolean :: Parser Term | ||
boolean = toChurchBool <$> (single '#' *> (try true <|> false)) | ||
where | ||
|
@@ -116,32 +111,32 @@ boolean = toChurchBool <$> (single '#' *> (try true <|> false)) | |
-- | Parse a λ-abstraction. | ||
abstraction :: Parser Term | ||
abstraction = do | ||
punc "\\" <|> punc "λ" | ||
binders <- (name <?> "variable") `sepBy1` spaceConsumer | ||
punc "->" <|> punc "↦" | ||
punc "\\" <|> punc "λ" <?> "\\" | ||
binders <- name `sepBy1` spaceConsumer | ||
punc "->" <|> punc "↦" <?> "->" | ||
abstract binders <$> term | ||
where | ||
-- Expand an abstraction with multiple variables into its internal representation of | ||
-- nested single-variable abstractions. | ||
abstract = flip $ foldr' Abs | ||
abstract = flip (foldr' Abs) | ||
|
||
-- | Parse an application term. | ||
application :: Parser (Term -> Term -> Term) | ||
application = return App | ||
|
||
-- | Parse the initial common fragment of a let term or a let statement. | ||
lettStart :: Parser (Name, Term) | ||
lettStart = do | ||
letStart :: Parser (Name, Term) | ||
letStart = do | ||
punc "let" | ||
name' <- name <?> "name" | ||
punc ":=" | ||
term' <- term | ||
return (name', term') | ||
|
||
-- | Parse a let term. | ||
lett :: Parser Term | ||
lett = do | ||
(x, t) <- lettStart | ||
let_ :: Parser Term | ||
let_ = do | ||
(x, t) <- letStart | ||
punc "in" | ||
body <- term | ||
-- NOTE: let x := t in body is syntactic sugar for (λx -> body) t. | ||
|
@@ -151,24 +146,24 @@ lett = do | |
term :: Parser Term | ||
term = makeExprParser (choice indivisibles) operatorTable | ||
where | ||
indivisibles = [variable, natural, boolean, abstraction, lett, brackets term] | ||
operatorTable :: [[Operator Parser Term]] | ||
operatorTable = [[InfixL application]] | ||
indivisibles = | ||
[try variable, natural, boolean, abstraction, let_, brackets term] | ||
|
||
-- | Parse an assignment statement. | ||
assignment :: Parser Statement | ||
assignment = try do | ||
(x, t) <- lettStart | ||
assignment = do | ||
(x, t) <- letStart | ||
notFollowedBy "in" | ||
return $ Ass x t | ||
return (Ass x t) | ||
|
||
-- | Parse a term statement. | ||
termS :: Parser Statement | ||
termS = Term <$> term | ||
|
||
-- | Parse a program statement. | ||
statement :: Parser Statement | ||
statement = assignment <|> termS | ||
statement = try assignment <|> termS | ||
|
||
-- | Parse the end of a statement. | ||
eos :: Parser () | ||
|