Skip to content

Commit

Permalink
Refactor parser
Browse files Browse the repository at this point in the history
  • Loading branch information
severen committed May 29, 2024
1 parent 010938c commit 473c25c
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 50 deletions.
1 change: 1 addition & 0 deletions src/Sly/Eval.hs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import Data.Set qualified as Set
import Data.Text qualified as T

import Sly.Parser
import Sly.Syntax

-- | Type alias for a map of top-level let bindings.
type Bindings = Seq (Name, Term)
Expand Down
95 changes: 45 additions & 50 deletions src/Sly/Parser.hs
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
-- SPDX-FileCopyrightText: 2022 Severen Redwood <[email protected]>
-- SPDX-License-Identifier: GPL-3.0-or-later

module Sly.Parser (
Name (..),
Term (..),
Statement (..),
parse,
) where
module Sly.Parser (parse) where

import Control.Monad.Combinators.Expr (Operator (..), makeExprParser)
import Data.Foldable (foldr')
Expand All @@ -22,26 +17,25 @@ import qualified Data.Text as T
import qualified Text.Megaparsec.Char.Lexer as L

{- Grammar Notes
* Application is parsed with the highest precedence and associativity to the left, i.e.
f x y = ((f x) y).
* Abstractions are parsed with bodies extending as far to the right as possible, i.e.
λx -> λy -> x y x = (λx -> (λy -> ((x y) x))).
* Bracketing of terms can be used to override precedence and associativity when
required.
* Statements are either an assignment (let X := Y) or a λ-calculus term and end with a
full stop '.'.
For anyone editing this file: One good test to flush out any bugs in the parser is to
check whether the following equality is reflected by the parse trees of the LHS and RHS
respectively:
* Application is parsed with the highest precedence and associativity to the
left, i.e. f x y = ((f x) y).
* Abstractions are parsed with bodies extending as far to the right as
possible, i.e. λx -> λy -> x y x = (λx -> (λy -> ((x y) x))).
* Bracketing of terms can be used to override precedence and associativity
when required.
* Statements are either an assignment (let X := Y) or a λ-calculus term and
end with a full stop '.'.
For anyone editing this file: One good test to flush out any bugs in the
parser is to check whether the following equality is reflected by the parse
trees of the LHS and RHS, respectively:
λf -> (λx -> f (x x)) (λx -> f (x x)) = (λf -> ((λx -> (f (x x))) (λx -> (f (x x))))).
-}

{- | The parser monad.
This is a Parsec type synonym to both help type inference and the compiler's
optimiser.
-}
-- | The parser monad.
--
-- This is a Parsec type synonym to both help type inference and the compiler's
-- optimiser.
type Parser = Parsec Void Text

-- | Words that are reserved as keywords and thus disallowed as names.
Expand Down Expand Up @@ -70,9 +64,8 @@ symbol = L.symbol spaceConsumer
punc :: Text -> Parser ()
punc = void . symbol

{- | Create a parser that applies the given parser to an expression between a pair of
round brackets.
-}
-- | Create a parser that applies the given parser to an expression between a
-- pair of round brackets.
brackets :: Parser a -> Parser a
brackets = between (punc "(") (punc ")")

Expand All @@ -81,24 +74,25 @@ nameStart :: Parser Char
nameStart = satisfy \c -> isXIDStart c && c /= 'λ'

-- | Parse a sequence of 'continue' characters in a name.
nameContinue :: Parser [Char]
nameContinue :: Parser String
nameContinue = many $ satisfy \c -> isXIDContinue c || c == '\''

-- | Parse a name according to the Unicode Standard Annex #31.
name :: Parser Name
name = Name <$> (lexeme . try) (p >>= check)
name = Name <$> lexeme (word >>= check) <?> "name"
where
p = T.pack <$> ((:) <$> nameStart <*> nameContinue)
check s
| s `notElem` keywords = return s
-- TODO: See if the positioning of this error message (when output) can be improved.
| otherwise = fail $ "keyword " <> T.unpack s <> " cannot be a name"
word = T.pack <$> ((:) <$> nameStart <*> nameContinue)
check w
| w `notElem` keywords = return w
-- TODO: See if the positioning of this error message (when output) can be
-- improved.
| otherwise = fail $ "keyword " <> T.unpack w <> " cannot be a name"

-- | Parse a variable term.
variable :: Parser Term
variable = Var <$> name <?> "variable"
variable = Var <$> name

-- | Parse a natural number.
-- | Parse a natural number literal.
natural :: Parser Term
natural = toChurchNat <$> (lexeme L.decimal >>= check)
where
Expand All @@ -107,6 +101,7 @@ natural = toChurchNat <$> (lexeme L.decimal >>= check)
| otherwise = fail $ "naturals larger than " <> show maxInt <> " are disallowed"
maxInt = maxBound @Int

-- | Parse a Boolean literal.
boolean :: Parser Term
boolean = toChurchBool <$> (single '#' *> (try true <|> false))
where
Expand All @@ -116,32 +111,32 @@ boolean = toChurchBool <$> (single '#' *> (try true <|> false))
-- | Parse a λ-abstraction.
abstraction :: Parser Term
abstraction = do
punc "\\" <|> punc "λ"
binders <- (name <?> "variable") `sepBy1` spaceConsumer
punc "->" <|> punc ""
punc "\\" <|> punc "λ" <?> "\\"
binders <- name `sepBy1` spaceConsumer
punc "->" <|> punc "" <?> "->"
abstract binders <$> term
where
-- Expand an abstraction with multiple variables into its internal representation of
-- nested single-variable abstractions.
abstract = flip $ foldr' Abs
abstract = flip (foldr' Abs)

-- | Parse an application term.
application :: Parser (Term -> Term -> Term)
application = return App

-- | Parse the initial common fragment of a let term or a let statement.
lettStart :: Parser (Name, Term)
lettStart = do
letStart :: Parser (Name, Term)
letStart = do
punc "let"
name' <- name <?> "name"
punc ":="
term' <- term
return (name', term')

-- | Parse a let term.
lett :: Parser Term
lett = do
(x, t) <- lettStart
let_ :: Parser Term
let_ = do
(x, t) <- letStart
punc "in"
body <- term
-- NOTE: let x := t in body is syntactic sugar for (λx -> body) t.
Expand All @@ -151,24 +146,24 @@ lett = do
term :: Parser Term
term = makeExprParser (choice indivisibles) operatorTable
where
indivisibles = [variable, natural, boolean, abstraction, lett, brackets term]
operatorTable :: [[Operator Parser Term]]
operatorTable = [[InfixL application]]
indivisibles =
[try variable, natural, boolean, abstraction, let_, brackets term]

-- | Parse an assignment statement.
assignment :: Parser Statement
assignment = try do
(x, t) <- lettStart
assignment = do
(x, t) <- letStart
notFollowedBy "in"
return $ Ass x t
return (Ass x t)

-- | Parse a term statement.
termS :: Parser Statement
termS = Term <$> term

-- | Parse a program statement.
statement :: Parser Statement
statement = assignment <|> termS
statement = try assignment <|> termS

-- | Parse the end of a statement.
eos :: Parser ()
Expand Down

0 comments on commit 473c25c

Please sign in to comment.