diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index fc8c7da..9547634 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -47,7 +47,7 @@ The ICU message parser is a [recursive descent parser](https://en.wikipedia.org/
For example, given an ICU message `hello {name}`, we’d first try parsing for an interpolation or tag, and failing that would parse plaintext, which we’d do until we encountered a reason to stop, in this case the tag opening character. We’ve stored "hello " as plaintext and will now continue along, this time succeeding in parsing the tag. We’ll now recursively parse inside the bounds of the tag, reusing the same top-level parser we were just using, this time parsing an interpolation. Having done this we’ve _consumed_ the entire input string and have successfully parsed a recursive list of nodes making up our [AST](https://en.wikipedia.org/wiki/Abstract_syntax_tree).
-JSON parsing is offloaded to [aeson](https://hackage.haskell.org/package/aeson).
+JSON parsing is handled internally for better interop with the ICU parser. JSON encoding is offloaded to [aeson](https://hackage.haskell.org/package/aeson).
### Compilation
diff --git a/README.md b/README.md
index f6b3e89..c143103 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,7 @@ Translation files should be encoded as JSON and might look something like this:
{
"welcome": {
"message": "Hello {name}",
+ "description": "Welcome message",
"backend": "ts"
}
}
diff --git a/cli/Main.hs b/cli/Main.hs
index 3bf54e5..d412957 100644
--- a/cli/Main.hs
+++ b/cli/Main.hs
@@ -1,10 +1,11 @@
module Main where
-import CLI (Opts (..), getOpts)
-import qualified Data.Text as T
-import Intlc.Compiler (compileDataset, compileFlattened)
+import CLI (Opts (..), getOpts)
+import qualified Data.Text as T
+import Intlc.Compiler (compileDataset, compileFlattened)
import Intlc.Core
-import Intlc.Parser (ParseFailure, parseDataset, printErr)
+import Intlc.Parser (parseDataset, printErr)
+import Intlc.Parser.Error (ParseFailure)
import Prelude
main :: IO ()
@@ -16,5 +17,4 @@ main = getOpts >>= \case
compilerDie = die . T.unpack . ("Invalid keys:\n" <>) . T.intercalate "\n" . fmap ("\t" <>) . toList
getParsed :: FilePath -> IO (Either ParseFailure (Dataset Translation))
-getParsed = fmap parseDataset . readFileLBS
-
+getParsed x = parseDataset x <$> readFileText x
diff --git a/intlc.cabal b/intlc.cabal
index d65c4b5..41dbe0c 100644
--- a/intlc.cabal
+++ b/intlc.cabal
@@ -55,6 +55,9 @@ library
Intlc.Core
Intlc.ICU
Intlc.Parser
+ Intlc.Parser.Error
+ Intlc.Parser.JSON
+ Intlc.Parser.ICU
Utils
test-suite test-intlc
@@ -78,4 +81,5 @@ test-suite test-intlc
Intlc.Backend.TypeScriptSpec
Intlc.CompilerSpec
Intlc.EndToEndSpec
- Intlc.ParserSpec
+ Intlc.Parser.JSONSpec
+ Intlc.Parser.ICUSpec
diff --git a/lib/Intlc/Core.hs b/lib/Intlc/Core.hs
index 10c735f..7ab86be 100644
--- a/lib/Intlc/Core.hs
+++ b/lib/Intlc/Core.hs
@@ -2,11 +2,8 @@
module Intlc.Core where
-import Data.Aeson (FromJSON (..), ToJSON (toEncoding),
- withObject, withText, (.!=), (.:), (.:?),
- (.=))
+import Data.Aeson (ToJSON (toEncoding), (.=))
import Data.Aeson.Encoding (pairs, string)
-import qualified Data.Text as T
import Intlc.ICU (Message)
import Prelude
@@ -22,12 +19,6 @@ data Backend
| TypeScriptReact
deriving (Show, Eq, Generic)
-instance FromJSON Backend where
- parseJSON = withText "Backend" decode
- where decode "ts" = pure TypeScript
- decode "tsx" = pure TypeScriptReact
- decode x = fail $ "Unknown backend: " <> T.unpack x
-
instance ToJSON Backend where
toEncoding TypeScript = string "ts"
toEncoding TypeScriptReact = string "tsx"
@@ -39,13 +30,6 @@ data UnparsedTranslation = UnparsedTranslation
}
deriving (Show, Eq, Generic)
-instance FromJSON UnparsedTranslation where
- parseJSON = withObject "UnparsedTranslation" decode
- where decode x = UnparsedTranslation
- <$> x .: "message"
- <*> x .:? "backend" .!= TypeScript
- <*> x .:? "description"
-
instance ToJSON UnparsedTranslation where
toEncoding (UnparsedTranslation msg be md) = pairs $
"message" .= msg
diff --git a/lib/Intlc/Parser.hs b/lib/Intlc/Parser.hs
index bf2f3b3..04a3cd7 100644
--- a/lib/Intlc/Parser.hs
+++ b/lib/Intlc/Parser.hs
@@ -1,239 +1,14 @@
--- This module follows the following whitespace rules:
--- * Consume all whitespace after tokens where possible.
--- * Therefore, assume no whitespace before tokens.
-
-{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
-{-# HLINT ignore "Use newtype instead of data" #-}
-
module Intlc.Parser where
-import qualified Control.Applicative.Combinators.NonEmpty as NE
-import Data.Aeson (decode)
-import Data.ByteString.Lazy (ByteString)
-import qualified Data.Map as M
-import qualified Data.Text as T
-import Data.Validation (toEither,
- validationNel)
-import Data.Void ()
import Intlc.Core
-import Intlc.ICU
-import Prelude hiding (ByteString)
-import Text.Megaparsec hiding (State, Stream,
- Token, many, some,
- token)
-import Text.Megaparsec.Char
-import qualified Text.Megaparsec.Char.Lexer as L
-import Text.Megaparsec.Error.Builder
-
-type ParseErr = ParseErrorBundle Text MessageParseErr
-
-data ParseFailure
- = FailedJsonParse
- | FailedDatasetParse (NonEmpty ParseErr)
- deriving (Show, Eq)
+import Intlc.Parser.Error (ParseFailure)
+import Intlc.Parser.JSON (ParserState (ParserState), dataset)
+import Prelude
+import Text.Megaparsec (runParser)
+import Text.Megaparsec.Error
-data MessageParseErr
- = NoClosingCallbackTag Text
- | BadClosingCallbackTag Text Text
- deriving (Show, Eq, Ord)
-
-instance ShowErrorComponent MessageParseErr where
- showErrorComponent (NoClosingCallbackTag x) = "Callback tag <" <> T.unpack x <> "> not closed"
- showErrorComponent (BadClosingCallbackTag x y) = "Callback tag <" <> T.unpack x <> "> not closed, instead found " <> T.unpack y <> ">"
-
-failingWith :: MonadParsec e s m => Int -> e -> m a
-pos `failingWith` e = parseError . errFancy pos . fancy . ErrorCustom $ e
+parseDataset :: FilePath -> Text -> Either ParseFailure (Dataset Translation)
+parseDataset = runParser (evalStateT dataset (ParserState mempty))
printErr :: ParseFailure -> String
-printErr FailedJsonParse = "Failed to parse JSON"
-printErr (FailedDatasetParse es) = intercalate "\n" . toList . fmap errorBundlePretty $ es
-
-parseDataset :: ByteString -> Either ParseFailure (Dataset Translation)
-parseDataset = parse' <=< decode'
- where decode' = maybeToRight FailedJsonParse . decode
- parse' = toEither . first FailedDatasetParse . M.traverseWithKey ((validationNel .) . parseTranslationFor)
-
-parseTranslationFor :: Text -> UnparsedTranslation -> Either ParseErr Translation
-parseTranslationFor name (UnparsedTranslation umsg be md) = do
- msg' <- runParser (runReaderT msg initialState) (T.unpack name) umsg
- pure $ Translation msg' be md
-
-data ParserState = ParserState
- { pluralCtxName :: Maybe Text
- }
-
-initialState :: ParserState
-initialState = ParserState mempty
-
-type Parser = ReaderT ParserState (Parsec MessageParseErr Text)
-
-ident :: Parser Text
-ident = T.pack <$> some letterChar
-
-msg :: Parser Message
-msg = f . mergePlaintext <$> manyTill token eof
- where f [] = Static ""
- f [Plaintext x] = Static x
- f (x:xs) = Dynamic (x :| xs)
-
-token :: Parser Token
-token = choice
- [ Interpolation <$> (interp <|> callback)
- -- Plural cases support interpolating the number/argument in context with
- -- `#`. When there's no such context, fail the parse in effect treating it
- -- as plaintext.
- , asks pluralCtxName >>= \case
- Just n -> Interpolation (Arg n PluralRef) <$ string "#"
- Nothing -> empty
- , Plaintext <$> (try escaped <|> plaintext)
- ]
-
-plaintext :: Parser Text
-plaintext = T.singleton <$> L.charLiteral
-
-escaped :: Parser Text
-escaped = apos *> choice
- -- Double escape two apostrophes as one: "''" -> "'"
- [ "'" <$ apos
- -- Escape everything until another apostrophe, being careful of internal
- -- double escapes: "'{a''}'" -> "{a'}"
- , try $ T.pack <$> someTillNotDouble L.charLiteral apos
- -- Escape the next syntax character as plaintext: "'{" -> "{"
- , T.singleton <$> syn
- ]
- where apos = char '\''
- syn = char '{' <|> char '<'
- -- Like `someTill`, but doesn't end upon encountering two `end` tokens,
- -- instead consuming them as one and continuing.
- someTillNotDouble p end = tryOne
- where tryOne = (:) <$> p <*> go
- go = ((:) <$> try (end <* end) <*> go) <|> (mempty <$ end) <|> tryOne
-
-callback :: Parser Arg
-callback = do
- oname <- string "<" *> ident <* string ">"
- mrest <- observing ((,,) <$> children <* string "" <*> getOffset <*> ident <* string ">")
- case mrest of
- Left _ -> 1 `failingWith` NoClosingCallbackTag oname
- Right (ch, pos, cname) -> if oname == cname
- then pure (Arg oname ch)
- else pos `failingWith` BadClosingCallbackTag oname cname
- where children = Callback . mergePlaintext <$> manyTill token (lookAhead $ string "")
-
-interp :: Parser Arg
-interp = do
- n <- string "{" *> ident
- Arg n <$> choice
- [ String <$ string "}"
- , sep *> body n <* string "}"
- ]
- where sep = string "," <* hspace1
- body n = choice
- [ uncurry Bool <$> (string "boolean" *> sep *> boolCases)
- , Number <$ string "number"
- , Date <$> (string "date" *> sep *> dateTimeFmt)
- , Time <$> (string "time" *> sep *> dateTimeFmt)
- , Plural <$> withPluralCtx n (
- string "plural" *> sep *> cardinalPluralCases
- <|> string "selectordinal" *> sep *> ordinalPluralCases
- )
- , uncurry Select <$> (string "select" *> sep *> selectCases)
- ]
- withPluralCtx n = withReaderT (const . ParserState . pure $ n)
-
-dateTimeFmt :: Parser DateTimeFmt
-dateTimeFmt = choice
- [ Short <$ string "short"
- , Medium <$ string "medium"
- , Long <$ string "long"
- , Full <$ string "full"
- ]
-
-caseBody :: Parser Stream
-caseBody = mergePlaintext <$> (string "{" *> manyTill token (string "}"))
-
-boolCases :: Parser (Stream, Stream)
-boolCases = (,)
- <$> (string "true" *> hspace1 *> caseBody)
- <* hspace1
- <*> (string "false" *> hspace1 *> caseBody)
-
-selectCases :: Parser (NonEmpty SelectCase, Maybe SelectWildcard)
-selectCases = (,) <$> cases <*> optional wildcard
- where cases = NE.sepEndBy1 (SelectCase <$> (name <* hspace1) <*> caseBody) hspace1
- wildcard = SelectWildcard <$> (string wildcardName *> hspace1 *> caseBody)
- name = try $ mfilter (/= wildcardName) ident
- wildcardName = "other"
-
-cardinalPluralCases :: Parser Plural
-cardinalPluralCases = fmap Cardinal . tryClassify =<< p
- where tryClassify = maybe empty pure . uncurry classifyCardinal
- p = (,) <$> disorderedPluralCases <*> optional pluralWildcard
-
-ordinalPluralCases :: Parser Plural
-ordinalPluralCases = fmap Ordinal . tryClassify =<< p
- where tryClassify = maybe empty pure . uncurry classifyOrdinal
- p = (,) <$> disorderedPluralCases <*> pluralWildcard
-
--- Need to lift parsed plural cases into this type to make the list homogeneous.
-data ParsedPluralCase
- = ParsedExact (PluralCase PluralExact)
- | ParsedRule (PluralCase PluralRule)
-
-disorderedPluralCases :: Parser (NonEmpty ParsedPluralCase)
-disorderedPluralCases = flip NE.sepEndBy1 hspace1 $ choice
- [ (ParsedExact .) . PluralCase <$> pluralExact <* hspace1 <*> caseBody
- , (ParsedRule .) . PluralCase <$> pluralRule <* hspace1 <*> caseBody
- ]
-
-pluralExact :: Parser PluralExact
-pluralExact = PluralExact . T.pack <$> (string "=" *> some numberChar)
-
-pluralRule :: Parser PluralRule
-pluralRule = choice
- [ Zero <$ string "zero"
- , One <$ string "one"
- , Two <$ string "two"
- , Few <$ string "few"
- , Many <$ string "many"
- ]
-
-pluralWildcard :: Parser PluralWildcard
-pluralWildcard = PluralWildcard <$> (string "other" *> hspace1 *> caseBody)
-
--- | To simplify parsing cases we validate after-the-fact here. This achieves
--- two purposes. Firstly it enables us to fail the parse if the cases are not
--- exclusively literals and there's no wildcard (see below), and secondly it
--- allows us to organise the cases into the appropriate `Plural` constructors,
--- which in turn enables more efficient codegen later on.
---
--- =0 {} =1 {} -- Lit
--- =0 {} =1 {} other {} -- Lit
--- one {} two {} other {} -- Rule
--- =0 {} one {} other {} -- Mixed
---
-classifyCardinal :: Foldable f => f ParsedPluralCase -> Maybe PluralWildcard -> Maybe CardinalPlural
-classifyCardinal xs mw =
- case (organisePluralCases xs, mw) of
- ((Just ls, Nothing), mw') -> Just (LitPlural ls mw')
- ((Nothing, Just rs), Just w) -> Just (RulePlural rs w)
- ((Just ls, Just rs), Just w) -> Just (MixedPlural ls rs w)
- -- Rule plurals require a wildcard.
- ((_, Just _), Nothing) -> Nothing
- -- We should have parsed and organised at least one case somewhere.
- ((Nothing, Nothing), _) -> Nothing
-
--- | This is simpler than its cardinal counterpart. Here we need only validate
--- that there is at least one rule case. This is performed here to simplify
--- supporting disordered cases in the parser (whereas validating the presence
--- of a wildcard at the end is trivial in the parser).
-classifyOrdinal :: Foldable f => f ParsedPluralCase -> PluralWildcard -> Maybe OrdinalPlural
-classifyOrdinal xs w =
- case organisePluralCases xs of
- (_, Nothing) -> Nothing
- (mls, Just rs) -> Just $ OrdinalPlural (foldMap toList mls) rs w
-
-organisePluralCases :: Foldable f => f ParsedPluralCase -> (Maybe (NonEmpty (PluralCase PluralExact)), Maybe (NonEmpty (PluralCase PluralRule)))
-organisePluralCases = bimap nonEmpty nonEmpty . foldr f mempty
- where f (ParsedExact x) = first (x:)
- f (ParsedRule x) = second (x:)
+printErr = errorBundlePretty
diff --git a/lib/Intlc/Parser/Error.hs b/lib/Intlc/Parser/Error.hs
new file mode 100644
index 0000000..552dcb8
--- /dev/null
+++ b/lib/Intlc/Parser/Error.hs
@@ -0,0 +1,44 @@
+-- Our parsers are unavoidably tied together, and it's easiest if dependent
+-- parsers share the same error type, but we also need to avoid cyclic
+-- dependencies - so it all lives here.
+
+{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
+{-# HLINT ignore "Use newtype instead of data" #-}
+
+module Intlc.Parser.Error where
+
+import qualified Data.Text as T
+import Prelude
+import Text.Megaparsec (MonadParsec (parseError))
+import Text.Megaparsec.Error
+import Text.Megaparsec.Error.Builder
+
+type ParseFailure = ParseErrorBundle Text ParseErr
+
+data ParseErr
+ = FailedJSONParse JSONParseErr
+ | FailedMsgParse MessageParseErr
+ deriving (Show, Eq, Ord)
+
+data JSONParseErr
+ = DuplicateKey Text
+ deriving (Show, Eq, Ord)
+
+data MessageParseErr
+ = NoClosingCallbackTag Text
+ | BadClosingCallbackTag Text Text
+ deriving (Show, Eq, Ord)
+
+instance ShowErrorComponent ParseErr where
+ showErrorComponent (FailedJSONParse e) = showErrorComponent e
+ showErrorComponent (FailedMsgParse e) = showErrorComponent e
+
+instance ShowErrorComponent JSONParseErr where
+ showErrorComponent (DuplicateKey k) = "Duplicate key: \"" <> T.unpack k <> "\""
+
+instance ShowErrorComponent MessageParseErr where
+ showErrorComponent (NoClosingCallbackTag x) = "Callback tag <" <> T.unpack x <> "> not closed"
+ showErrorComponent (BadClosingCallbackTag x y) = "Callback tag <" <> T.unpack x <> "> not closed, instead found " <> T.unpack y <> ">"
+
+failingWith :: MonadParsec e s m => Int -> e -> m a
+pos `failingWith` e = parseError . errFancy pos . fancy . ErrorCustom $ e
diff --git a/lib/Intlc/Parser/ICU.hs b/lib/Intlc/Parser/ICU.hs
new file mode 100644
index 0000000..d64649d
--- /dev/null
+++ b/lib/Intlc/Parser/ICU.hs
@@ -0,0 +1,210 @@
+-- This module follows the following whitespace rules:
+-- * Consume all whitespace after tokens where possible.
+-- * Therefore, assume no whitespace before tokens.
+
+{-# LANGUAGE FlexibleContexts #-}
+
+{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
+{-# HLINT ignore "Use newtype instead of data" #-}
+
+module Intlc.Parser.ICU where
+
+import qualified Control.Applicative.Combinators.NonEmpty as NE
+import qualified Data.Text as T
+import Data.Void ()
+import Intlc.ICU
+import Intlc.Parser.Error (MessageParseErr (..),
+ ParseErr (FailedMsgParse),
+ failingWith)
+import Prelude
+import Text.Megaparsec hiding (State, Stream,
+ Token, many, some,
+ token)
+import Text.Megaparsec.Char
+import qualified Text.Megaparsec.Char.Lexer as L
+
+failingWith' :: MonadParsec ParseErr s m => Int -> MessageParseErr -> m a
+i `failingWith'` e = i `failingWith` FailedMsgParse e
+
+data ParserState = ParserState
+ { pluralCtxName :: Maybe Text
+ }
+
+initialState :: ParserState
+initialState = ParserState mempty
+
+type Parser = ReaderT ParserState (Parsec ParseErr Text)
+
+ident :: Parser Text
+ident = T.pack <$> some letterChar
+
+toMsg :: Stream -> Message
+toMsg = mergePlaintext >>> \case
+ [] -> Static ""
+ [Plaintext x] -> Static x
+ (x:xs) -> Dynamic (x :| xs)
+
+msg :: Parser Message
+msg = toMsg <$> manyTill token eof
+
+eom :: Parser ()
+eom = void $ char '"'
+
+token :: Parser Token
+token = choice
+ [ Interpolation <$> (interp <|> callback)
+ -- Plural cases support interpolating the number/argument in context with
+ -- `#`. When there's no such context, fail the parse in effect treating it
+ -- as plaintext.
+ , asks pluralCtxName >>= \case
+ Just n -> Interpolation (Arg n PluralRef) <$ string "#"
+ Nothing -> empty
+ , Plaintext <$> (try escaped <|> plaintext)
+ ]
+
+plaintext :: Parser Text
+plaintext = T.singleton <$> L.charLiteral
+
+escaped :: Parser Text
+escaped = apos *> choice
+ -- Double escape two apostrophes as one: "''" -> "'"
+ [ "'" <$ apos
+ -- Escape everything until another apostrophe, being careful of internal
+ -- double escapes: "'{a''}'" -> "{a'}"
+ , try $ T.pack <$> someTillNotDouble L.charLiteral apos
+ -- Escape the next syntax character as plaintext: "'{" -> "{"
+ , T.singleton <$> syn
+ ]
+ where apos = char '\''
+ syn = char '{' <|> char '<'
+ -- Like `someTill`, but doesn't end upon encountering two `end` tokens,
+ -- instead consuming them as one and continuing.
+ someTillNotDouble p end = tryOne
+ where tryOne = (:) <$> p <*> go
+ go = ((:) <$> try (end <* end) <*> go) <|> (mempty <$ end) <|> tryOne
+
+callback :: Parser Arg
+callback = do
+ (openPos, oname) <- (,) <$> (string "<" *> getOffset) <*> ident <* string ">"
+ mrest <- observing ((,,) <$> children <* string "" <*> getOffset <*> ident <* string ">")
+ case mrest of
+ Left _ -> openPos `failingWith'` NoClosingCallbackTag oname
+ Right (ch, closePos, cname) -> if oname == cname
+ then pure (Arg oname ch)
+ else closePos `failingWith'` BadClosingCallbackTag oname cname
+ where children = Callback . mergePlaintext <$> manyTill token (lookAhead $ void (string "") <|> eom)
+
+interp :: Parser Arg
+interp = between (char '{') (char '}') $ do
+ n <- ident
+ Arg n <$> option String (sep *> body n)
+ where sep = string "," <* hspace1
+ body n = choice
+ [ uncurry Bool <$> (string "boolean" *> sep *> boolCases)
+ , Number <$ string "number"
+ , Date <$> (string "date" *> sep *> dateTimeFmt)
+ , Time <$> (string "time" *> sep *> dateTimeFmt)
+ , Plural <$> withPluralCtx n (
+ string "plural" *> sep *> cardinalPluralCases
+ <|> string "selectordinal" *> sep *> ordinalPluralCases
+ )
+ , uncurry Select <$> (string "select" *> sep *> selectCases)
+ ]
+ withPluralCtx n = withReaderT (const . ParserState . pure $ n)
+
+dateTimeFmt :: Parser DateTimeFmt
+dateTimeFmt = choice
+ [ Short <$ string "short"
+ , Medium <$ string "medium"
+ , Long <$ string "long"
+ , Full <$ string "full"
+ ]
+
+caseBody :: Parser Stream
+caseBody = mergePlaintext <$> (string "{" *> manyTill token (string "}"))
+
+boolCases :: Parser (Stream, Stream)
+boolCases = (,)
+ <$> (string "true" *> hspace1 *> caseBody)
+ <* hspace1
+ <*> (string "false" *> hspace1 *> caseBody)
+
+selectCases :: Parser (NonEmpty SelectCase, Maybe SelectWildcard)
+selectCases = (,) <$> cases <*> optional wildcard
+ where cases = NE.sepEndBy1 (SelectCase <$> (name <* hspace1) <*> caseBody) hspace1
+ wildcard = SelectWildcard <$> (string wildcardName *> hspace1 *> caseBody)
+ name = try $ mfilter (/= wildcardName) ident
+ wildcardName = "other"
+
+cardinalPluralCases :: Parser Plural
+cardinalPluralCases = fmap Cardinal . tryClassify =<< p
+ where tryClassify = maybe empty pure . uncurry classifyCardinal
+ p = (,) <$> disorderedPluralCases <*> optional pluralWildcard
+
+ordinalPluralCases :: Parser Plural
+ordinalPluralCases = fmap Ordinal . tryClassify =<< p
+ where tryClassify = maybe empty pure . uncurry classifyOrdinal
+ p = (,) <$> disorderedPluralCases <*> pluralWildcard
+
+-- Need to lift parsed plural cases into this type to make the list homogeneous.
+data ParsedPluralCase
+ = ParsedExact (PluralCase PluralExact)
+ | ParsedRule (PluralCase PluralRule)
+
+disorderedPluralCases :: Parser (NonEmpty ParsedPluralCase)
+disorderedPluralCases = flip NE.sepEndBy1 hspace1 $ choice
+ [ (ParsedExact .) . PluralCase <$> pluralExact <* hspace1 <*> caseBody
+ , (ParsedRule .) . PluralCase <$> pluralRule <* hspace1 <*> caseBody
+ ]
+
+pluralExact :: Parser PluralExact
+pluralExact = PluralExact . T.pack <$> (string "=" *> some numberChar)
+
+pluralRule :: Parser PluralRule
+pluralRule = choice
+ [ Zero <$ string "zero"
+ , One <$ string "one"
+ , Two <$ string "two"
+ , Few <$ string "few"
+ , Many <$ string "many"
+ ]
+
+pluralWildcard :: Parser PluralWildcard
+pluralWildcard = PluralWildcard <$> (string "other" *> hspace1 *> caseBody)
+
+-- | To simplify parsing cases we validate after-the-fact here. This achieves
+-- two purposes. Firstly it enables us to fail the parse if the cases are not
+-- exclusively literals and there's no wildcard (see below), and secondly it
+-- allows us to organise the cases into the appropriate `Plural` constructors,
+-- which in turn enables more efficient codegen later on.
+--
+-- =0 {} =1 {} -- Lit
+-- =0 {} =1 {} other {} -- Lit
+-- one {} two {} other {} -- Rule
+-- =0 {} one {} other {} -- Mixed
+--
+classifyCardinal :: Foldable f => f ParsedPluralCase -> Maybe PluralWildcard -> Maybe CardinalPlural
+classifyCardinal xs mw =
+ case (organisePluralCases xs, mw) of
+ ((Just ls, Nothing), mw') -> Just (LitPlural ls mw')
+ ((Nothing, Just rs), Just w) -> Just (RulePlural rs w)
+ ((Just ls, Just rs), Just w) -> Just (MixedPlural ls rs w)
+ -- Rule plurals require a wildcard.
+ ((_, Just _), Nothing) -> Nothing
+ -- We should have parsed and organised at least one case somewhere.
+ ((Nothing, Nothing), _) -> Nothing
+
+-- | This is simpler than its cardinal counterpart. Here we need only validate
+-- that there is at least one rule case. This is performed here to simplify
+-- supporting disordered cases in the parser (whereas validating the presence
+-- of a wildcard at the end is trivial in the parser).
+classifyOrdinal :: Foldable f => f ParsedPluralCase -> PluralWildcard -> Maybe OrdinalPlural
+classifyOrdinal xs w =
+ case organisePluralCases xs of
+ (_, Nothing) -> Nothing
+ (mls, Just rs) -> Just $ OrdinalPlural (foldMap toList mls) rs w
+
+organisePluralCases :: Foldable f => f ParsedPluralCase -> (Maybe (NonEmpty (PluralCase PluralExact)), Maybe (NonEmpty (PluralCase PluralRule)))
+organisePluralCases = bimap nonEmpty nonEmpty . foldr f mempty
+ where f (ParsedExact x) = first (x:)
+ f (ParsedRule x) = second (x:)
diff --git a/lib/Intlc/Parser/JSON.hs b/lib/Intlc/Parser/JSON.hs
new file mode 100644
index 0000000..c1ff90e
--- /dev/null
+++ b/lib/Intlc/Parser/JSON.hs
@@ -0,0 +1,108 @@
+-- An in-house JSON parser specialised to our needs, piggybacking off of the
+-- sibling ICU parser. Allows interop with our ICU parser and bypasses some
+-- Aeson limitations.
+--
+-- This module follows the following whitespace rules:
+-- * Consume all whitespace after tokens where possible.
+-- * Therefore, assume no whitespace before tokens.
+
+{-# LANGUAGE FlexibleContexts #-}
+
+{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
+{-# HLINT ignore "Use newtype instead of data" #-}
+
+module Intlc.Parser.JSON where
+
+import Control.Applicative.Permutations
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import qualified Data.Text as T
+import Data.Void ()
+import Intlc.Core
+import qualified Intlc.ICU as ICU
+import Intlc.Parser.Error (JSONParseErr (..),
+ ParseErr (FailedJSONParse),
+ failingWith)
+import Intlc.Parser.ICU (initialState, toMsg, token)
+import Prelude hiding (null)
+import Text.Megaparsec hiding (State, Stream, Token,
+ many, some, token)
+import Text.Megaparsec.Char
+import qualified Text.Megaparsec.Char.Lexer as L
+import Text.Megaparsec.Error.Builder (errFancy, fancy)
+
+type Parser = StateT ParserState (Parsec ParseErr Text)
+
+data ParserState = ParserState
+ { keys :: Set Text
+ }
+
+failingWith' :: MonadParsec ParseErr s m => Int -> JSONParseErr -> m a
+i `failingWith'` e = i `failingWith` FailedJSONParse e
+
+dataset :: Parser (Dataset Translation)
+dataset = space *> objMap translation <* space <* eof
+
+-- It's important to use `toPermutationWithDefault` as opposed to standard
+-- parser combinators like `optional` so that `intercalateEffect` can do its
+-- magic.
+--
+-- Additionally, the consistent application of whitespace is extremely
+-- important, and the permutation appears to operate over the first parser, so
+-- be careful around any abstractions around the key double quotes.
+translation :: Parser Translation
+translation = obj $ intercalateEffect objSep $ Translation
+ <$> toPermutation (objPair' "message" msg)
+ <*> toPermutationWithDefault TypeScript (objPair' "backend" (backendp <|> TypeScript <$ null))
+ <*> toPermutationWithDefault Nothing (objPair' "description" (Just <$> strLit <|> Nothing <$ null))
+
+msg :: Parser ICU.Message
+msg = lift $ withRecovery recover p
+ where p = toMsg <$> runReaderT (char '"' *> manyTill token (char '"')) initialState
+ recover e = error "absurd" <$ consume <* registerParseError e
+ -- Once we've recovered we need to consume the rest of the message
+ -- string so that parsing can continue beyond it.
+ consume = void $ manyTill L.charLiteral (char '"')
+
+backendp :: Parser Backend
+backendp = choice
+ [ TypeScript <$ string (dblqts "ts")
+ , TypeScriptReact <$ string (dblqts "tsx")
+ ]
+
+null :: Parser ()
+null = void $ string "null"
+
+strLit :: Parser Text
+strLit = (T.pack <$>) $ char '"' *> manyTill L.charLiteral (char '"')
+
+dblqtsp :: Parser a -> Parser a
+dblqtsp = between (char '"') (char '"')
+
+dblqts :: Text -> Text
+dblqts x = "\"" <> x <> "\""
+
+-- Parse a homogeneous object of arbitrary keys, failing with recovery upon the
+-- presence of duplicate keys.
+objMap :: Parser a -> Parser (Map Text a)
+objMap v = fmap M.fromList . obj $ sepEndBy (objPair newKey v) objSep
+ where newKey = do
+ i <- getOffset
+ k <- strLit
+ prev <- gets keys
+ if Set.member k prev
+ then registerParseError . errFancy i . fancy . ErrorCustom . FailedJSONParse . DuplicateKey $ k
+ else modify (\x -> x { keys = Set.insert k prev })
+ pure k
+
+obj :: Parser a -> Parser a
+obj p = string "{" *> space *> p <* space <* string "}"
+
+objPair :: Parser Text -> Parser a -> Parser (Text, a)
+objPair k v = (,) <$> k <*> (space *> char ':' *> space *> v)
+
+objPair' :: Text -> Parser a -> Parser a
+objPair' k v = snd <$> objPair (string (dblqts k)) v
+
+objSep :: Parser ()
+objSep = void $ char ',' <* space
diff --git a/test/Intlc/Backend/TypeScriptSpec.hs b/test/Intlc/Backend/TypeScriptSpec.hs
index 20f1ac6..eab0551 100644
--- a/test/Intlc/Backend/TypeScriptSpec.hs
+++ b/test/Intlc/Backend/TypeScriptSpec.hs
@@ -7,7 +7,7 @@ import Intlc.Backend.TypeScript.Compiler (compileNamedExport,
import qualified Intlc.Backend.TypeScript.Language as TS
import Intlc.Core (Locale (Locale))
import qualified Intlc.ICU as ICU
-import Prelude hiding (ByteString)
+import Prelude
import System.FilePath ((<.>), (>))
import Test.Hspec
import Test.Hspec.Golden (Golden (..), defaultGolden)
diff --git a/test/Intlc/EndToEndSpec.hs b/test/Intlc/EndToEndSpec.hs
index 4bc1c57..c33a2a7 100644
--- a/test/Intlc/EndToEndSpec.hs
+++ b/test/Intlc/EndToEndSpec.hs
@@ -1,20 +1,19 @@
module Intlc.EndToEndSpec (spec) where
-import Data.ByteString.Lazy (ByteString)
-import qualified Data.Text as T
-import Intlc.Compiler (compileDataset)
-import Intlc.Core (Locale (Locale))
-import Intlc.Parser (parseDataset)
-import Prelude hiding (ByteString)
-import System.FilePath ((<.>), (>))
+import qualified Data.Text as T
+import Intlc.Compiler (compileDataset)
+import Intlc.Core (Locale (Locale))
+import Intlc.Parser (parseDataset)
+import Prelude
+import System.FilePath ((<.>), (>))
import Test.Hspec
-import Test.Hspec.Golden (Golden (..), defaultGolden)
-import Text.RawString.QQ (r)
+import Test.Hspec.Golden (Golden (..), defaultGolden)
+import Text.RawString.QQ (r)
-parseAndCompileDataset :: ByteString -> Either (NonEmpty Text) Text
-parseAndCompileDataset = compileDataset (Locale "en-US") <=< first (pure . show) . parseDataset
+parseAndCompileDataset :: Text -> Either (NonEmpty Text) Text
+parseAndCompileDataset = compileDataset (Locale "en-US") <=< first (pure . show) . parseDataset "test"
-golden :: String -> ByteString -> Golden String
+golden :: String -> Text -> Golden String
golden name in' = baseCfg
{ goldenFile = goldenFile baseCfg <.> "ts"
, actualFile = actualFile baseCfg <&> (<.> "ts")
@@ -25,7 +24,7 @@ golden name in' = baseCfg
fromErrs (Right x) = x
fromErrs (Left es) = T.intercalate "\n" . toList $ es
-(=*=) :: ByteString -> Text -> IO ()
+(=*=) :: Text -> Text -> IO ()
x =*= y = parseAndCompileDataset x `shouldBe` Right y
withReactImport :: Text -> Text
diff --git a/test/Intlc/ParserSpec.hs b/test/Intlc/Parser/ICUSpec.hs
similarity index 94%
rename from test/Intlc/ParserSpec.hs
rename to test/Intlc/Parser/ICUSpec.hs
index e50f326..d58e7e1 100644
--- a/test/Intlc/ParserSpec.hs
+++ b/test/Intlc/Parser/ICUSpec.hs
@@ -1,21 +1,23 @@
-module Intlc.ParserSpec (spec) where
+module Intlc.Parser.ICUSpec (spec) where
import Intlc.ICU
-import Intlc.Parser
-import Prelude hiding (ByteString)
+import Intlc.Parser.Error (MessageParseErr (..),
+ ParseErr (FailedMsgParse), ParseFailure)
+import Intlc.Parser.ICU
+import Prelude
import Test.Hspec
import Test.Hspec.Megaparsec hiding (initialState)
-import Text.Megaparsec (ParseErrorBundle, runParser)
+import Text.Megaparsec (runParser)
import Text.Megaparsec.Error (ErrorFancy (ErrorCustom))
-parseWith :: ParserState -> Parser a -> Text -> Either (ParseErrorBundle Text MessageParseErr) a
+parseWith :: ParserState -> Parser a -> Text -> Either ParseFailure a
parseWith s p = runParser (runReaderT p s) "test"
-parse :: Parser a -> Text -> Either (ParseErrorBundle Text MessageParseErr) a
+parse :: Parser a -> Text -> Either ParseFailure a
parse = parseWith initialState
spec :: Spec
-spec = describe "parser" $ do
+spec = describe "ICU parser" $ do
describe "message" $ do
it "does not tolerate unclosed braces" $ do
parse msg `shouldFailOn` "a { b"
@@ -133,7 +135,7 @@ spec = describe "parser" $ do
parse callback `shouldFailOn` ""
it "reports friendly error for bad closing tag" $ do
- let e i = errFancy i . fancy . ErrorCustom
+ let e i = errFancy i . fancy . ErrorCustom . FailedMsgParse
parse callback " there" `shouldFailWith` e 1 (NoClosingCallbackTag "hello")
parse callback " " `shouldFailWith` e 10 (BadClosingCallbackTag "hello" "there")
diff --git a/test/Intlc/Parser/JSONSpec.hs b/test/Intlc/Parser/JSONSpec.hs
new file mode 100644
index 0000000..49d634b
--- /dev/null
+++ b/test/Intlc/Parser/JSONSpec.hs
@@ -0,0 +1,80 @@
+module Intlc.Parser.JSONSpec (spec) where
+
+import Intlc.Core
+import Intlc.Parser (parseDataset)
+import Intlc.Parser.Error (JSONParseErr (..), MessageParseErr (..),
+ ParseErr (..), ParseFailure)
+import Prelude
+import Test.Hspec
+import Test.Hspec.Megaparsec hiding (initialState)
+import Text.Megaparsec (ErrorFancy (ErrorCustom), ParseError)
+import Text.RawString.QQ (r)
+
+parse :: Text -> Either ParseFailure (Dataset Translation)
+parse = parseDataset "test"
+
+succeedsOn :: Text -> Expectation
+succeedsOn = shouldSucceedOn parse
+
+e :: Int -> ParseErr -> ParseError s ParseErr
+e i = errFancy i . fancy . ErrorCustom
+
+spec :: Spec
+spec = describe "JSON parser" $ do
+ it "parses multiple translations" $ do
+ succeedsOn [r|{ "f": { "message": "{foo}" }, "g": { "message": "{bar}" } }|]
+
+ it "parses translation data keys in any order" $ do
+ succeedsOn [r|{ "f": { "message": "{foo}", "backend": "ts", "description": "bar" } }|]
+ succeedsOn [r|{ "f": { "message": "{foo}", "backend": "ts" } }|]
+ succeedsOn [r|{ "f": { "message": "{foo}", "description": "bar", "backend": "ts" } }|]
+ succeedsOn [r|{ "f": { "message": "{foo}", "description": "bar" } }|]
+ succeedsOn [r|{ "f": { "backend": "ts", "message": "{foo}", "description": "bar" } }|]
+ succeedsOn [r|{ "f": { "backend": "ts", "message": "{foo}" } }|]
+ succeedsOn [r|{ "f": { "backend": "ts", "description": "bar", "message": "{foo}" } }|]
+ succeedsOn [r|{ "f": { "description": "bar", "message": "{foo}", "backend": "ts" } }|]
+ succeedsOn [r|{ "f": { "description": "bar", "message": "{foo}" } }|]
+ succeedsOn [r|{ "f": { "description": "bar", "backend": "ts", "message": "{foo}" } }|]
+
+ it "accepts null or absence for optional keys" $ do
+ succeedsOn [r|{ "f": { "message": "{foo}", "backend": null, "description": null } }|]
+ succeedsOn [r|{ "f": { "message": "{foo}" } }|]
+
+ it "rejects duplicate keys" $ do
+ parse [r|{
+ "a": { "message": "{foo}" },
+ "b": { "message": "{foo}" },
+ "c": { "message": "{foo}" },
+ "b": { "message": "{foo}" },
+ "b": { "message": "{foo}" },
+ "d": { "message": "{foo}" },
+ "e": { "message": "{foo}" },
+ "e": { "message": "{foo}" }
+ }|] `shouldFailWithM`
+ [ e 113 (FailedJSONParse $ DuplicateKey "b")
+ , e 148 (FailedJSONParse $ DuplicateKey "b")
+ , e 253 (FailedJSONParse $ DuplicateKey "e")
+ ]
+
+ it "reports all custom error types simultaneously" $ do
+ parse [r|{
+ "dupeKey": {
+ "message": ""
+ },
+ "noClosing": {
+ "message": "bar"
+ },
+ "wrongClosing": {
+ "message": ""
+ },
+ "dupeKey": {
+ "message": ""
+ },
+ "ok": {
+ "message": "{n, number}"
+ }
+ }|] `shouldFailWithM`
+ [ e 94 (FailedMsgParse $ NoClosingCallbackTag "foo")
+ , e 163 (FailedMsgParse $ BadClosingCallbackTag "foo" "bar")
+ , e 184 (FailedJSONParse $ DuplicateKey "dupeKey")
+ ]