diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index fc8c7da..9547634 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -47,7 +47,7 @@ The ICU message parser is a [recursive descent parser](https://en.wikipedia.org/ For example, given an ICU message `hello {name}`, we’d first try parsing for an interpolation or tag, and failing that would parse plaintext, which we’d do until we encountered a reason to stop, in this case the tag opening character. We’ve stored "hello " as plaintext and will now continue along, this time succeeding in parsing the tag. We’ll now recursively parse inside the bounds of the tag, reusing the same top-level parser we were just using, this time parsing an interpolation. Having done this we’ve _consumed_ the entire input string and have successfully parsed a recursive list of nodes making up our [AST](https://en.wikipedia.org/wiki/Abstract_syntax_tree). -JSON parsing is offloaded to [aeson](https://hackage.haskell.org/package/aeson). +JSON parsing is handled internally for better interop with the ICU parser. JSON encoding is offloaded to [aeson](https://hackage.haskell.org/package/aeson). ### Compilation diff --git a/README.md b/README.md index f6b3e89..c143103 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ Translation files should be encoded as JSON and might look something like this: { "welcome": { "message": "Hello {name}", + "description": "Welcome message", "backend": "ts" } } diff --git a/cli/Main.hs b/cli/Main.hs index 3bf54e5..d412957 100644 --- a/cli/Main.hs +++ b/cli/Main.hs @@ -1,10 +1,11 @@ module Main where -import CLI (Opts (..), getOpts) -import qualified Data.Text as T -import Intlc.Compiler (compileDataset, compileFlattened) +import CLI (Opts (..), getOpts) +import qualified Data.Text as T +import Intlc.Compiler (compileDataset, compileFlattened) import Intlc.Core -import Intlc.Parser (ParseFailure, parseDataset, printErr) +import Intlc.Parser (parseDataset, printErr) +import Intlc.Parser.Error (ParseFailure) import Prelude main :: IO () @@ -16,5 +17,4 @@ main = getOpts >>= \case compilerDie = die . T.unpack . ("Invalid keys:\n" <>) . T.intercalate "\n" . fmap ("\t" <>) . toList getParsed :: FilePath -> IO (Either ParseFailure (Dataset Translation)) -getParsed = fmap parseDataset . readFileLBS - +getParsed x = parseDataset x <$> readFileText x diff --git a/intlc.cabal b/intlc.cabal index d65c4b5..41dbe0c 100644 --- a/intlc.cabal +++ b/intlc.cabal @@ -55,6 +55,9 @@ library Intlc.Core Intlc.ICU Intlc.Parser + Intlc.Parser.Error + Intlc.Parser.JSON + Intlc.Parser.ICU Utils test-suite test-intlc @@ -78,4 +81,5 @@ test-suite test-intlc Intlc.Backend.TypeScriptSpec Intlc.CompilerSpec Intlc.EndToEndSpec - Intlc.ParserSpec + Intlc.Parser.JSONSpec + Intlc.Parser.ICUSpec diff --git a/lib/Intlc/Core.hs b/lib/Intlc/Core.hs index 10c735f..7ab86be 100644 --- a/lib/Intlc/Core.hs +++ b/lib/Intlc/Core.hs @@ -2,11 +2,8 @@ module Intlc.Core where -import Data.Aeson (FromJSON (..), ToJSON (toEncoding), - withObject, withText, (.!=), (.:), (.:?), - (.=)) +import Data.Aeson (ToJSON (toEncoding), (.=)) import Data.Aeson.Encoding (pairs, string) -import qualified Data.Text as T import Intlc.ICU (Message) import Prelude @@ -22,12 +19,6 @@ data Backend | TypeScriptReact deriving (Show, Eq, Generic) -instance FromJSON Backend where - parseJSON = withText "Backend" decode - where decode "ts" = pure TypeScript - decode "tsx" = pure TypeScriptReact - decode x = fail $ "Unknown backend: " <> T.unpack x - instance ToJSON Backend where toEncoding TypeScript = string "ts" toEncoding TypeScriptReact = string "tsx" @@ -39,13 +30,6 @@ data UnparsedTranslation = UnparsedTranslation } deriving (Show, Eq, Generic) -instance FromJSON UnparsedTranslation where - parseJSON = withObject "UnparsedTranslation" decode - where decode x = UnparsedTranslation - <$> x .: "message" - <*> x .:? "backend" .!= TypeScript - <*> x .:? "description" - instance ToJSON UnparsedTranslation where toEncoding (UnparsedTranslation msg be md) = pairs $ "message" .= msg diff --git a/lib/Intlc/Parser.hs b/lib/Intlc/Parser.hs index bf2f3b3..04a3cd7 100644 --- a/lib/Intlc/Parser.hs +++ b/lib/Intlc/Parser.hs @@ -1,239 +1,14 @@ --- This module follows the following whitespace rules: --- * Consume all whitespace after tokens where possible. --- * Therefore, assume no whitespace before tokens. - -{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} -{-# HLINT ignore "Use newtype instead of data" #-} - module Intlc.Parser where -import qualified Control.Applicative.Combinators.NonEmpty as NE -import Data.Aeson (decode) -import Data.ByteString.Lazy (ByteString) -import qualified Data.Map as M -import qualified Data.Text as T -import Data.Validation (toEither, - validationNel) -import Data.Void () import Intlc.Core -import Intlc.ICU -import Prelude hiding (ByteString) -import Text.Megaparsec hiding (State, Stream, - Token, many, some, - token) -import Text.Megaparsec.Char -import qualified Text.Megaparsec.Char.Lexer as L -import Text.Megaparsec.Error.Builder - -type ParseErr = ParseErrorBundle Text MessageParseErr - -data ParseFailure - = FailedJsonParse - | FailedDatasetParse (NonEmpty ParseErr) - deriving (Show, Eq) +import Intlc.Parser.Error (ParseFailure) +import Intlc.Parser.JSON (ParserState (ParserState), dataset) +import Prelude +import Text.Megaparsec (runParser) +import Text.Megaparsec.Error -data MessageParseErr - = NoClosingCallbackTag Text - | BadClosingCallbackTag Text Text - deriving (Show, Eq, Ord) - -instance ShowErrorComponent MessageParseErr where - showErrorComponent (NoClosingCallbackTag x) = "Callback tag <" <> T.unpack x <> "> not closed" - showErrorComponent (BadClosingCallbackTag x y) = "Callback tag <" <> T.unpack x <> "> not closed, instead found T.unpack y <> ">" - -failingWith :: MonadParsec e s m => Int -> e -> m a -pos `failingWith` e = parseError . errFancy pos . fancy . ErrorCustom $ e +parseDataset :: FilePath -> Text -> Either ParseFailure (Dataset Translation) +parseDataset = runParser (evalStateT dataset (ParserState mempty)) printErr :: ParseFailure -> String -printErr FailedJsonParse = "Failed to parse JSON" -printErr (FailedDatasetParse es) = intercalate "\n" . toList . fmap errorBundlePretty $ es - -parseDataset :: ByteString -> Either ParseFailure (Dataset Translation) -parseDataset = parse' <=< decode' - where decode' = maybeToRight FailedJsonParse . decode - parse' = toEither . first FailedDatasetParse . M.traverseWithKey ((validationNel .) . parseTranslationFor) - -parseTranslationFor :: Text -> UnparsedTranslation -> Either ParseErr Translation -parseTranslationFor name (UnparsedTranslation umsg be md) = do - msg' <- runParser (runReaderT msg initialState) (T.unpack name) umsg - pure $ Translation msg' be md - -data ParserState = ParserState - { pluralCtxName :: Maybe Text - } - -initialState :: ParserState -initialState = ParserState mempty - -type Parser = ReaderT ParserState (Parsec MessageParseErr Text) - -ident :: Parser Text -ident = T.pack <$> some letterChar - -msg :: Parser Message -msg = f . mergePlaintext <$> manyTill token eof - where f [] = Static "" - f [Plaintext x] = Static x - f (x:xs) = Dynamic (x :| xs) - -token :: Parser Token -token = choice - [ Interpolation <$> (interp <|> callback) - -- Plural cases support interpolating the number/argument in context with - -- `#`. When there's no such context, fail the parse in effect treating it - -- as plaintext. - , asks pluralCtxName >>= \case - Just n -> Interpolation (Arg n PluralRef) <$ string "#" - Nothing -> empty - , Plaintext <$> (try escaped <|> plaintext) - ] - -plaintext :: Parser Text -plaintext = T.singleton <$> L.charLiteral - -escaped :: Parser Text -escaped = apos *> choice - -- Double escape two apostrophes as one: "''" -> "'" - [ "'" <$ apos - -- Escape everything until another apostrophe, being careful of internal - -- double escapes: "'{a''}'" -> "{a'}" - , try $ T.pack <$> someTillNotDouble L.charLiteral apos - -- Escape the next syntax character as plaintext: "'{" -> "{" - , T.singleton <$> syn - ] - where apos = char '\'' - syn = char '{' <|> char '<' - -- Like `someTill`, but doesn't end upon encountering two `end` tokens, - -- instead consuming them as one and continuing. - someTillNotDouble p end = tryOne - where tryOne = (:) <$> p <*> go - go = ((:) <$> try (end <* end) <*> go) <|> (mempty <$ end) <|> tryOne - -callback :: Parser Arg -callback = do - oname <- string "<" *> ident <* string ">" - mrest <- observing ((,,) <$> children <* string " getOffset <*> ident <* string ">") - case mrest of - Left _ -> 1 `failingWith` NoClosingCallbackTag oname - Right (ch, pos, cname) -> if oname == cname - then pure (Arg oname ch) - else pos `failingWith` BadClosingCallbackTag oname cname - where children = Callback . mergePlaintext <$> manyTill token (lookAhead $ string " ident - Arg n <$> choice - [ String <$ string "}" - , sep *> body n <* string "}" - ] - where sep = string "," <* hspace1 - body n = choice - [ uncurry Bool <$> (string "boolean" *> sep *> boolCases) - , Number <$ string "number" - , Date <$> (string "date" *> sep *> dateTimeFmt) - , Time <$> (string "time" *> sep *> dateTimeFmt) - , Plural <$> withPluralCtx n ( - string "plural" *> sep *> cardinalPluralCases - <|> string "selectordinal" *> sep *> ordinalPluralCases - ) - , uncurry Select <$> (string "select" *> sep *> selectCases) - ] - withPluralCtx n = withReaderT (const . ParserState . pure $ n) - -dateTimeFmt :: Parser DateTimeFmt -dateTimeFmt = choice - [ Short <$ string "short" - , Medium <$ string "medium" - , Long <$ string "long" - , Full <$ string "full" - ] - -caseBody :: Parser Stream -caseBody = mergePlaintext <$> (string "{" *> manyTill token (string "}")) - -boolCases :: Parser (Stream, Stream) -boolCases = (,) - <$> (string "true" *> hspace1 *> caseBody) - <* hspace1 - <*> (string "false" *> hspace1 *> caseBody) - -selectCases :: Parser (NonEmpty SelectCase, Maybe SelectWildcard) -selectCases = (,) <$> cases <*> optional wildcard - where cases = NE.sepEndBy1 (SelectCase <$> (name <* hspace1) <*> caseBody) hspace1 - wildcard = SelectWildcard <$> (string wildcardName *> hspace1 *> caseBody) - name = try $ mfilter (/= wildcardName) ident - wildcardName = "other" - -cardinalPluralCases :: Parser Plural -cardinalPluralCases = fmap Cardinal . tryClassify =<< p - where tryClassify = maybe empty pure . uncurry classifyCardinal - p = (,) <$> disorderedPluralCases <*> optional pluralWildcard - -ordinalPluralCases :: Parser Plural -ordinalPluralCases = fmap Ordinal . tryClassify =<< p - where tryClassify = maybe empty pure . uncurry classifyOrdinal - p = (,) <$> disorderedPluralCases <*> pluralWildcard - --- Need to lift parsed plural cases into this type to make the list homogeneous. -data ParsedPluralCase - = ParsedExact (PluralCase PluralExact) - | ParsedRule (PluralCase PluralRule) - -disorderedPluralCases :: Parser (NonEmpty ParsedPluralCase) -disorderedPluralCases = flip NE.sepEndBy1 hspace1 $ choice - [ (ParsedExact .) . PluralCase <$> pluralExact <* hspace1 <*> caseBody - , (ParsedRule .) . PluralCase <$> pluralRule <* hspace1 <*> caseBody - ] - -pluralExact :: Parser PluralExact -pluralExact = PluralExact . T.pack <$> (string "=" *> some numberChar) - -pluralRule :: Parser PluralRule -pluralRule = choice - [ Zero <$ string "zero" - , One <$ string "one" - , Two <$ string "two" - , Few <$ string "few" - , Many <$ string "many" - ] - -pluralWildcard :: Parser PluralWildcard -pluralWildcard = PluralWildcard <$> (string "other" *> hspace1 *> caseBody) - --- | To simplify parsing cases we validate after-the-fact here. This achieves --- two purposes. Firstly it enables us to fail the parse if the cases are not --- exclusively literals and there's no wildcard (see below), and secondly it --- allows us to organise the cases into the appropriate `Plural` constructors, --- which in turn enables more efficient codegen later on. --- --- =0 {} =1 {} -- Lit --- =0 {} =1 {} other {} -- Lit --- one {} two {} other {} -- Rule --- =0 {} one {} other {} -- Mixed --- -classifyCardinal :: Foldable f => f ParsedPluralCase -> Maybe PluralWildcard -> Maybe CardinalPlural -classifyCardinal xs mw = - case (organisePluralCases xs, mw) of - ((Just ls, Nothing), mw') -> Just (LitPlural ls mw') - ((Nothing, Just rs), Just w) -> Just (RulePlural rs w) - ((Just ls, Just rs), Just w) -> Just (MixedPlural ls rs w) - -- Rule plurals require a wildcard. - ((_, Just _), Nothing) -> Nothing - -- We should have parsed and organised at least one case somewhere. - ((Nothing, Nothing), _) -> Nothing - --- | This is simpler than its cardinal counterpart. Here we need only validate --- that there is at least one rule case. This is performed here to simplify --- supporting disordered cases in the parser (whereas validating the presence --- of a wildcard at the end is trivial in the parser). -classifyOrdinal :: Foldable f => f ParsedPluralCase -> PluralWildcard -> Maybe OrdinalPlural -classifyOrdinal xs w = - case organisePluralCases xs of - (_, Nothing) -> Nothing - (mls, Just rs) -> Just $ OrdinalPlural (foldMap toList mls) rs w - -organisePluralCases :: Foldable f => f ParsedPluralCase -> (Maybe (NonEmpty (PluralCase PluralExact)), Maybe (NonEmpty (PluralCase PluralRule))) -organisePluralCases = bimap nonEmpty nonEmpty . foldr f mempty - where f (ParsedExact x) = first (x:) - f (ParsedRule x) = second (x:) +printErr = errorBundlePretty diff --git a/lib/Intlc/Parser/Error.hs b/lib/Intlc/Parser/Error.hs new file mode 100644 index 0000000..552dcb8 --- /dev/null +++ b/lib/Intlc/Parser/Error.hs @@ -0,0 +1,44 @@ +-- Our parsers are unavoidably tied together, and it's easiest if dependent +-- parsers share the same error type, but we also need to avoid cyclic +-- dependencies - so it all lives here. + +{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} +{-# HLINT ignore "Use newtype instead of data" #-} + +module Intlc.Parser.Error where + +import qualified Data.Text as T +import Prelude +import Text.Megaparsec (MonadParsec (parseError)) +import Text.Megaparsec.Error +import Text.Megaparsec.Error.Builder + +type ParseFailure = ParseErrorBundle Text ParseErr + +data ParseErr + = FailedJSONParse JSONParseErr + | FailedMsgParse MessageParseErr + deriving (Show, Eq, Ord) + +data JSONParseErr + = DuplicateKey Text + deriving (Show, Eq, Ord) + +data MessageParseErr + = NoClosingCallbackTag Text + | BadClosingCallbackTag Text Text + deriving (Show, Eq, Ord) + +instance ShowErrorComponent ParseErr where + showErrorComponent (FailedJSONParse e) = showErrorComponent e + showErrorComponent (FailedMsgParse e) = showErrorComponent e + +instance ShowErrorComponent JSONParseErr where + showErrorComponent (DuplicateKey k) = "Duplicate key: \"" <> T.unpack k <> "\"" + +instance ShowErrorComponent MessageParseErr where + showErrorComponent (NoClosingCallbackTag x) = "Callback tag <" <> T.unpack x <> "> not closed" + showErrorComponent (BadClosingCallbackTag x y) = "Callback tag <" <> T.unpack x <> "> not closed, instead found T.unpack y <> ">" + +failingWith :: MonadParsec e s m => Int -> e -> m a +pos `failingWith` e = parseError . errFancy pos . fancy . ErrorCustom $ e diff --git a/lib/Intlc/Parser/ICU.hs b/lib/Intlc/Parser/ICU.hs new file mode 100644 index 0000000..d64649d --- /dev/null +++ b/lib/Intlc/Parser/ICU.hs @@ -0,0 +1,210 @@ +-- This module follows the following whitespace rules: +-- * Consume all whitespace after tokens where possible. +-- * Therefore, assume no whitespace before tokens. + +{-# LANGUAGE FlexibleContexts #-} + +{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} +{-# HLINT ignore "Use newtype instead of data" #-} + +module Intlc.Parser.ICU where + +import qualified Control.Applicative.Combinators.NonEmpty as NE +import qualified Data.Text as T +import Data.Void () +import Intlc.ICU +import Intlc.Parser.Error (MessageParseErr (..), + ParseErr (FailedMsgParse), + failingWith) +import Prelude +import Text.Megaparsec hiding (State, Stream, + Token, many, some, + token) +import Text.Megaparsec.Char +import qualified Text.Megaparsec.Char.Lexer as L + +failingWith' :: MonadParsec ParseErr s m => Int -> MessageParseErr -> m a +i `failingWith'` e = i `failingWith` FailedMsgParse e + +data ParserState = ParserState + { pluralCtxName :: Maybe Text + } + +initialState :: ParserState +initialState = ParserState mempty + +type Parser = ReaderT ParserState (Parsec ParseErr Text) + +ident :: Parser Text +ident = T.pack <$> some letterChar + +toMsg :: Stream -> Message +toMsg = mergePlaintext >>> \case + [] -> Static "" + [Plaintext x] -> Static x + (x:xs) -> Dynamic (x :| xs) + +msg :: Parser Message +msg = toMsg <$> manyTill token eof + +eom :: Parser () +eom = void $ char '"' + +token :: Parser Token +token = choice + [ Interpolation <$> (interp <|> callback) + -- Plural cases support interpolating the number/argument in context with + -- `#`. When there's no such context, fail the parse in effect treating it + -- as plaintext. + , asks pluralCtxName >>= \case + Just n -> Interpolation (Arg n PluralRef) <$ string "#" + Nothing -> empty + , Plaintext <$> (try escaped <|> plaintext) + ] + +plaintext :: Parser Text +plaintext = T.singleton <$> L.charLiteral + +escaped :: Parser Text +escaped = apos *> choice + -- Double escape two apostrophes as one: "''" -> "'" + [ "'" <$ apos + -- Escape everything until another apostrophe, being careful of internal + -- double escapes: "'{a''}'" -> "{a'}" + , try $ T.pack <$> someTillNotDouble L.charLiteral apos + -- Escape the next syntax character as plaintext: "'{" -> "{" + , T.singleton <$> syn + ] + where apos = char '\'' + syn = char '{' <|> char '<' + -- Like `someTill`, but doesn't end upon encountering two `end` tokens, + -- instead consuming them as one and continuing. + someTillNotDouble p end = tryOne + where tryOne = (:) <$> p <*> go + go = ((:) <$> try (end <* end) <*> go) <|> (mempty <$ end) <|> tryOne + +callback :: Parser Arg +callback = do + (openPos, oname) <- (,) <$> (string "<" *> getOffset) <*> ident <* string ">" + mrest <- observing ((,,) <$> children <* string " getOffset <*> ident <* string ">") + case mrest of + Left _ -> openPos `failingWith'` NoClosingCallbackTag oname + Right (ch, closePos, cname) -> if oname == cname + then pure (Arg oname ch) + else closePos `failingWith'` BadClosingCallbackTag oname cname + where children = Callback . mergePlaintext <$> manyTill token (lookAhead $ void (string " eom) + +interp :: Parser Arg +interp = between (char '{') (char '}') $ do + n <- ident + Arg n <$> option String (sep *> body n) + where sep = string "," <* hspace1 + body n = choice + [ uncurry Bool <$> (string "boolean" *> sep *> boolCases) + , Number <$ string "number" + , Date <$> (string "date" *> sep *> dateTimeFmt) + , Time <$> (string "time" *> sep *> dateTimeFmt) + , Plural <$> withPluralCtx n ( + string "plural" *> sep *> cardinalPluralCases + <|> string "selectordinal" *> sep *> ordinalPluralCases + ) + , uncurry Select <$> (string "select" *> sep *> selectCases) + ] + withPluralCtx n = withReaderT (const . ParserState . pure $ n) + +dateTimeFmt :: Parser DateTimeFmt +dateTimeFmt = choice + [ Short <$ string "short" + , Medium <$ string "medium" + , Long <$ string "long" + , Full <$ string "full" + ] + +caseBody :: Parser Stream +caseBody = mergePlaintext <$> (string "{" *> manyTill token (string "}")) + +boolCases :: Parser (Stream, Stream) +boolCases = (,) + <$> (string "true" *> hspace1 *> caseBody) + <* hspace1 + <*> (string "false" *> hspace1 *> caseBody) + +selectCases :: Parser (NonEmpty SelectCase, Maybe SelectWildcard) +selectCases = (,) <$> cases <*> optional wildcard + where cases = NE.sepEndBy1 (SelectCase <$> (name <* hspace1) <*> caseBody) hspace1 + wildcard = SelectWildcard <$> (string wildcardName *> hspace1 *> caseBody) + name = try $ mfilter (/= wildcardName) ident + wildcardName = "other" + +cardinalPluralCases :: Parser Plural +cardinalPluralCases = fmap Cardinal . tryClassify =<< p + where tryClassify = maybe empty pure . uncurry classifyCardinal + p = (,) <$> disorderedPluralCases <*> optional pluralWildcard + +ordinalPluralCases :: Parser Plural +ordinalPluralCases = fmap Ordinal . tryClassify =<< p + where tryClassify = maybe empty pure . uncurry classifyOrdinal + p = (,) <$> disorderedPluralCases <*> pluralWildcard + +-- Need to lift parsed plural cases into this type to make the list homogeneous. +data ParsedPluralCase + = ParsedExact (PluralCase PluralExact) + | ParsedRule (PluralCase PluralRule) + +disorderedPluralCases :: Parser (NonEmpty ParsedPluralCase) +disorderedPluralCases = flip NE.sepEndBy1 hspace1 $ choice + [ (ParsedExact .) . PluralCase <$> pluralExact <* hspace1 <*> caseBody + , (ParsedRule .) . PluralCase <$> pluralRule <* hspace1 <*> caseBody + ] + +pluralExact :: Parser PluralExact +pluralExact = PluralExact . T.pack <$> (string "=" *> some numberChar) + +pluralRule :: Parser PluralRule +pluralRule = choice + [ Zero <$ string "zero" + , One <$ string "one" + , Two <$ string "two" + , Few <$ string "few" + , Many <$ string "many" + ] + +pluralWildcard :: Parser PluralWildcard +pluralWildcard = PluralWildcard <$> (string "other" *> hspace1 *> caseBody) + +-- | To simplify parsing cases we validate after-the-fact here. This achieves +-- two purposes. Firstly it enables us to fail the parse if the cases are not +-- exclusively literals and there's no wildcard (see below), and secondly it +-- allows us to organise the cases into the appropriate `Plural` constructors, +-- which in turn enables more efficient codegen later on. +-- +-- =0 {} =1 {} -- Lit +-- =0 {} =1 {} other {} -- Lit +-- one {} two {} other {} -- Rule +-- =0 {} one {} other {} -- Mixed +-- +classifyCardinal :: Foldable f => f ParsedPluralCase -> Maybe PluralWildcard -> Maybe CardinalPlural +classifyCardinal xs mw = + case (organisePluralCases xs, mw) of + ((Just ls, Nothing), mw') -> Just (LitPlural ls mw') + ((Nothing, Just rs), Just w) -> Just (RulePlural rs w) + ((Just ls, Just rs), Just w) -> Just (MixedPlural ls rs w) + -- Rule plurals require a wildcard. + ((_, Just _), Nothing) -> Nothing + -- We should have parsed and organised at least one case somewhere. + ((Nothing, Nothing), _) -> Nothing + +-- | This is simpler than its cardinal counterpart. Here we need only validate +-- that there is at least one rule case. This is performed here to simplify +-- supporting disordered cases in the parser (whereas validating the presence +-- of a wildcard at the end is trivial in the parser). +classifyOrdinal :: Foldable f => f ParsedPluralCase -> PluralWildcard -> Maybe OrdinalPlural +classifyOrdinal xs w = + case organisePluralCases xs of + (_, Nothing) -> Nothing + (mls, Just rs) -> Just $ OrdinalPlural (foldMap toList mls) rs w + +organisePluralCases :: Foldable f => f ParsedPluralCase -> (Maybe (NonEmpty (PluralCase PluralExact)), Maybe (NonEmpty (PluralCase PluralRule))) +organisePluralCases = bimap nonEmpty nonEmpty . foldr f mempty + where f (ParsedExact x) = first (x:) + f (ParsedRule x) = second (x:) diff --git a/lib/Intlc/Parser/JSON.hs b/lib/Intlc/Parser/JSON.hs new file mode 100644 index 0000000..c1ff90e --- /dev/null +++ b/lib/Intlc/Parser/JSON.hs @@ -0,0 +1,108 @@ +-- An in-house JSON parser specialised to our needs, piggybacking off of the +-- sibling ICU parser. Allows interop with our ICU parser and bypasses some +-- Aeson limitations. +-- +-- This module follows the following whitespace rules: +-- * Consume all whitespace after tokens where possible. +-- * Therefore, assume no whitespace before tokens. + +{-# LANGUAGE FlexibleContexts #-} + +{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} +{-# HLINT ignore "Use newtype instead of data" #-} + +module Intlc.Parser.JSON where + +import Control.Applicative.Permutations +import qualified Data.Map as M +import qualified Data.Set as Set +import qualified Data.Text as T +import Data.Void () +import Intlc.Core +import qualified Intlc.ICU as ICU +import Intlc.Parser.Error (JSONParseErr (..), + ParseErr (FailedJSONParse), + failingWith) +import Intlc.Parser.ICU (initialState, toMsg, token) +import Prelude hiding (null) +import Text.Megaparsec hiding (State, Stream, Token, + many, some, token) +import Text.Megaparsec.Char +import qualified Text.Megaparsec.Char.Lexer as L +import Text.Megaparsec.Error.Builder (errFancy, fancy) + +type Parser = StateT ParserState (Parsec ParseErr Text) + +data ParserState = ParserState + { keys :: Set Text + } + +failingWith' :: MonadParsec ParseErr s m => Int -> JSONParseErr -> m a +i `failingWith'` e = i `failingWith` FailedJSONParse e + +dataset :: Parser (Dataset Translation) +dataset = space *> objMap translation <* space <* eof + +-- It's important to use `toPermutationWithDefault` as opposed to standard +-- parser combinators like `optional` so that `intercalateEffect` can do its +-- magic. +-- +-- Additionally, the consistent application of whitespace is extremely +-- important, and the permutation appears to operate over the first parser, so +-- be careful around any abstractions around the key double quotes. +translation :: Parser Translation +translation = obj $ intercalateEffect objSep $ Translation + <$> toPermutation (objPair' "message" msg) + <*> toPermutationWithDefault TypeScript (objPair' "backend" (backendp <|> TypeScript <$ null)) + <*> toPermutationWithDefault Nothing (objPair' "description" (Just <$> strLit <|> Nothing <$ null)) + +msg :: Parser ICU.Message +msg = lift $ withRecovery recover p + where p = toMsg <$> runReaderT (char '"' *> manyTill token (char '"')) initialState + recover e = error "absurd" <$ consume <* registerParseError e + -- Once we've recovered we need to consume the rest of the message + -- string so that parsing can continue beyond it. + consume = void $ manyTill L.charLiteral (char '"') + +backendp :: Parser Backend +backendp = choice + [ TypeScript <$ string (dblqts "ts") + , TypeScriptReact <$ string (dblqts "tsx") + ] + +null :: Parser () +null = void $ string "null" + +strLit :: Parser Text +strLit = (T.pack <$>) $ char '"' *> manyTill L.charLiteral (char '"') + +dblqtsp :: Parser a -> Parser a +dblqtsp = between (char '"') (char '"') + +dblqts :: Text -> Text +dblqts x = "\"" <> x <> "\"" + +-- Parse a homogeneous object of arbitrary keys, failing with recovery upon the +-- presence of duplicate keys. +objMap :: Parser a -> Parser (Map Text a) +objMap v = fmap M.fromList . obj $ sepEndBy (objPair newKey v) objSep + where newKey = do + i <- getOffset + k <- strLit + prev <- gets keys + if Set.member k prev + then registerParseError . errFancy i . fancy . ErrorCustom . FailedJSONParse . DuplicateKey $ k + else modify (\x -> x { keys = Set.insert k prev }) + pure k + +obj :: Parser a -> Parser a +obj p = string "{" *> space *> p <* space <* string "}" + +objPair :: Parser Text -> Parser a -> Parser (Text, a) +objPair k v = (,) <$> k <*> (space *> char ':' *> space *> v) + +objPair' :: Text -> Parser a -> Parser a +objPair' k v = snd <$> objPair (string (dblqts k)) v + +objSep :: Parser () +objSep = void $ char ',' <* space diff --git a/test/Intlc/Backend/TypeScriptSpec.hs b/test/Intlc/Backend/TypeScriptSpec.hs index 20f1ac6..eab0551 100644 --- a/test/Intlc/Backend/TypeScriptSpec.hs +++ b/test/Intlc/Backend/TypeScriptSpec.hs @@ -7,7 +7,7 @@ import Intlc.Backend.TypeScript.Compiler (compileNamedExport, import qualified Intlc.Backend.TypeScript.Language as TS import Intlc.Core (Locale (Locale)) import qualified Intlc.ICU as ICU -import Prelude hiding (ByteString) +import Prelude import System.FilePath ((<.>), ()) import Test.Hspec import Test.Hspec.Golden (Golden (..), defaultGolden) diff --git a/test/Intlc/EndToEndSpec.hs b/test/Intlc/EndToEndSpec.hs index 4bc1c57..c33a2a7 100644 --- a/test/Intlc/EndToEndSpec.hs +++ b/test/Intlc/EndToEndSpec.hs @@ -1,20 +1,19 @@ module Intlc.EndToEndSpec (spec) where -import Data.ByteString.Lazy (ByteString) -import qualified Data.Text as T -import Intlc.Compiler (compileDataset) -import Intlc.Core (Locale (Locale)) -import Intlc.Parser (parseDataset) -import Prelude hiding (ByteString) -import System.FilePath ((<.>), ()) +import qualified Data.Text as T +import Intlc.Compiler (compileDataset) +import Intlc.Core (Locale (Locale)) +import Intlc.Parser (parseDataset) +import Prelude +import System.FilePath ((<.>), ()) import Test.Hspec -import Test.Hspec.Golden (Golden (..), defaultGolden) -import Text.RawString.QQ (r) +import Test.Hspec.Golden (Golden (..), defaultGolden) +import Text.RawString.QQ (r) -parseAndCompileDataset :: ByteString -> Either (NonEmpty Text) Text -parseAndCompileDataset = compileDataset (Locale "en-US") <=< first (pure . show) . parseDataset +parseAndCompileDataset :: Text -> Either (NonEmpty Text) Text +parseAndCompileDataset = compileDataset (Locale "en-US") <=< first (pure . show) . parseDataset "test" -golden :: String -> ByteString -> Golden String +golden :: String -> Text -> Golden String golden name in' = baseCfg { goldenFile = goldenFile baseCfg <.> "ts" , actualFile = actualFile baseCfg <&> (<.> "ts") @@ -25,7 +24,7 @@ golden name in' = baseCfg fromErrs (Right x) = x fromErrs (Left es) = T.intercalate "\n" . toList $ es -(=*=) :: ByteString -> Text -> IO () +(=*=) :: Text -> Text -> IO () x =*= y = parseAndCompileDataset x `shouldBe` Right y withReactImport :: Text -> Text diff --git a/test/Intlc/ParserSpec.hs b/test/Intlc/Parser/ICUSpec.hs similarity index 94% rename from test/Intlc/ParserSpec.hs rename to test/Intlc/Parser/ICUSpec.hs index e50f326..d58e7e1 100644 --- a/test/Intlc/ParserSpec.hs +++ b/test/Intlc/Parser/ICUSpec.hs @@ -1,21 +1,23 @@ -module Intlc.ParserSpec (spec) where +module Intlc.Parser.ICUSpec (spec) where import Intlc.ICU -import Intlc.Parser -import Prelude hiding (ByteString) +import Intlc.Parser.Error (MessageParseErr (..), + ParseErr (FailedMsgParse), ParseFailure) +import Intlc.Parser.ICU +import Prelude import Test.Hspec import Test.Hspec.Megaparsec hiding (initialState) -import Text.Megaparsec (ParseErrorBundle, runParser) +import Text.Megaparsec (runParser) import Text.Megaparsec.Error (ErrorFancy (ErrorCustom)) -parseWith :: ParserState -> Parser a -> Text -> Either (ParseErrorBundle Text MessageParseErr) a +parseWith :: ParserState -> Parser a -> Text -> Either ParseFailure a parseWith s p = runParser (runReaderT p s) "test" -parse :: Parser a -> Text -> Either (ParseErrorBundle Text MessageParseErr) a +parse :: Parser a -> Text -> Either ParseFailure a parse = parseWith initialState spec :: Spec -spec = describe "parser" $ do +spec = describe "ICU parser" $ do describe "message" $ do it "does not tolerate unclosed braces" $ do parse msg `shouldFailOn` "a { b" @@ -133,7 +135,7 @@ spec = describe "parser" $ do parse callback `shouldFailOn` "" it "reports friendly error for bad closing tag" $ do - let e i = errFancy i . fancy . ErrorCustom + let e i = errFancy i . fancy . ErrorCustom . FailedMsgParse parse callback " there" `shouldFailWith` e 1 (NoClosingCallbackTag "hello") parse callback " " `shouldFailWith` e 10 (BadClosingCallbackTag "hello" "there") diff --git a/test/Intlc/Parser/JSONSpec.hs b/test/Intlc/Parser/JSONSpec.hs new file mode 100644 index 0000000..49d634b --- /dev/null +++ b/test/Intlc/Parser/JSONSpec.hs @@ -0,0 +1,80 @@ +module Intlc.Parser.JSONSpec (spec) where + +import Intlc.Core +import Intlc.Parser (parseDataset) +import Intlc.Parser.Error (JSONParseErr (..), MessageParseErr (..), + ParseErr (..), ParseFailure) +import Prelude +import Test.Hspec +import Test.Hspec.Megaparsec hiding (initialState) +import Text.Megaparsec (ErrorFancy (ErrorCustom), ParseError) +import Text.RawString.QQ (r) + +parse :: Text -> Either ParseFailure (Dataset Translation) +parse = parseDataset "test" + +succeedsOn :: Text -> Expectation +succeedsOn = shouldSucceedOn parse + +e :: Int -> ParseErr -> ParseError s ParseErr +e i = errFancy i . fancy . ErrorCustom + +spec :: Spec +spec = describe "JSON parser" $ do + it "parses multiple translations" $ do + succeedsOn [r|{ "f": { "message": "{foo}" }, "g": { "message": "{bar}" } }|] + + it "parses translation data keys in any order" $ do + succeedsOn [r|{ "f": { "message": "{foo}", "backend": "ts", "description": "bar" } }|] + succeedsOn [r|{ "f": { "message": "{foo}", "backend": "ts" } }|] + succeedsOn [r|{ "f": { "message": "{foo}", "description": "bar", "backend": "ts" } }|] + succeedsOn [r|{ "f": { "message": "{foo}", "description": "bar" } }|] + succeedsOn [r|{ "f": { "backend": "ts", "message": "{foo}", "description": "bar" } }|] + succeedsOn [r|{ "f": { "backend": "ts", "message": "{foo}" } }|] + succeedsOn [r|{ "f": { "backend": "ts", "description": "bar", "message": "{foo}" } }|] + succeedsOn [r|{ "f": { "description": "bar", "message": "{foo}", "backend": "ts" } }|] + succeedsOn [r|{ "f": { "description": "bar", "message": "{foo}" } }|] + succeedsOn [r|{ "f": { "description": "bar", "backend": "ts", "message": "{foo}" } }|] + + it "accepts null or absence for optional keys" $ do + succeedsOn [r|{ "f": { "message": "{foo}", "backend": null, "description": null } }|] + succeedsOn [r|{ "f": { "message": "{foo}" } }|] + + it "rejects duplicate keys" $ do + parse [r|{ + "a": { "message": "{foo}" }, + "b": { "message": "{foo}" }, + "c": { "message": "{foo}" }, + "b": { "message": "{foo}" }, + "b": { "message": "{foo}" }, + "d": { "message": "{foo}" }, + "e": { "message": "{foo}" }, + "e": { "message": "{foo}" } + }|] `shouldFailWithM` + [ e 113 (FailedJSONParse $ DuplicateKey "b") + , e 148 (FailedJSONParse $ DuplicateKey "b") + , e 253 (FailedJSONParse $ DuplicateKey "e") + ] + + it "reports all custom error types simultaneously" $ do + parse [r|{ + "dupeKey": { + "message": "" + }, + "noClosing": { + "message": "bar" + }, + "wrongClosing": { + "message": "" + }, + "dupeKey": { + "message": "" + }, + "ok": { + "message": "{n, number}" + } + }|] `shouldFailWithM` + [ e 94 (FailedMsgParse $ NoClosingCallbackTag "foo") + , e 163 (FailedMsgParse $ BadClosingCallbackTag "foo" "bar") + , e 184 (FailedJSONParse $ DuplicateKey "dupeKey") + ]