diff --git a/CHANGELOG.md b/CHANGELOG.md index 15b8e2b..2e730f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ * Updated some dependencies. Now Nekomata can be built with GHC 9.8.1. * Fixed a bug in printing truncated results. +* Now characters are internally represented as `Word8`s instead of `Char`s. + +### Breaking changes + +* Fixed a bug in comparing characters. Now characters are compared by their code points in Nekomata's custom code page instead of Unicode. ## 0.5.1.0 -- 2023-09-27 diff --git a/analysis/corpus.txt b/analysis/corpus.txt index 01fef09..c47595d 100644 --- a/analysis/corpus.txt +++ b/analysis/corpus.txt @@ -290,3 +290,7 @@ Jĭ?∑haṀ ĎSᵉti¦ Jᵐ{x:ᵒ≈>~}aş# →r$ÇƆ/←ŗ0ɔ$ᵑ∆ +1D∑ +rjŢ +ŋ∑Aaṁ +ĕ3¦¿ĭÐŤʳXH"c"I diff --git a/analysis/freq_1gram.txt b/analysis/freq_1gram.txt index c7edc66..2f23aa2 100644 --- a/analysis/freq_1gram.txt +++ b/analysis/freq_1gram.txt @@ -1,7 +1,7 @@ { : 67 $ : 48 : : 45 -∑ : 39 +∑ : 41 = : 38 + : 36 ᵐ : 32 @@ -9,11 +9,11 @@ $ : 48 → : 28 R : 28 * : 27 +a : 26 } : 26 -a : 25 +1 : 23 - : 23 -1 : 22 -j : 21 +j : 22 ᵉ : 20 ç : 20 x : 20 @@ -22,54 +22,55 @@ x : 20 o : 18 ↔ : 18 # : 17 +3 : 17 u : 17 2 : 17 -3 : 16 +ĭ : 16 p : 16 ~ : 16 ↕ : 15 -ĭ : 15 l : 15 ᵖ : 15 Ṁ : 15 Ƃ : 14 +Ð : 14 S : 13 đ : 13 ∫ : 13 t : 13 Ň : 13 -Ð : 13 +" : 12 ᶦ : 12 N : 12 ½ : 12 +Ť : 12 Ɔ : 12 ᵑ : 12 Ɗ : 12 ; : 12 J : 11 Q : 11 -Ť : 11 ᶻ : 11 ≡ : 11 Z : 11 ĉ : 11 _ : 11 L : 11 -" : 10 < : 10 +Ţ : 10 ᵃ : 10 +r : 10 +ŋ : 10 ? : 10 Ë : 10 +ṁ : 10 ∏ : 10 ʷ : 9 z : 9 ˡ : 9 -Ţ : 9 -r : 9 -ŋ : 9 +c : 9 ᵚ : 9 ᶠ : 9 -ṁ : 9 ± : 9 P : 8 ᶜ : 8 @@ -81,7 +82,6 @@ E : 8 ž : 8 ≥ : 8 ‼ : 8 -c : 8 % : 8 ŗ : 8 ƒ : 8 @@ -94,6 +94,7 @@ f : 7 £ : 7 / : 7 ɗ : 7 +¦ : 7 Ĉ : 7 ∙ : 7 ä : 7 @@ -107,8 +108,8 @@ O : 7 ∕ : 6 Ä : 6 Ø : 6 -¦ : 6 ş : 6 +¿ : 6 i : 6 B : 6 ∩ : 6 @@ -116,33 +117,34 @@ B : 6 į : 6 ¢ : 6 ƀ : 5 +I : 5 y : 5 Ö : 5 +H : 5 d : 5 Ď : 5 ≤ : 5 Ç : 5 +ʳ : 5 Ĭ : 5 ɔ : 5 µ : 5 -¿ : 5 +ĕ : 5 h : 5 8 : 5 √ : 5 5 : 5 0 : 5 b : 5 -I : 4 -H : 4 +A : 5 ŧ : 4 -ʳ : 4 U : 4 G : 4 k : 4 F : 4 ƥ : 4 s : 4 -ĕ : 4 +D : 4 M : 4 > : 4 ÷ : 4 @@ -151,15 +153,14 @@ M : 4 ᵈ : 4 ň : 4 ᵋ : 4 -A : 4 ĝ : 3 Ṃ : 3 ř : 3 Y : 3 T : 3 Ħ : 3 -D : 3 7 : 3 +X : 3 ƶ : 3 & : 3 ≠ : 3 @@ -170,7 +171,6 @@ v : 3 Ƥ : 2 g : 2 ^ : 2 -X : 2 K : 2 6 : 2 m : 2 diff --git a/analysis/freq_2gram.txt b/analysis/freq_2gram.txt index 9372fae..ba45e2b 100644 --- a/analysis/freq_2gram.txt +++ b/analysis/freq_2gram.txt @@ -53,6 +53,7 @@ u# : 3 pN : 3 {ˣ : 3 {į : 3 +aṁ : 3 ↕ũ : 3 x: : 3 +} : 3 @@ -108,6 +109,7 @@ qᵗ : 2 ;ᶜ : 2 ᶜt : 2 :Ĭ : 2 +ÐŤ : 2 ∑≡ : 2 R~ : 2 ĉ~ : 2 @@ -131,10 +133,12 @@ Qƥ : 2 +1 : 2 12 : 2 {- : 2 +3¦ : 2 {≈ : 2 Ë: : 2 ᵒ& : 2 &Þ : 2 +ŋ∑ : 2 ĉᵐ : 2 Þ½ : 2 Sđ : 2 @@ -143,6 +147,7 @@ Sđ : 2 ;$ : 2 N, : 2 ½$ : 2 +ĭÐ : 2 oĉ : 2 oĭ : 2 ĉ# : 2 @@ -154,7 +159,6 @@ $đ : 2 _= : 2 Ṁ→ : 2 -ň : 2 -aṁ : 2 0* : 2 x= : 2 $ᶻ : 2 @@ -196,9 +200,11 @@ $L : 2 RS : 2 += : 2 ɔ$ : 2 +jŢ : 2 3~ : 2 ~ᵑ : 2 ᵋ∩ : 2 +rj : 2 ↕∆ : 2 ←_ : 2 :, : 2 @@ -330,7 +336,6 @@ tᶻ : 1 Ɗ: : 1 Ĭ; : 1 ;Ð : 1 -ÐŤ : 1 Ť∑ : 1 Fƒ : 1 ƒ# : 1 @@ -458,7 +463,6 @@ $∑ : 1 %→ : 1 -Z : 1 Z3 : 1 -3¦ : 1 RË : 1 Ë3 : 1 3÷ : 1 @@ -473,7 +477,6 @@ Ođ : 1 ≈∑ : 1 ∑Ƶ : 1 Þ£ : 1 -ŋ∑ : 1 ∑ũ : 1 ᵐ∫ : 1 ∫j : 1 @@ -506,7 +509,6 @@ _∑ : 1 *Ɗ : 1 Ɗa : 1 jĭ : 1 -ĭÐ : 1 Ðɗ : 1 ɗ$ : 1 Ţ≡ : 1 @@ -838,7 +840,6 @@ tI : 1 I} : 1 }ṁ : 1 Sj : 1 -jŢ : 1 Ţ½ : 1 ađ : 1 R↔ : 1 @@ -941,7 +942,6 @@ RF : 1 F£ : 1 £b : 1 ↕r : 1 -rj : 1 j@ : 1 @ũ : 1 ᵚp : 1 @@ -1208,3 +1208,17 @@ r$ : 1 ŗ0 : 1 0ɔ : 1 ᵑ∆ : 1 +1D : 1 +D∑ : 1 +∑A : 1 +Aa : 1 +ĕ3 : 1 +¦¿ : 1 +¿ĭ : 1 +Ťʳ : 1 +ʳX : 1 +XH : 1 +H" : 1 +"c : 1 +c" : 1 +"I : 1 diff --git a/analysis/freq_3gram.txt b/analysis/freq_3gram.txt index c323487..ebe8dde 100644 --- a/analysis/freq_3gram.txt +++ b/analysis/freq_3gram.txt @@ -1196,3 +1196,20 @@ $ÇƆ : 1 0ɔ$ : 1 ɔ$ᵑ : 1 $ᵑ∆ : 1 +1D∑ : 1 +rjŢ : 1 +ŋ∑A : 1 +∑Aa : 1 +Aaṁ : 1 +ĕ3¦ : 1 +3¦¿ : 1 +¦¿ĭ : 1 +¿ĭÐ : 1 +ĭÐŤ : 1 +ÐŤʳ : 1 +ŤʳX : 1 +ʳXH : 1 +XH" : 1 +H"c : 1 +"c" : 1 +c"I : 1 diff --git a/analysis/freq_4gram.txt b/analysis/freq_4gram.txt index 375b943..4e38454 100644 --- a/analysis/freq_4gram.txt +++ b/analysis/freq_4gram.txt @@ -972,3 +972,16 @@ $ÇƆ/ : 1 ŗ0ɔ$ : 1 0ɔ$ᵑ : 1 ɔ$ᵑ∆ : 1 +ŋ∑Aa : 1 +∑Aaṁ : 1 +ĕ3¦¿ : 1 +3¦¿ĭ : 1 +¦¿ĭÐ : 1 +¿ĭÐŤ : 1 +ĭÐŤʳ : 1 +ÐŤʳX : 1 +ŤʳXH : 1 +ʳXH" : 1 +XH"c : 1 +H"c" : 1 +"c"I : 1 diff --git a/analysis/freq_5gram.txt b/analysis/freq_5gram.txt index 7cb3cf3..3f9e2b7 100644 --- a/analysis/freq_5gram.txt +++ b/analysis/freq_5gram.txt @@ -765,3 +765,14 @@ $ÇƆ/← : 1 ←ŗ0ɔ$ : 1 ŗ0ɔ$ᵑ : 1 0ɔ$ᵑ∆ : 1 +ŋ∑Aaṁ : 1 +ĕ3¦¿ĭ : 1 +3¦¿ĭÐ : 1 +¦¿ĭÐŤ : 1 +¿ĭÐŤʳ : 1 +ĭÐŤʳX : 1 +ÐŤʳXH : 1 +ŤʳXH" : 1 +ʳXH"c : 1 +XH"c" : 1 +H"c"I : 1 diff --git a/analysis/particles.txt b/analysis/particles.txt index efd5195..a7c28a9 100644 --- a/analysis/particles.txt +++ b/analysis/particles.txt @@ -1,6 +1,6 @@ particle : with "{" / total ᵈ : 0 / 4 -ʳ : 0 / 4 +ʳ : 0 / 5 ˣ : 0 / 4 ᶻ : 0 / 11 ᶾ : 0 / 1 diff --git a/src/Nekomata/Builtin/String.hs b/src/Nekomata/Builtin/String.hs index d0f5068..dec61f8 100644 --- a/src/Nekomata/Builtin/String.hs +++ b/src/Nekomata/Builtin/String.hs @@ -1,7 +1,8 @@ module Nekomata.Builtin.String where import Control.Monad ((>=>)) -import Nekomata.CodePage (charToInt, intToChar) +import Data.Word (Word8) +import Nekomata.CodePage (byteToChar) import Nekomata.Data import Nekomata.Function import Nekomata.NonDet @@ -11,19 +12,25 @@ import Text.Parsec (eof, parse) charToInt' :: Function charToInt' = unaryVec charToInt'' where - charToInt'' _ (DCharT x) = liftChar (charToInt :: Char -> Maybe Integer) x + charToInt'' _ (DCharT x) = liftChar charToInt_ x charToInt'' _ _ = Fail + charToInt_ :: Word8 -> Integer + charToInt_ = fromIntegral intToChar' :: Function intToChar' = unaryVec intToChar'' where - intToChar'' _ (DNumT x) = liftInt intToChar x + intToChar'' _ (DNumT x) = liftInt intToChar_ x intToChar'' _ _ = Fail + intToChar_ :: Integer -> Maybe Word8 + intToChar_ x + | x >= 0 && x < 255 = Just $ fromIntegral x + | otherwise = Nothing read' :: Function read' = unary read'' where - read'' _ (DCharT x) = liftChar (\c -> read_ [c]) x + read'' _ (DCharT x) = liftChar (\c -> read_ [byteToChar c]) x read'' _ x = toTryData $ (asString >=> read_) <$> toTry x read_ = either (const Nothing) Just . parse (parseData <* eof) "" diff --git a/src/Nekomata/CodePage.hs b/src/Nekomata/CodePage.hs index 95a11e8..307cdd3 100644 --- a/src/Nekomata/CodePage.hs +++ b/src/Nekomata/CodePage.hs @@ -36,6 +36,10 @@ intToChar x | x >= 0 && x < 255 = Just $ codePage !! fromIntegral x | otherwise = Nothing +-- | Convert a byte to a character. +byteToChar :: Word8 -> Char +byteToChar = (codePage !!) . fromIntegral + -- A Markdown table of the code page codePageMarkdown :: String codePageMarkdown = diff --git a/src/Nekomata/Data.hs b/src/Nekomata/Data.hs index c84711d..13db148 100644 --- a/src/Nekomata/Data.hs +++ b/src/Nekomata/Data.hs @@ -1,4 +1,5 @@ {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE InstanceSigs #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE TypeFamilies #-} @@ -8,7 +9,8 @@ import Control.Monad (join, liftM2) import Data.Bifunctor (bimap) import Data.Functor ((<&>)) import Data.Ratio (denominator, numerator) -import Nekomata.CodePage (charToInt) +import Data.Word (Word8) +import Nekomata.CodePage (byteToChar, charToInt) import Nekomata.NonDet -- | A helper function to lift a binary function to a monad @@ -199,7 +201,7 @@ instance (NonDet a) => NonDet (ListTry a) where -- | Nekomata's data type (deterministic) data Data = DNum Rational - | DChar Char + | DChar Word8 | DList [Data] deriving (Eq, Ord) @@ -208,7 +210,7 @@ asString :: Data -> Maybe String asString (DList []) = Nothing asString (DList xs) = mapM asChar xs where - asChar (DChar x) = Just x + asChar (DChar x) = Just $ byteToChar x asChar _ = Nothing asString _ = Nothing @@ -217,7 +219,7 @@ instance Show Data where if denominator x == 1 then show (numerator x) else show (numerator x) ++ "/" ++ show (denominator x) - show (DChar x) = show x + show (DChar x) = show (byteToChar x) show (DList x) = maybe (show x) quote $ asString (DList x) where quote s = "\"" ++ concatMap escape s ++ "\"" @@ -227,7 +229,7 @@ instance Show Data where data DataTry = DNumT (Try (Det Rational)) - | DCharT (Try (Det Char)) + | DCharT (Try (Det Word8)) | DListT (TryList TryData) -- | Nekomata's data type (non-deterministic) @@ -269,7 +271,7 @@ toTryInt' x = x >>= toTryInt . unDet -- | Convert any @DataTry@ to a @Try (Det Rational)@ toTryNum :: DataTry -> Try (Det Rational) toTryNum (DNumT x) = x -toTryNum (DCharT x) = toTry x >>= maybe Fail (Val . Det) . charToInt +toTryNum (DCharT x) = toTry x >>= Val . Det . fromIntegral toTryNum (DListT _) = Fail -- | A helper class for lifting functions to @TryData@ @@ -291,9 +293,12 @@ instance ToTryData Integer where instance ToTryData Rational where toTryData = Val . DNumT . Val . Det -instance ToTryData Char where +instance ToTryData Word8 where toTryData = Val . DCharT . Val . Det +instance ToTryData Char where + toTryData = maybe Fail (Val . DCharT . Val . Det) . charToInt + instance (ToTryData a) => ToTryData [a] where toTryData = Val . DListT . Val . fromList . map toTryData @@ -354,14 +359,14 @@ liftInt2 :: liftInt2 f x y = toTryData $ liftM2 f (toTryInt' x) (toTryInt' y) -- | Lift a unary char function to @TryData@ -liftChar :: (ToTryData a) => (Char -> a) -> (Try (Det Char) -> TryData) +liftChar :: (ToTryData a) => (Word8 -> a) -> (Try (Det Word8) -> TryData) liftChar f = toTryData . fmap f . toTry -- | Lift a binary char function to @TryData@ liftChar2 :: (ToTryData a) => - (Char -> Char -> a) -> - (Try (Det Char) -> Try (Det Char) -> TryData) + (Word8 -> Word8 -> a) -> + (Try (Det Word8) -> Try (Det Word8) -> TryData) liftChar2 f x y = toTryData $ liftM2 f (toTry x) (toTry y) -- | Lift a unary list function to @TryData@ @@ -473,7 +478,7 @@ instance TryEq Integer where instance TryEq Rational where tryEq x y = Val $ x == y -instance TryEq Char where +instance TryEq Word8 where tryEq x y = Val $ x == y instance (Eq a) => TryEq (Det a) where @@ -525,7 +530,8 @@ instance TryOrd Integer where instance TryOrd Rational where tryCmp x y = Val $ compare x y -instance TryOrd Char where +instance TryOrd Word8 where + tryCmp :: Word8 -> Word8 -> Try Ordering tryCmp x y = Val $ compare x y instance (Ord a) => TryOrd (Det a) where diff --git a/src/Nekomata/Eval.hs b/src/Nekomata/Eval.hs index 5cce7d7..de7b429 100644 --- a/src/Nekomata/Eval.hs +++ b/src/Nekomata/Eval.hs @@ -42,7 +42,11 @@ initRuntime :: [Data] -> Runtime initRuntime = Runtime initId . initStack . map fromValue readInput :: String -> Either NekomataError [Data] -readInput = left ParseError . parse parseInput "" +readInput = + left CodePageError + . checkCodePage + >=> left ParseError + . parse parseInput "" -- | Run a Nekomata function with the given runtime state runFunction :: Function -> Runtime -> (Runtime, TryData) diff --git a/src/Nekomata/Parser/Data.hs b/src/Nekomata/Parser/Data.hs index ca73d34..9a3970c 100644 --- a/src/Nekomata/Parser/Data.hs +++ b/src/Nekomata/Parser/Data.hs @@ -1,6 +1,8 @@ module Nekomata.Parser.Data where import Data.Ratio ((%)) +import Data.Word (Word8) +import Nekomata.CodePage (charToInt) import Nekomata.Data import Text.Parsec import Text.Parsec.String (Parser) @@ -57,22 +59,36 @@ parseNum' = try parseRational' <|> fromInteger <$> parsePositive "number" parseEscape :: Parser Char parseEscape = char '\\' >> oneOf "\"\\'" +checkCodePage' :: Char -> Parser Word8 +checkCodePage' c = case charToInt c of + Just x' -> return x' + Nothing -> + parserFail + $ "Character '" + ++ [c] + ++ "' is not in Nekomata's code page." + -- | Parse a string literal -parseString :: Parser String +parseString :: Parser [Word8] parseString = between (char '"') (char '"') - (many $ try parseEscape <|> noneOf "\"") + (many (try parseEscape <|> noneOf "\"") >>= mapM checkCodePage') "string" -- | Parse a char literal -parseChar :: Parser Char -parseChar = between (char '\'') (optional $ char '\'') anyChar "char" +parseChar :: Parser Word8 +parseChar = + between + (char '\'') + (optional $ char '\'') + (anyChar >>= checkCodePage') + "char" -- | Parse a char literal, but without the right single quote -parseChar' :: Parser Char -parseChar' = char '\'' >> anyChar "char" +parseChar' :: Parser Word8 +parseChar' = char '\'' >> (anyChar >>= checkCodePage') "char" -- | Parse a list of Nekomata data parseList :: Parser [Data] diff --git a/test/Eval.hs b/test/Eval.hs index 4a52e1b..3db138a 100644 --- a/test/Eval.hs +++ b/test/Eval.hs @@ -3155,3 +3155,45 @@ testEval = describe "Evaluation" $ do , ("12 7", all_ ["[69968/16611]"]) , ("20 2", all_ ["[645315821032049/18278449721532]"]) ] + describe "q266332: Compute the conjugate of a partition" $ do + specEval + "1D∑" + [ ("[5,2,1]", all_ ["[3,2,1,1,1]"]) + , ("[4,3,1]", all_ ["[3,2,2,1]"]) + , ("[4,2,2]", all_ ["[3,3,1,1]"]) + , ("[3,3,2]", all_ ["[3,3,2]"]) + ] + specEval + "rjŢ" + [ ("[5,2,1]", all_ ["[3,2,1,1,1]"]) + , ("[4,3,1]", all_ ["[3,2,2,1]"]) + , ("[4,2,2]", all_ ["[3,3,1,1]"]) + , ("[3,3,2]", all_ ["[3,3,2]"]) + ] + describe "q266479: Find the fairest partition of a list" $ do + specEval + "ŋ∑Aaṁ" + [ ("[1,2,3]", all_ ["0"]) + , ("[2,3,5,7,11]", all_ ["0"]) + , ("[13,17,19,23]", all_ ["0"]) + , ("[1,2,3,4]", all_ ["0"]) + , ("[2,2,2,3,3]", all_ ["0"]) + , ("[1,2,3,4,5]", all_ ["1"]) + , ("[1,2,3,4,5,6]", all_ ["1"]) + , ("[1,1,2,5]", all_ ["1"]) + , ("[1,3,9,27]", all_ ["14"]) + ] + describe "q266561: aaabbabbc" $ do + specEval + "ĕ3¦¿ĭÐŤʳXH\"c\"I" + [ ("\"cbbbaab\"", all_ ["aa"]) + , ("\"bbbaabc\"", all_ ["aa"]) + , ("\"baaacbb\"", all_ ["ab"]) + , ("\"bbcaaba\"", all_ ["ab"]) + , ("\"bbcabaa\"", all_ ["ba"]) + , ("\"abcaabb\"", all_ ["ba"]) + , ("\"bacaaab\"", all_ ["bb"]) + , ("\"aacabba\"", all_ ["bb"]) + , ("\"bbabaaba\"", all_ ["c"]) + , ("\"aaabbabb\"", all_ ["c"]) + ]