From 277d03b4757e1d604ece76a2b9905101b9d33d2a Mon Sep 17 00:00:00 2001 From: Brian McKeon <135748266+brianjosephmckeon@users.noreply.github.com> Date: Fri, 2 Feb 2024 21:37:18 -0500 Subject: [PATCH] Prepare 0.3.16.1 release Reformatted. Added workflows. Updated package metadata. --- .github/CODEOWNERS | 1 + .github/workflows/build.yaml | 12 + .github/workflows/release.yaml | 12 + .gitignore | 2 + CHANGELOG.md | 4 +- Setup.hs | 2 - bench/Cell.hs | 31 +- bench/Main.hs | 132 +- bench/SimpleCsv.hs | 22 +- bytebuild.cabal | 143 ++- common/HexWord64.hs | 45 +- common/Word16Tree.hs | 91 +- fourmolu.yaml | 51 + src-9.0/Compat.hs | 24 +- src-9.2/Compat.hs | 6 +- src-checked/Op.hs | 16 +- src-unchecked/Op.hs | 4 +- src/Data/Bytes/Builder.hs | 1457 ++++++++++++---------- src/Data/Bytes/Builder/Avro.hs | 60 +- src/Data/Bytes/Builder/Bounded.hs | 915 ++++++++------ src/Data/Bytes/Builder/Bounded/Class.hs | 23 +- src/Data/Bytes/Builder/Bounded/Unsafe.hs | 70 +- src/Data/Bytes/Builder/Class.hs | 22 +- src/Data/Bytes/Builder/Template.hs | 125 +- src/Data/Bytes/Builder/Unsafe.hs | 413 +++--- test/Main.hs | 651 +++++----- 26 files changed, 2408 insertions(+), 1926 deletions(-) create mode 100644 .github/CODEOWNERS create mode 100644 .github/workflows/build.yaml create mode 100644 .github/workflows/release.yaml delete mode 100644 Setup.hs create mode 100644 fourmolu.yaml diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..f6c0b22 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +@byteverse/l3c diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..085bbaf --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,12 @@ +name: build +on: + pull_request: + branches: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/build.yaml@main + secrets: inherit + with: + release: false diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..bd0bbd5 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,12 @@ +name: release +on: + push: + tags: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/build.yaml@main + secrets: inherit + with: + release: true diff --git a/.gitignore b/.gitignore index 28d589b..ccd94e8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.vscode/ dist dist-* cabal-dev @@ -11,6 +12,7 @@ cabal-dev .hsenv .cabal-sandbox/ cabal.sandbox.config +cabal.project.local *.prof *.aux *.hp diff --git a/CHANGELOG.md b/CHANGELOG.md index 30773a2..bc385a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ Note: Prior to version 0.3.4.0, this library was named `small-bytearray-builder` is now just a compatibility shim to ease the migration process. -## 0.3.16.1 -- 2024-??-?? +## 0.3.16.1 -- 2024-02-02 * Remove all CPP * Drop support for GHC < 9.4 @@ -141,7 +141,7 @@ to ease the migration process. * Add `flush`, `copy`, and `insert` for better control when converting byte sequences to builders. * Add `shortByteString` to improve interoperability with the - `bytestring` library. + `bytestring` library. ## 0.2.1.0 -- 2019-09-05 diff --git a/Setup.hs b/Setup.hs deleted file mode 100644 index 9a994af..0000000 --- a/Setup.hs +++ /dev/null @@ -1,2 +0,0 @@ -import Distribution.Simple -main = defaultMain diff --git a/bench/Cell.hs b/bench/Cell.hs index 77d4f61..3c76016 100644 --- a/bench/Cell.hs +++ b/bench/Cell.hs @@ -1,14 +1,14 @@ -{-# language OverloadedLists #-} -{-# language OverloadedStrings #-} +{-# LANGUAGE OverloadedLists #-} +{-# LANGUAGE OverloadedStrings #-} module Cell - ( Cell(..) + ( Cell (..) , cells ) where -import Data.Word (Word32) -import Data.Text.Short (ShortText) import Data.Primitive (SmallArray) +import Data.Text.Short (ShortText) +import Data.Word (Word32) -- A cell in a CSV file data Cell @@ -18,15 +18,14 @@ data Cell -- Some sample data to encode as a CSV cells :: SmallArray (SmallArray Cell) cells = - [ [ CellString "Randy", CellString "Gutiérrez", CellNumber 41, CellNumber 343 ] - , [ CellString "Édith", CellString "Piaf", CellNumber 63, CellNumber 453 ] - , [ CellString "Martha", CellString "Washington", CellNumber 51, CellNumber 634 ] - , [ CellString "Julius", CellString "Caesar", CellNumber 1, CellNumber 6922 ] - , [ CellString "Robert", CellString "Redford", CellNumber 24, CellNumber 617 ] - , [ CellString "Violet", CellString "Crawley", CellNumber 71, CellNumber 150 ] - , [ CellString "Lázaro", CellString "Cárdenas", CellNumber 58, CellNumber 299 ] - , [ CellString "Anastasia", CellString "San Martin", CellNumber 103, CellNumber 3214 ] - , [ CellString "Mad", CellString "Max", CellNumber 37, CellNumber 918 ] - , [ CellString "Sidonie-Gabrielle", CellString "Collette", CellNumber 25, CellNumber 904 ] + [ [CellString "Randy", CellString "Gutiérrez", CellNumber 41, CellNumber 343] + , [CellString "Édith", CellString "Piaf", CellNumber 63, CellNumber 453] + , [CellString "Martha", CellString "Washington", CellNumber 51, CellNumber 634] + , [CellString "Julius", CellString "Caesar", CellNumber 1, CellNumber 6922] + , [CellString "Robert", CellString "Redford", CellNumber 24, CellNumber 617] + , [CellString "Violet", CellString "Crawley", CellNumber 71, CellNumber 150] + , [CellString "Lázaro", CellString "Cárdenas", CellNumber 58, CellNumber 299] + , [CellString "Anastasia", CellString "San Martin", CellNumber 103, CellNumber 3214] + , [CellString "Mad", CellString "Max", CellNumber 37, CellNumber 918] + , [CellString "Sidonie-Gabrielle", CellString "Collette", CellNumber 25, CellNumber 904] ] - diff --git a/bench/Main.hs b/bench/Main.hs index cc29af6..1c93f57 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -1,9 +1,8 @@ -{-# language LambdaCase #-} -{-# language OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings #-} import Data.Primitive (ByteArray) import Data.Word (Word64) -import Gauge (bgroup,bench,whnf) +import Gauge (bench, bgroup, whnf) import Gauge.Main (defaultMain) import qualified Arithmetic.Nat as Nat @@ -11,70 +10,87 @@ import qualified Data.Bytes.Builder as B import qualified Data.Bytes.Builder.Bounded as U import qualified Cell -import qualified SimpleCsv import qualified HexWord64 +import qualified SimpleCsv import qualified Word16Tree main :: IO () -main = defaultMain - [ bgroup "w64" - [ bgroup "hex" - [ bench "library" (whnf encodeHexWord64s w64s) - , bench "loop" (whnf encodeHexWord64sLoop w64s) - ] +main = + defaultMain + [ bgroup + "w64" + [ bgroup + "hex" + [ bench "library" (whnf encodeHexWord64s w64s) + , bench "loop" (whnf encodeHexWord64sLoop w64s) + ] + ] + , bgroup + "unbounded" + [ bench "csv-no-escape" $ + whnf + (\x -> B.run 4080 (SimpleCsv.encodeRows x)) + Cell.cells + , bench "word-16-tree-small" $ + whnf + (\x -> B.run 4080 (Word16Tree.encode x)) + Word16Tree.exampleSmall + , bench "word-16-tree-2000" $ + whnf + (\x -> B.run ((4096 * 16) - 16) (Word16Tree.encode x)) + Word16Tree.example2000 + , bench "word-16-tree-9000" $ + whnf + (\x -> B.run ((4096 * 64) - 16) (Word16Tree.encode x)) + Word16Tree.example9000 + ] ] - , bgroup "unbounded" - [ bench "csv-no-escape" $ whnf - (\x -> B.run 4080 (SimpleCsv.encodeRows x)) - Cell.cells - , bench "word-16-tree-small" $ whnf - (\x -> B.run 4080 (Word16Tree.encode x)) - Word16Tree.exampleSmall - , bench "word-16-tree-2000" $ whnf - (\x -> B.run ((4096 * 16) - 16) (Word16Tree.encode x)) - Word16Tree.example2000 - , bench "word-16-tree-9000" $ whnf - (\x -> B.run ((4096 * 64) - 16) (Word16Tree.encode x)) - Word16Tree.example9000 - ] - ] w64s :: Word64s -w64s = Word64s - 0xde2b8a480cf77113 - 0x48f1668ca2a68b45 - 0xd262fbaa0b2f473c - 0xbab20547f4919d9f - 0xb7ec16121704db43 - 0x9c259f5bfa90e1eb - 0xd451eca11d9873ad - 0xbd927e8d4c879d02 +w64s = + Word64s + 0xde2b8a480cf77113 + 0x48f1668ca2a68b45 + 0xd262fbaa0b2f473c + 0xbab20547f4919d9f + 0xb7ec16121704db43 + 0x9c259f5bfa90e1eb + 0xd451eca11d9873ad + 0xbd927e8d4c879d02 -data Word64s = Word64s - !Word64 !Word64 !Word64 !Word64 - !Word64 !Word64 !Word64 !Word64 +data Word64s + = Word64s + !Word64 + !Word64 + !Word64 + !Word64 + !Word64 + !Word64 + !Word64 + !Word64 encodeHexWord64s :: Word64s -> ByteArray -{-# noinline encodeHexWord64s #-} -encodeHexWord64s (Word64s a b c d e f g h) = U.run Nat.constant $ - U.word64PaddedUpperHex a `U.append` - U.word64PaddedUpperHex b `U.append` - U.word64PaddedUpperHex c `U.append` - U.word64PaddedUpperHex d `U.append` - U.word64PaddedUpperHex e `U.append` - U.word64PaddedUpperHex f `U.append` - U.word64PaddedUpperHex g `U.append` - U.word64PaddedUpperHex h +{-# NOINLINE encodeHexWord64s #-} +encodeHexWord64s (Word64s a b c d e f g h) = + U.run Nat.constant $ + U.word64PaddedUpperHex a + `U.append` U.word64PaddedUpperHex b + `U.append` U.word64PaddedUpperHex c + `U.append` U.word64PaddedUpperHex d + `U.append` U.word64PaddedUpperHex e + `U.append` U.word64PaddedUpperHex f + `U.append` U.word64PaddedUpperHex g + `U.append` U.word64PaddedUpperHex h encodeHexWord64sLoop :: Word64s -> ByteArray -{-# noinline encodeHexWord64sLoop #-} -encodeHexWord64sLoop (Word64s a b c d e f g h) = U.run Nat.constant $ - HexWord64.word64PaddedUpperHex a `U.append` - HexWord64.word64PaddedUpperHex b `U.append` - HexWord64.word64PaddedUpperHex c `U.append` - HexWord64.word64PaddedUpperHex d `U.append` - HexWord64.word64PaddedUpperHex e `U.append` - HexWord64.word64PaddedUpperHex f `U.append` - HexWord64.word64PaddedUpperHex g `U.append` - HexWord64.word64PaddedUpperHex h - +{-# NOINLINE encodeHexWord64sLoop #-} +encodeHexWord64sLoop (Word64s a b c d e f g h) = + U.run Nat.constant $ + HexWord64.word64PaddedUpperHex a + `U.append` HexWord64.word64PaddedUpperHex b + `U.append` HexWord64.word64PaddedUpperHex c + `U.append` HexWord64.word64PaddedUpperHex d + `U.append` HexWord64.word64PaddedUpperHex e + `U.append` HexWord64.word64PaddedUpperHex f + `U.append` HexWord64.word64PaddedUpperHex g + `U.append` HexWord64.word64PaddedUpperHex h diff --git a/bench/SimpleCsv.hs b/bench/SimpleCsv.hs index 1b47345..34a2ded 100644 --- a/bench/SimpleCsv.hs +++ b/bench/SimpleCsv.hs @@ -1,4 +1,4 @@ -{-# language LambdaCase #-} +{-# LANGUAGE LambdaCase #-} -- A variant of CSV encoding that does not perform -- any escaping or quoting. This is in its own module @@ -8,22 +8,24 @@ module SimpleCsv ( encodeRows ) where -import Cell (Cell(..)) +import Cell (Cell (..)) import Data.Primitive (SmallArray) -import qualified Data.Foldable as F import qualified Data.Bytes.Builder as B +import qualified Data.Foldable as F encodeRows :: SmallArray (SmallArray Cell) -> B.Builder -encodeRows = F.foldr - (\r x -> encodeSimpleCsvRow r (B.ascii '\n' <> x)) - mempty +encodeRows = + F.foldr + (\r x -> encodeSimpleCsvRow r (B.ascii '\n' <> x)) + mempty encodeSimpleCsvRow :: SmallArray Cell -> B.Builder -> B.Builder -encodeSimpleCsvRow cs b = F.foldr - (\c x -> encodeSimpleCsvCell c <> B.ascii ',' <> x) - b - cs +encodeSimpleCsvRow cs b = + F.foldr + (\c x -> encodeSimpleCsvCell c <> B.ascii ',' <> x) + b + cs encodeSimpleCsvCell :: Cell -> B.Builder encodeSimpleCsvCell = \case diff --git a/bytebuild.cabal b/bytebuild.cabal index 066a235..46918ab 100644 --- a/bytebuild.cabal +++ b/bytebuild.cabal @@ -1,17 +1,17 @@ -cabal-version: 2.2 -name: bytebuild -version: 0.3.16.1 -synopsis: Build byte arrays +cabal-version: 2.2 +name: bytebuild +version: 0.3.16.1 +synopsis: Build byte arrays description: This is similar to the builder facilities provided by `Data.ByteString.Builder`. It is intended to be used in situations where the following apply: . * An individual entity will be serialized as a small - number of bytes (less than 512). + number of bytes (less than 512). . * A large number (more than 32) of entities will be serialized - one after another without anything between them. + one after another without anything between them. . Unlike builders from the `bytestring` package, these builders do not track their state when they run out of space. A builder @@ -19,110 +19,123 @@ description: of the next chunk. This strategy for building is suitable for most CSVs and several line protocols (carbon, InfluxDB, etc.). -homepage: https://github.com/byteverse/bytebuild -bug-reports: https://github.com/byteverse/bytebuild/issues -license: BSD-3-Clause -license-file: LICENSE -author: Andrew Martin -maintainer: andrew.thaddeus@gmail.com -copyright: 2019 Andrew Martin -category: Data -extra-source-files: CHANGELOG.md +homepage: https://github.com/byteverse/bytebuild +bug-reports: https://github.com/byteverse/bytebuild/issues +license: BSD-3-Clause +license-file: LICENSE +author: Andrew Martin +maintainer: amartin@layer3com.com +copyright: 2019 Andrew Martin +category: Data +extra-doc-files: CHANGELOG.md + +common build-settings + default-language: Haskell2010 + ghc-options: -Wall -Wunused-packages flag checked - manual: True + manual: True description: Add bounds-checking to primitive array operations - default: False + default: False library + import: build-settings exposed-modules: Data.Bytes.Builder Data.Bytes.Builder.Avro - Data.Bytes.Builder.Class - Data.Bytes.Builder.Template - Data.Bytes.Builder.Unsafe Data.Bytes.Builder.Bounded Data.Bytes.Builder.Bounded.Class Data.Bytes.Builder.Bounded.Unsafe + Data.Bytes.Builder.Class + Data.Bytes.Builder.Template + Data.Bytes.Builder.Unsafe + other-modules: Compat Op - reexported-modules: - Data.Bytes.Chunks + + reexported-modules: Data.Bytes.Chunks build-depends: - , base >=4.17.0.0 && <4.20 - , byteslice >=0.2.6 && <0.3 - , bytestring >=0.10.8.2 && <0.13 - , haskell-src-meta >=0.8.13 - , integer-logarithms >=1.0.3 && <1.1 - , natural-arithmetic >=0.1 && <0.3 - , primitive-offset >=0.2 && <0.3 - , run-st >=0.1.2 && <0.2 - , template-haskell >=2.16 - , text >=2.0 && <2.2 - , text-short >=0.1.3 && <0.2 - , wide-word >=0.1.0.9 && <0.2 + , base >=4.17.0.0 && <4.20 + , byteslice >=0.2.6 && <0.3 + , bytestring >=0.10.8.2 && <0.13 + , haskell-src-meta >=0.8.13 + , integer-logarithms >=1.0.3 && <1.1 + , natural-arithmetic >=0.1 && <0.3 + , primitive-offset >=0.2 && <0.3 + , run-st >=0.1.2 && <0.2 + , template-haskell >=2.16 + , text >=2.0 && <2.2 + , text-short >=0.1.3 && <0.2 + , wide-word >=0.1.0.9 && <0.2 , zigzag - if impl(ghc >= 9.2) + + if impl(ghc >=9.2) hs-source-dirs: src-9.2 + else - if impl(ghc >= 8.10) + if impl(ghc >=8.10) hs-source-dirs: src-9.0 + if flag(checked) - build-depends: primitive-checked >= 0.7 && <0.10 + build-depends: primitive-checked >=0.7 && <0.10 hs-source-dirs: src-checked + else - build-depends: primitive >= 0.7 && <0.10 + build-depends: primitive >=0.7 && <0.10 hs-source-dirs: src-unchecked - ghc-options: -Wall -O2 - hs-source-dirs: src - default-language: Haskell2010 - c-sources: cbits/bytebuild_custom.c + + ghc-options: -O2 + hs-source-dirs: src + c-sources: cbits/bytebuild_custom.c test-suite test - default-language: Haskell2010 - type: exitcode-stdio-1.0 - hs-source-dirs: test, common - main-is: Main.hs - ghc-options: -O2 -Wall + import: build-settings + type: exitcode-stdio-1.0 + hs-source-dirs: test common + main-is: Main.hs + ghc-options: -O2 other-modules: HexWord64 Word16Tree + build-depends: - , QuickCheck >=2.13.1 && <2.15 - , base >=4.12.0.0 && <5 + , base >=4.12.0.0 && <5 , bytebuild , byteslice , bytestring , natural-arithmetic , primitive - , primitive-unlifted >=0.1.2 - , quickcheck-classes >=0.6.4 - , quickcheck-instances >=0.3.22 + , QuickCheck >=2.13.1 && <2.15 + , quickcheck-instances >=0.3.22 + , tasty >=1.2.3 && <1.6 + , tasty-hunit >=0.10.0.2 && <0.11 + , tasty-quickcheck >=0.10.1 && <0.11 + , text >=2.0 && <2.2 , text-short - , tasty >=1.2.3 && <1.6 - , tasty-hunit >=0.10.0.2 && <0.11 - , tasty-quickcheck >=0.10.1 && <0.11 - , text >=2.0 && <2.2 - , vector - , wide-word >=0.1.0.9 && <0.2 + , wide-word >=0.1.0.9 && <0.2 benchmark bench - type: exitcode-stdio-1.0 + import: build-settings + type: exitcode-stdio-1.0 build-depends: , base , bytebuild - , gauge >= 0.2.4 + , byteslice + , gauge >=0.2.4 , natural-arithmetic , primitive , text-short - , byteslice - ghc-options: -Wall -O2 - default-language: Haskell2010 - hs-source-dirs: bench, common - main-is: Main.hs + + ghc-options: -O2 + hs-source-dirs: bench common + main-is: Main.hs other-modules: Cell HexWord64 SimpleCsv Word16Tree + +source-repository head + type: git + location: git://github.com/byteverse/bytebuild.git diff --git a/common/HexWord64.hs b/common/HexWord64.hs index b3af3c8..23acfe4 100644 --- a/common/HexWord64.hs +++ b/common/HexWord64.hs @@ -1,10 +1,10 @@ -{-# language BangPatterns #-} -{-# language ScopedTypeVariables #-} -{-# language DataKinds #-} -{-# language UnboxedTuples #-} -{-# language MagicHash #-} -{-# language PolyKinds #-} -{-# language TypeApplications #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UnboxedTuples #-} module HexWord64 ( word64PaddedUpperHex @@ -15,34 +15,37 @@ module HexWord64 -- the hoop jumping, the explicit loop used here is still outperformed -- by just inlining the loop. -import GHC.ST (ST(ST)) import Data.Bits -import Data.Bytes.Builder.Bounded.Unsafe (Builder,construct) +import Data.Bytes.Builder.Bounded.Unsafe (Builder, construct) import Data.Primitive import Data.Word import GHC.Exts +import GHC.ST (ST (ST)) import qualified Control.Monad.Primitive as PM type ST# s (a :: TYPE (r :: RuntimeRep)) = State# s -> (# State# s, a #) word64PaddedUpperHex :: Word64 -> Builder 16 -word64PaddedUpperHex w = construct $ \a b -> ST - (\s0 -> case word64PaddedUpperHexLoop w 60 a b s0 of - (# s1, i #) -> (# s1, I# i #) - ) +word64PaddedUpperHex w = construct $ \a b -> + ST + ( \s0 -> case word64PaddedUpperHexLoop w 60 a b s0 of + (# s1, i #) -> (# s1, I# i #) + ) word64PaddedUpperHexLoop :: forall s. Word64 -> Int -> MutableByteArray s -> Int -> ST# s Int# -word64PaddedUpperHexLoop !w !shiftAmount !arr !i@(I# i#) s0 = if shiftAmount >= 0 - then case PM.internal @(ST s) (writeByteArray arr i (toHexUpper (unsafeShiftR w shiftAmount))) s0 of - (# s1, (_ :: ()) #) -> word64PaddedUpperHexLoop w (shiftAmount - 4) arr (i + 1) s1 - else (# s0, i# #) +word64PaddedUpperHexLoop !w !shiftAmount !arr !i@(I# i#) s0 = + if shiftAmount >= 0 + then case PM.internal @(ST s) (writeByteArray arr i (toHexUpper (unsafeShiftR w shiftAmount))) s0 of + (# s1, (_ :: ()) #) -> word64PaddedUpperHexLoop w (shiftAmount - 4) arr (i + 1) s1 + else (# s0, i# #) toHexUpper :: Word64 -> Word8 -toHexUpper w' = fromIntegral - $ (complement theMask .&. loSolved) - .|. (theMask .&. hiSolved) - where +toHexUpper w' = + fromIntegral $ + (complement theMask .&. loSolved) + .|. (theMask .&. hiSolved) + where w = w' .&. 0xF -- This is all ones if the value was >= 10 theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1 diff --git a/common/Word16Tree.hs b/common/Word16Tree.hs index 2163a5c..a07d056 100644 --- a/common/Word16Tree.hs +++ b/common/Word16Tree.hs @@ -1,4 +1,4 @@ -{-# language BangPatterns #-} +{-# LANGUAGE BangPatterns #-} module Word16Tree ( Word16Tree @@ -9,11 +9,11 @@ module Word16Tree , expectedSmall ) where -import Data.Bytes.Builder as B -import Data.Word (Word16) -import Data.Primitive (ByteArray) import qualified Data.Bytes as Bytes +import Data.Bytes.Builder as B import qualified Data.Bytes.Text.Ascii +import Data.Primitive (ByteArray) +import Data.Word (Word16) data Word16Tree = Branch !Word16Tree !Word16Tree @@ -23,63 +23,62 @@ encode :: Word16Tree -> Builder encode (Leaf w) = B.word16PaddedUpperHex w encode (Branch a b) = B.ascii '(' - <> - encode a - <> - B.ascii ',' - <> - encode b - <> - B.ascii ')' + <> encode a + <> B.ascii ',' + <> encode b + <> B.ascii ')' expectedSmall :: ByteArray -expectedSmall = Bytes.toByteArray $ Data.Bytes.Text.Ascii.fromString - "((AB59,(1F33,2E71)),((((FA9A,247B),890C),(0F13,((55BF,7CF1),389B))),1205))" - +expectedSmall = + Bytes.toByteArray $ + Data.Bytes.Text.Ascii.fromString + "((AB59,(1F33,2E71)),((((FA9A,247B),890C),(0F13,((55BF,7CF1),389B))),1205))" exampleSmall :: Word16Tree -exampleSmall = Branch - (Branch - (Leaf 0xAB59) - (Branch - (Leaf 0x1F33) - (Leaf 0x2E71) - ) - ) - (Branch - (Branch - (Branch - (Branch - (Leaf 0xFA9A) - (Leaf 0x247B) +exampleSmall = + Branch + ( Branch + (Leaf 0xAB59) + ( Branch + (Leaf 0x1F33) + (Leaf 0x2E71) ) - (Leaf 0x890C) - ) - (Branch - (Leaf 0x0F13) - (Branch - (Branch - (Leaf 0x55BF) - (Leaf 0x7CF1) - ) - (Leaf 0x389B) + ) + ( Branch + ( Branch + ( Branch + ( Branch + (Leaf 0xFA9A) + (Leaf 0x247B) + ) + (Leaf 0x890C) + ) + ( Branch + (Leaf 0x0F13) + ( Branch + ( Branch + (Leaf 0x55BF) + (Leaf 0x7CF1) + ) + (Leaf 0x389B) + ) + ) ) - ) + (Leaf 0x1205) ) - (Leaf 0x1205) - ) example2000 :: Word16Tree -{-# noinline example2000 #-} +{-# NOINLINE example2000 #-} example2000 = balanced 0 2000 example9000 :: Word16Tree -{-# noinline example9000 #-} +{-# NOINLINE example9000 #-} example9000 = balanced 0 9000 balanced :: Word16 -> Word16 -> Word16Tree balanced !off !n | n == 0 = Leaf off | n == 1 = Leaf (off + 1) - | otherwise = let x = div n 2 in - Branch (balanced off x) (balanced (off + x) (n - x)) + | otherwise = + let x = div n 2 + in Branch (balanced off x) (balanced (off + x) (n - x)) diff --git a/fourmolu.yaml b/fourmolu.yaml new file mode 100644 index 0000000..40cd005 --- /dev/null +++ b/fourmolu.yaml @@ -0,0 +1,51 @@ +# Number of spaces per indentation step +indentation: 2 + +# Max line length for automatic line breaking +column-limit: 200 + +# Styling of arrows in type signatures (choices: trailing, leading, or leading-args) +function-arrows: trailing + +# How to place commas in multi-line lists, records, etc. (choices: leading or trailing) +comma-style: leading + +# Styling of import/export lists (choices: leading, trailing, or diff-friendly) +import-export-style: leading + +# Whether to full-indent or half-indent 'where' bindings past the preceding body +indent-wheres: false + +# Whether to leave a space before an opening record brace +record-brace-space: true + +# Number of spaces between top-level declarations +newlines-between-decls: 1 + +# How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact) +haddock-style: multi-line + +# How to print module docstring +haddock-style-module: null + +# Styling of let blocks (choices: auto, inline, newline, or mixed) +let-style: auto + +# How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space) +in-style: right-align + +# Whether to put parentheses around a single constraint (choices: auto, always, or never) +single-constraint-parens: always + +# Output Unicode syntax (choices: detect, always, or never) +unicode: never + +# Give the programmer more choice on where to insert blank lines +respectful: true + +# Fixity information for operators +fixities: [] + +# Module reexports Fourmolu should know about +reexports: [] + diff --git a/src-9.0/Compat.hs b/src-9.0/Compat.hs index b7f978a..bf82334 100644 --- a/src-9.0/Compat.hs +++ b/src-9.0/Compat.hs @@ -1,4 +1,4 @@ -{-# language MagicHash #-} +{-# LANGUAGE MagicHash #-} -- This is actually used with both GHC 8.10 and with GHC 9.0. -- The name of the directory is a little misleading. @@ -14,40 +14,40 @@ module Compat , word32ToWord# ) where -import GHC.Exts (Int#,Word#) +import GHC.Exts (Int#, Word#) int8ToInt# :: Int# -> Int# -{-# inline int8ToInt# #-} +{-# INLINE int8ToInt# #-} int8ToInt# x = x int16ToInt# :: Int# -> Int# -{-# inline int16ToInt# #-} +{-# INLINE int16ToInt# #-} int16ToInt# x = x int32ToInt# :: Int# -> Int# -{-# inline int32ToInt# #-} +{-# INLINE int32ToInt# #-} int32ToInt# x = x wordToWord8# :: Word# -> Word# -{-# inline wordToWord8# #-} -wordToWord8# x = x +{-# INLINE wordToWord8# #-} +wordToWord8# x = x wordToWord16# :: Word# -> Word# -{-# inline wordToWord16# #-} +{-# INLINE wordToWord16# #-} wordToWord16# x = x wordToWord32# :: Word# -> Word# -{-# inline wordToWord32# #-} +{-# INLINE wordToWord32# #-} wordToWord32# x = x word8ToWord# :: Word# -> Word# -{-# inline word8ToWord# #-} +{-# INLINE word8ToWord# #-} word8ToWord# x = x word16ToWord# :: Word# -> Word# -{-# inline word16ToWord# #-} +{-# INLINE word16ToWord# #-} word16ToWord# x = x word32ToWord# :: Word# -> Word# -{-# inline word32ToWord# #-} +{-# INLINE word32ToWord# #-} word32ToWord# x = x diff --git a/src-9.2/Compat.hs b/src-9.2/Compat.hs index 177c94c..fb5a93b 100644 --- a/src-9.2/Compat.hs +++ b/src-9.2/Compat.hs @@ -1,10 +1,10 @@ -{-# language MagicHash #-} +{-# LANGUAGE MagicHash #-} -module Compat +module Compat ( int8ToInt# , int16ToInt# , int32ToInt# - , wordToWord8# + , wordToWord8# , wordToWord16# , wordToWord32# , word8ToWord# diff --git a/src-checked/Op.hs b/src-checked/Op.hs index a9b4211..d41684d 100644 --- a/src-checked/Op.hs +++ b/src-checked/Op.hs @@ -1,5 +1,5 @@ -{-# language MagicHash #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} module Op ( writeCharArray# @@ -7,9 +7,9 @@ module Op , copyMutableByteArray# ) where -import GHC.Exts ((<#),(>=#),State#,Int#,MutableByteArray#,ByteArray#,Char#) -import GHC.Int (Int(I#)) +import GHC.Exts (ByteArray#, Char#, Int#, MutableByteArray#, State#, (<#), (>=#)) import qualified GHC.Exts as Exts +import GHC.Int (Int (I#)) writeCharArray# :: MutableByteArray# s -> Int# -> Char# -> State# s -> State# s writeCharArray# arr i v st = case i <# 0# of @@ -27,8 +27,8 @@ copyByteArray# src soff dst doff len s0 = , I# doff >= 0 , I# len >= 0 , I# doff + I# len <= I# sz - , I# soff + I# len <= I# (Exts.sizeofByteArray# src) - -> Exts.copyByteArray# src soff dst doff len s1 + , I# soff + I# len <= I# (Exts.sizeofByteArray# src) -> + Exts.copyByteArray# src soff dst doff len s1 | otherwise -> error "copyByteArray#: index range out of bounds" copyMutableByteArray# :: MutableByteArray# s -> Int# -> MutableByteArray# s -> Int# -> Int# -> State# s -> State# s @@ -40,6 +40,6 @@ copyMutableByteArray# src soff dst doff len s0 = , I# doff >= 0 , I# len >= 0 , I# doff + I# len <= I# szDst - , I# soff + I# len <= I# szSrc - -> Exts.copyMutableByteArray# src soff dst doff len s2 + , I# soff + I# len <= I# szSrc -> + Exts.copyMutableByteArray# src soff dst doff len s2 | otherwise -> error "copyMutableByteArray#: index range out of bounds" diff --git a/src-unchecked/Op.hs b/src-unchecked/Op.hs index 32874e2..9092c35 100644 --- a/src-unchecked/Op.hs +++ b/src-unchecked/Op.hs @@ -1,4 +1,4 @@ -{-# language MagicHash #-} +{-# LANGUAGE MagicHash #-} module Op ( writeCharArray# @@ -6,4 +6,4 @@ module Op , copyMutableByteArray# ) where -import GHC.Exts (copyMutableByteArray#,writeCharArray#,copyByteArray#,copyMutableByteArray#) +import GHC.Exts (copyByteArray#, copyMutableByteArray#, writeCharArray#) diff --git a/src/Data/Bytes/Builder.hs b/src/Data/Bytes/Builder.hs index 93524cf..1a89a4a 100644 --- a/src/Data/Bytes/Builder.hs +++ b/src/Data/Bytes/Builder.hs @@ -1,18 +1,18 @@ -{-# language BangPatterns #-} -{-# language DataKinds #-} -{-# language DuplicateRecordFields #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language NumericUnderscores #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language TypeApplications #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UnboxedTuples #-} module Data.Bytes.Builder ( -- * Bounded Primitives Builder , fromBounded + -- * Evaluation , run , runOnto @@ -20,6 +20,7 @@ module Data.Bytes.Builder , reversedOnto , putMany , putManyConsLength + -- * Materialized Byte Sequences , bytes , chunks @@ -37,10 +38,13 @@ module Data.Bytes.Builder , cstring# , cstringLen , stringUtf8 + -- * Byte Sequence Encodings , sevenEightRight , sevenEightSmile + -- * Encode Integral Types + -- ** Human-Readable , word64Dec , word32Dec @@ -54,16 +58,21 @@ module Data.Bytes.Builder , int8Dec , intDec , integerDec + -- * Unsigned Words + -- ** 64-bit , word64PaddedUpperHex + -- ** 32-bit , word32PaddedUpperHex + -- ** 16-bit , word16PaddedUpperHex , word16PaddedLowerHex , word16LowerHex , word16UpperHex + -- ** 8-bit , word8PaddedUpperHex , word8LowerHex @@ -76,9 +85,12 @@ module Data.Bytes.Builder , ascii7 , ascii8 , char + -- ** Machine-Readable + -- *** One , word8 + -- **** Big Endian , word256BE , word128BE @@ -88,6 +100,7 @@ module Data.Bytes.Builder , int64BE , int32BE , int16BE + -- **** Little Endian , word256LE , word128LE @@ -97,6 +110,7 @@ module Data.Bytes.Builder , int64LE , int32LE , int16LE + -- **** LEB128 , intLEB128 , int32LEB128 @@ -105,12 +119,15 @@ module Data.Bytes.Builder , word16LEB128 , word32LEB128 , word64LEB128 + -- **** VLQ , wordVlq , word32Vlq , word64Vlq + -- *** Many , word8Array + -- **** Big Endian , word16ArrayBE , word32ArrayBE @@ -120,6 +137,7 @@ module Data.Bytes.Builder , int64ArrayBE , int32ArrayBE , int16ArrayBE + -- **** Little Endian , word16ArrayLE , word32ArrayLE @@ -129,56 +147,67 @@ module Data.Bytes.Builder , int64ArrayLE , int32ArrayLE , int16ArrayLE + -- ** Prefixing with Length , consLength , consLength32LE , consLength32BE , consLength64BE + -- * Encode Floating-Point Types + -- ** Human-Readable , doubleDec + -- * Replication , replicate + -- * Control , flush + -- * Rebuild , rebuild ) where import Prelude hiding (replicate) -import Control.Exception (SomeException,toException) -import Control.Monad.IO.Class (MonadIO,liftIO) -import Control.Monad.ST (ST,runST) -import Data.Bits ((.&.),(.|.),unsafeShiftL,unsafeShiftR) -import Data.Bytes.Builder.Unsafe (addCommitsLength,copyReverseCommits) -import Data.Bytes.Builder.Unsafe (Builder(Builder),commitDistance1) -import Data.Bytes.Builder.Unsafe (BuilderState(BuilderState),pasteIO) -import Data.Bytes.Builder.Unsafe (Commits(Initial,Mutable,Immutable)) -import Data.Bytes.Builder.Unsafe (commitsOntoChunks) -import Data.Bytes.Builder.Unsafe (reverseCommitsOntoChunks) -import Data.Bytes.Builder.Unsafe (stringUtf8,cstring,fromEffect) -import Data.Bytes.Builder.Unsafe (pasteUtf8TextJson#) -import Data.Bytes.Chunks (Chunks(ChunksCons,ChunksNil)) -import Data.Bytes.Types (Bytes(Bytes),MutableBytes(MutableBytes)) -import Data.ByteString.Short.Internal (ShortByteString(SBS)) +import Control.Exception (SomeException, toException) +import Control.Monad.IO.Class (MonadIO, liftIO) +import Control.Monad.ST (ST, runST) +import Data.Bits (unsafeShiftL, unsafeShiftR, (.&.), (.|.)) +import Data.ByteString.Short.Internal (ShortByteString (SBS)) +import Data.Bytes.Builder.Unsafe + ( Builder (Builder) + , BuilderState (BuilderState) + , Commits (Immutable, Initial, Mutable) + , addCommitsLength + , commitDistance1 + , commitsOntoChunks + , copyReverseCommits + , cstring + , fromEffect + , pasteIO + , pasteUtf8TextJson# + , reverseCommitsOntoChunks + , stringUtf8 + ) +import Data.Bytes.Chunks (Chunks (ChunksCons, ChunksNil)) +import Data.Bytes.Types (Bytes (Bytes), MutableBytes (MutableBytes)) import Data.Foldable (foldlM) -import Data.Int (Int64,Int32,Int16,Int8) -import Data.Primitive (ByteArray(..),MutableByteArray(..),PrimArray(..)) +import Data.Int (Int16, Int32, Int64, Int8) +import Data.Primitive (ByteArray (..), MutableByteArray (..), PrimArray (..)) import Data.Text.Short (ShortText) -import Data.WideWord (Word128,Word256) -import Data.Word (Word64,Word32,Word16,Word8) -import Data.Word.Zigzag (toZigzagNative,toZigzag32,toZigzag64) +import Data.WideWord (Word128, Word256) +import Data.Word (Word16, Word32, Word64, Word8) +import Data.Word.Zigzag (toZigzag32, toZigzag64, toZigzagNative) import Foreign.C.String (CStringLen) -import GHC.ByteOrder (ByteOrder(BigEndian,LittleEndian),targetByteOrder) -import GHC.Exts (MutableByteArray#,Addr#,(*#),oneShot) -import GHC.Exts (Int(I#),Int#,State#,ByteArray#,(>=#)) -import GHC.Exts (RealWorld,(+#),(-#),(<#)) +import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian), targetByteOrder) +import GHC.Exts (Addr#, ByteArray#, Int (I#), Int#, MutableByteArray#, RealWorld, State#, oneShot, (*#), (+#), (-#), (<#), (>=#)) +import GHC.IO (IO (IO), stToIO) import GHC.Integer.Logarithms.Compat (integerLog2#) -import GHC.IO (IO(IO),stToIO) -import GHC.Natural (naturalFromInteger,naturalToInteger) -import GHC.ST (ST(ST)) -import GHC.Word (Word(W#),Word8(W8#)) +import GHC.Natural (naturalFromInteger, naturalToInteger) +import GHC.ST (ST (ST)) +import GHC.Word (Word (W#), Word8 (W8#)) import Numeric.Natural (Natural) import qualified Compat as C @@ -194,183 +223,232 @@ import qualified GHC.Exts as Exts import qualified Op as Op import Data.Text (Text) -import qualified Data.Text.Internal as I import qualified Data.Text.Array as A +import qualified Data.Text.Internal as I -- | Run a builder. run :: - Int -- ^ Size of initial chunk (use 4080 if uncertain) - -> Builder -- ^ Builder - -> Chunks + -- | Size of initial chunk (use 4080 if uncertain) + Int -> + -- | Builder + Builder -> + Chunks run !hint bldr = runOnto hint bldr ChunksNil --- | Run a builder. The resulting chunks are consed onto the --- beginning of an existing sequence of chunks. +{- | Run a builder. The resulting chunks are consed onto the +beginning of an existing sequence of chunks. +-} runOnto :: - Int -- ^ Size of initial chunk (use 4080 if uncertain) - -> Builder -- ^ Builder - -> Chunks -- ^ Suffix - -> Chunks -runOnto hint@(I# hint# ) (Builder f) cs0 = runST $ do + -- | Size of initial chunk (use 4080 if uncertain) + Int -> + -- | Builder + Builder -> + -- | Suffix + Chunks -> + Chunks +runOnto hint@(I# hint#) (Builder f) cs0 = runST $ do MutableByteArray buf0 <- PM.newByteArray hint cs <- ST $ \s0 -> case f buf0 0# hint# Initial s0 of (# s1, bufX, offX, _, csX #) -> (# s1, Mutable bufX offX csX #) reverseCommitsOntoChunks cs0 cs --- | Variant of 'runOnto' that additionally returns the number of bytes --- consed onto the suffix. +{- | Variant of 'runOnto' that additionally returns the number of bytes +consed onto the suffix. +-} runOntoLength :: - Int -- ^ Size of initial chunk (use 4080 if uncertain) - -> Builder -- ^ Builder - -> Chunks -- ^ Suffix - -> (Int,Chunks) -runOntoLength hint@(I# hint# ) (Builder f) cs0 = runST $ do + -- | Size of initial chunk (use 4080 if uncertain) + Int -> + -- | Builder + Builder -> + -- | Suffix + Chunks -> + (Int, Chunks) +runOntoLength hint@(I# hint#) (Builder f) cs0 = runST $ do MutableByteArray buf0 <- PM.newByteArray hint cs <- ST $ \s0 -> case f buf0 0# hint# Initial s0 of (# s1, bufX, offX, _, csX #) -> (# s1, Mutable bufX offX csX #) let !n = addCommitsLength 0 cs ch <- reverseCommitsOntoChunks cs0 cs - pure (n,ch) + pure (n, ch) --- | Variant of 'runOnto' that conses the additional chunks --- in reverse order. +{- | Variant of 'runOnto' that conses the additional chunks +in reverse order. +-} reversedOnto :: - Int -- ^ Size of initial chunk (use 4080 if uncertain) - -> Builder -- ^ Builder - -> Chunks - -> Chunks -reversedOnto hint@(I# hint# ) (Builder f) cs0 = runST $ do + -- | Size of initial chunk (use 4080 if uncertain) + Int -> + -- | Builder + Builder -> + Chunks -> + Chunks +reversedOnto hint@(I# hint#) (Builder f) cs0 = runST $ do MutableByteArray buf0 <- PM.newByteArray hint cs <- ST $ \s0 -> case f buf0 0# hint# Initial s0 of (# s1, bufX, offX, _, csX #) -> (# s1, Mutable bufX offX csX #) commitsOntoChunks cs0 cs --- | Run a builder against lots of elements. This fills the same --- underlying buffer over and over again. Do not let the argument to --- the callback escape from the callback (i.e. do not write it to an --- @IORef@). Also, do not @unsafeFreezeByteArray@ any of the mutable --- byte arrays in the callback. The intent is that the callback will --- write the buffer out. -putMany :: Foldable f - => Int -- ^ Size of shared chunk (use 8176 if uncertain) - -> (a -> Builder) -- ^ Value builder - -> f a -- ^ Collection of values - -> (MutableBytes RealWorld -> IO b) -- ^ Consume chunks. - -> IO () -{-# inline putMany #-} +{- | Run a builder against lots of elements. This fills the same +underlying buffer over and over again. Do not let the argument to +the callback escape from the callback (i.e. do not write it to an +@IORef@). Also, do not @unsafeFreezeByteArray@ any of the mutable +byte arrays in the callback. The intent is that the callback will +write the buffer out. +-} +putMany :: + (Foldable f) => + -- | Size of shared chunk (use 8176 if uncertain) + Int -> + -- | Value builder + (a -> Builder) -> + -- | Collection of values + f a -> + -- | Consume chunks. + (MutableBytes RealWorld -> IO b) -> + IO () +{-# INLINE putMany #-} putMany hint0 g xs cb = do MutableByteArray buf0 <- PM.newByteArray hint - BuilderState bufZ offZ _ cmtsZ <- foldlM - (\st0 a -> do - st1@(BuilderState buf off _ cmts) <- pasteIO (g a) st0 - case cmts of - Initial -> if I# off < threshold - then pure st1 - else do - _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) - pure (BuilderState buf0 0# hint# Initial) - _ -> do - let total = addCommitsLength (I# off) cmts - doff0 = total - I# off - large <- PM.newByteArray total - stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off)) - r <- stToIO (copyReverseCommits large doff0 cmts) - case r of - 0 -> do - _ <- cb (MutableBytes large 0 total) - pure (BuilderState buf0 0# hint# Initial) - _ -> IO (\s0 -> Exts.raiseIO# putManyError s0) - ) (BuilderState buf0 0# hint# Initial) xs + BuilderState bufZ offZ _ cmtsZ <- + foldlM + ( \st0 a -> do + st1@(BuilderState buf off _ cmts) <- pasteIO (g a) st0 + case cmts of + Initial -> + if I# off < threshold + then pure st1 + else do + _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) + pure (BuilderState buf0 0# hint# Initial) + _ -> do + let total = addCommitsLength (I# off) cmts + doff0 = total - I# off + large <- PM.newByteArray total + stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off)) + r <- stToIO (copyReverseCommits large doff0 cmts) + case r of + 0 -> do + _ <- cb (MutableBytes large 0 total) + pure (BuilderState buf0 0# hint# Initial) + _ -> IO (\s0 -> Exts.raiseIO# putManyError s0) + ) + (BuilderState buf0 0# hint# Initial) + xs _ <- case cmtsZ of Initial -> cb (MutableBytes (MutableByteArray bufZ) 0 (I# offZ)) _ -> IO (\s0 -> Exts.raiseIO# putManyError s0) pure () - where + where !hint@(I# hint#) = max hint0 8 !threshold = div (hint * 3) 4 putManyError :: SomeException -{-# noinline putManyError #-} -putManyError = toException - (userError "bytebuild: putMany implementation error") - --- | Variant of 'putMany' that prefixes each pushed array of chunks --- with the number of bytes that the chunks in each batch required. --- (This excludes the bytes required to encode the length itself.) --- This is useful for chunked HTTP encoding. -putManyConsLength :: (Foldable f, MonadIO m) - => Arithmetic.Nat n -- ^ Number of bytes used by the serialization of the length - -> (Int -> Bounded.Builder n) -- ^ Length serialization function - -> Int -- ^ Size of shared chunk (use 8176 if uncertain) - -> (a -> Builder) -- ^ Value builder - -> f a -- ^ Collection of values - -> (MutableBytes RealWorld -> m b) -- ^ Consume chunks. - -> m () -{-# inline putManyConsLength #-} +{-# NOINLINE putManyError #-} +putManyError = + toException + (userError "bytebuild: putMany implementation error") + +{- | Variant of 'putMany' that prefixes each pushed array of chunks +with the number of bytes that the chunks in each batch required. +(This excludes the bytes required to encode the length itself.) +This is useful for chunked HTTP encoding. +-} +putManyConsLength :: + (Foldable f, MonadIO m) => + -- | Number of bytes used by the serialization of the length + Arithmetic.Nat n -> + -- | Length serialization function + (Int -> Bounded.Builder n) -> + -- | Size of shared chunk (use 8176 if uncertain) + Int -> + -- | Value builder + (a -> Builder) -> + -- | Collection of values + f a -> + -- | Consume chunks. + (MutableBytes RealWorld -> m b) -> + m () +{-# INLINE putManyConsLength #-} putManyConsLength n buildSize hint g xs cb = do - let !(I# n# ) = Nat.demote n - let !(I# actual# ) = max hint (I# n# ) + let !(I# n#) = Nat.demote n + let !(I# actual#) = max hint (I# n#) let !threshold = div (I# actual# * 3) 4 - MutableByteArray buf0 <- liftIO (PM.newByteArray (I# actual# )) - BuilderState bufZ offZ _ cmtsZ <- foldlM - (\st0 a -> do - st1@(BuilderState buf off _ cmts) <- liftIO (pasteIO (g a) st0) - case cmts of - Initial -> if I# off < threshold - then pure st1 - else do - let !dist = off -# n# - _ <- liftIO $ stToIO $ UnsafeBounded.pasteST - (buildSize (fromIntegral (I# dist))) - (MutableByteArray buf0) 0 - _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) - pure (BuilderState buf0 n# (actual# -# n# ) Initial) - _ -> do - let !dist = commitDistance1 buf0 n# buf off cmts - _ <- liftIO $ stToIO $ UnsafeBounded.pasteST - (buildSize (fromIntegral (I# dist))) - (MutableByteArray buf0) 0 - let total = addCommitsLength (I# off) cmts - doff0 = total - I# off - large <- liftIO (PM.newByteArray total) - liftIO (stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off))) - r <- liftIO (stToIO (copyReverseCommits large doff0 cmts)) - case r of - 0 -> do - _ <- cb (MutableBytes large 0 total) - pure (BuilderState buf0 n# (actual# -# n# ) Initial) - _ -> liftIO (IO (\s0 -> Exts.raiseIO# putManyError s0)) - ) (BuilderState buf0 n# (actual# -# n# ) Initial) xs + MutableByteArray buf0 <- liftIO (PM.newByteArray (I# actual#)) + BuilderState bufZ offZ _ cmtsZ <- + foldlM + ( \st0 a -> do + st1@(BuilderState buf off _ cmts) <- liftIO (pasteIO (g a) st0) + case cmts of + Initial -> + if I# off < threshold + then pure st1 + else do + let !dist = off -# n# + _ <- + liftIO $ + stToIO $ + UnsafeBounded.pasteST + (buildSize (fromIntegral (I# dist))) + (MutableByteArray buf0) + 0 + _ <- cb (MutableBytes (MutableByteArray buf) 0 (I# off)) + pure (BuilderState buf0 n# (actual# -# n#) Initial) + _ -> do + let !dist = commitDistance1 buf0 n# buf off cmts + _ <- + liftIO $ + stToIO $ + UnsafeBounded.pasteST + (buildSize (fromIntegral (I# dist))) + (MutableByteArray buf0) + 0 + let total = addCommitsLength (I# off) cmts + doff0 = total - I# off + large <- liftIO (PM.newByteArray total) + liftIO (stToIO (PM.copyMutableByteArray large doff0 (MutableByteArray buf) 0 (I# off))) + r <- liftIO (stToIO (copyReverseCommits large doff0 cmts)) + case r of + 0 -> do + _ <- cb (MutableBytes large 0 total) + pure (BuilderState buf0 n# (actual# -# n#) Initial) + _ -> liftIO (IO (\s0 -> Exts.raiseIO# putManyError s0)) + ) + (BuilderState buf0 n# (actual# -# n#) Initial) + xs _ <- case cmtsZ of Initial -> do let !distZ = offZ -# n# - _ <- liftIO $ stToIO $ UnsafeBounded.pasteST - (buildSize (fromIntegral (I# distZ))) - (MutableByteArray buf0) - 0 + _ <- + liftIO $ + stToIO $ + UnsafeBounded.pasteST + (buildSize (fromIntegral (I# distZ))) + (MutableByteArray buf0) + 0 cb (MutableBytes (MutableByteArray bufZ) 0 (I# offZ)) _ -> liftIO (IO (\s0 -> Exts.raiseIO# putManyError s0)) pure () --- | Convert a bounded builder to an unbounded one. If the size --- is a constant, use @Arithmetic.Nat.constant@ as the first argument --- to let GHC conjure up this value for you. +{- | Convert a bounded builder to an unbounded one. If the size +is a constant, use @Arithmetic.Nat.constant@ as the first argument +to let GHC conjure up this value for you. +-} fromBounded :: - Arithmetic.Nat n - -> Bounded.Builder n - -> Builder -{-# inline fromBounded #-} + Arithmetic.Nat n -> + Bounded.Builder n -> + Builder +{-# INLINE fromBounded #-} fromBounded n (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> let !(I# req) = Nat.demote n !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> let !(I# lenX) = max 4080 (I# req) in - case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> + let !(I# lenX) = max 4080 (I# req) + in case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case f buf1 off1 s1 of (# s2, off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) @@ -379,9 +457,9 @@ fromBounded n (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> -- Use this instead of fromBounded (where possible) leads to marginally -- better results in benchmarks. fromBoundedOne :: - Bounded.Builder 1 - -> Builder -{-# inline fromBoundedOne #-} + Bounded.Builder 1 -> + Builder +{-# INLINE fromBoundedOne #-} fromBoundedOne (UnsafeBounded.Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 of 0# -> case Exts.newByteArray# 4080# s0 of @@ -398,31 +476,35 @@ byteArray a = bytes (Bytes a 0 (PM.sizeofByteArray a)) -- | Create a builder from a short bytestring. Implemented with 'bytes'. shortByteString :: ShortByteString -> Builder shortByteString (SBS x) = bytes (Bytes a 0 (PM.sizeofByteArray a)) - where a = ByteArray x - --- | Create a builder from a sliced byte sequence. The variants --- 'copy' and 'insert' provide more control over whether or not --- the byte sequence is copied or aliased. This function is preferred --- when the user does not know the size of the byte sequence. + where + a = ByteArray x + +{- | Create a builder from a sliced byte sequence. The variants +'copy' and 'insert' provide more control over whether or not +the byte sequence is copied or aliased. This function is preferred +when the user does not know the size of the byte sequence. +-} bytes :: Bytes -> Builder -bytes (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder - -- There are three cases to consider: (1) there is not enough - -- space and (1a) the chunk is not small or (1b) the chunk is - -- small; (2) There is enough space for a copy. - (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case slen# >=# 256# of - 1# -> case Exts.newByteArray# 0# s0 of - (# s1, buf1 #) -> (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) - _ -> case Exts.newByteArray# 4080# s0 of - (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of - s2 -> (# s2, buf1, slen#, 4080# -# slen#, Mutable buf0 off0 cs0 #) - _ -> let s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 in - (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) +bytes (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = + Builder + -- There are three cases to consider: (1) there is not enough + -- space and (1a) the chunk is not small or (1b) the chunk is + -- small; (2) There is enough space for a copy. + ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case slen# >=# 256# of + 1# -> case Exts.newByteArray# 0# s0 of + (# s1, buf1 #) -> (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) + _ -> case Exts.newByteArray# 4080# s0 of + (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of + s2 -> (# s2, buf1, slen#, 4080# -# slen#, Mutable buf0 off0 cs0 #) + _ -> + let s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 + in (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) -- | Paste byte chunks into a builder. chunks :: Chunks -> Builder -{-# noinline chunks #-} +{-# NOINLINE chunks #-} chunks xs0 = -- Implementation note: It would probably be good to begin with a -- goCopying phase before switching to goInserting. If the total @@ -431,8 +513,8 @@ chunks xs0 = -- Note: This function needs a test in the test suite. Builder $ \buf0 off0 len0 cs0 s0 -> case xs0 of ChunksNil -> (# s0, buf0, off0, len0, cs0 #) - ChunksCons{} -> goInserting xs0 (Mutable buf0 off0 cs0) s0 - where + ChunksCons {} -> goInserting xs0 (Mutable buf0 off0 cs0) s0 + where -- Notice that goNoncopying does not take a buffer as an argument. At the -- very end, we create a 128-byte buffer with nothing in it and present -- that as the new buffer. We *cannot* simply reuse the old buffer with @@ -443,74 +525,84 @@ chunks xs0 = goInserting (ChunksCons (Bytes (ByteArray b) (I# off) (I# len)) ys) !cs s0 = goInserting ys (Immutable b off len cs) s0 --- | Create a builder from a byte sequence. This always results in a --- call to @memcpy@. This is beneficial when the byte sequence is --- known to be small (less than 256 bytes). +{- | Create a builder from a byte sequence. This always results in a +call to @memcpy@. This is beneficial when the byte sequence is +known to be small (less than 256 bytes). +-} copy :: Bytes -> Builder -copy (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder - (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of - s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) - _ -> let !s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 in - (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) - where +copy (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = + Builder + ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 0# slen# s1 of + s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) + _ -> + let !s1 = Op.copyByteArray# src# soff# buf0 off0 slen# s0 + in (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) + where !(I# newSz) = max (I# slen#) 4080 --- | Variant of 'copy' that additionally pastes an extra byte in --- front of the bytes. +{- | Variant of 'copy' that additionally pastes an extra byte in +front of the bytes. +-} copyCons :: Word8 -> Bytes -> Builder -copyCons (W8# w0) (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder - (\buf0 off0 len0 cs0 s0 -> case len0 <# (slen# +# 1#) of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 1# slen# s1 of - s2 -> case Exts.writeWord8Array# buf1 0# w0 s2 of - s3 -> (# s3, buf1, slen# +# 1#, newSz -# (slen# +# 1#), Mutable buf0 off0 cs0 #) - _ -> let !s1 = Op.copyByteArray# src# soff# buf0 (off0 +# 1#) slen# s0 - !s2 = Exts.writeWord8Array# buf0 off0 w0 s1 - in (# s2, buf0, off0 +# (slen# +# 1#), len0 -# (slen# +# 1#), cs0 #) - ) - where +copyCons (W8# w0) (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = + Builder + ( \buf0 off0 len0 cs0 s0 -> case len0 <# (slen# +# 1#) of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Op.copyByteArray# src# soff# buf1 1# slen# s1 of + s2 -> case Exts.writeWord8Array# buf1 0# w0 s2 of + s3 -> (# s3, buf1, slen# +# 1#, newSz -# (slen# +# 1#), Mutable buf0 off0 cs0 #) + _ -> + let !s1 = Op.copyByteArray# src# soff# buf0 (off0 +# 1#) slen# s0 + !s2 = Exts.writeWord8Array# buf0 off0 w0 s1 + in (# s2, buf0, off0 +# (slen# +# 1#), len0 -# (slen# +# 1#), cs0 #) + ) + where !(I# newSz) = max ((I# slen#) + 1) 4080 cstring# :: Addr# -> Builder -{-# inline cstring# #-} +{-# INLINE cstring# #-} cstring# x = cstring (Exts.Ptr x) --- | Create a builder from a C string with explicit length. The builder --- must be executed before the C string is freed. +{- | Create a builder from a C string with explicit length. The builder +must be executed before the C string is freed. +-} cstringLen :: CStringLen -> Builder -cstringLen (Exts.Ptr src#, I# slen# ) = Builder - (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Exts.copyAddrToByteArray# src# buf1 0# slen# s1 of - s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) - _ -> let !s1 = Exts.copyAddrToByteArray# src# buf0 off0 slen# s0 in - (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) - where +cstringLen (Exts.Ptr src#, I# slen#) = + Builder + ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Exts.copyAddrToByteArray# src# buf1 0# slen# s1 of + s2 -> (# s2, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) + _ -> + let !s1 = Exts.copyAddrToByteArray# src# buf0 off0 slen# s0 + in (# s1, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) + where !(I# newSz) = max (I# slen#) 4080 --- | Encode seven bytes into eight so that the encoded form is eight-bit clean. --- Specifically segment the input bytes inot 7-bit groups (lowest-to-highest --- index byte, most-to-least significant bit within a byte), pads the last group --- with trailing zeros, and forms octects by prepending a zero to each group. --- --- The name was chosen because this pads the input bits with zeros on the right, --- and also because this was likely the originally-indended behavior of the --- SMILE standard (see 'sevenEightSmile'). Right padding the input bits to a --- multiple of seven, as in this variant, is consistent with base64 encodings --- (which encodes 3 bytes in 4) and base85 (which encodes 4 bytes in 5). +{- | Encode seven bytes into eight so that the encoded form is eight-bit clean. +Specifically segment the input bytes inot 7-bit groups (lowest-to-highest +index byte, most-to-least significant bit within a byte), pads the last group +with trailing zeros, and forms octects by prepending a zero to each group. + +The name was chosen because this pads the input bits with zeros on the right, +and also because this was likely the originally-indended behavior of the +SMILE standard (see 'sevenEightSmile'). Right padding the input bits to a +multiple of seven, as in this variant, is consistent with base64 encodings +(which encodes 3 bytes in 4) and base85 (which encodes 4 bytes in 5). +-} sevenEightRight :: Bytes -> Builder sevenEightRight bs0 = case toWord 0 0 bs0 of (0, _) -> mempty (len, w) -> go (len * 8) w <> sevenEightSmile (Bytes.unsafeDrop len bs0) - where + where go :: Int -> Word64 -> Builder go !nBits !_ | nBits <= 0 = mempty go !nBits !w = - let octet = (fromIntegral $ unsafeShiftR w (8*7+1)) .&. 0x7f + let octet = (fromIntegral $ unsafeShiftR w (8 * 7 + 1)) .&. 0x7f in word8 octet <> go (nBits - 7) (unsafeShiftL w 7) toWord :: Int -> Word64 -> Bytes -> (Int, Word64) toWord !i !acc !bs @@ -519,28 +611,29 @@ sevenEightRight bs0 = case toWord 0 0 bs0 of let b = fromIntegral @Word8 @Word64 $ Bytes.unsafeIndex bs 0 acc' = acc .|. unsafeShiftL b (fromIntegral $ 8 * (7 - i)) in if i < 7 - then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) - else (i, acc) - --- | Encode seven bytes into eight so that the encoded form is eight-bit clean. --- Specifically segment the input bytes inot 7-bit groups (lowest-to-highest --- index byte, most-to-least significant bit within a byte), then pad each group --- with zeros on the left until each group is an octet. --- --- The name was chosen because this is the implementation that is used (probably --- unintentionally) in the reference SMILE implementation, and so is expected tp --- be accepted by existing SMILE consumers. + then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) + else (i, acc) + +{- | Encode seven bytes into eight so that the encoded form is eight-bit clean. +Specifically segment the input bytes inot 7-bit groups (lowest-to-highest +index byte, most-to-least significant bit within a byte), then pad each group +with zeros on the left until each group is an octet. + +The name was chosen because this is the implementation that is used (probably +unintentionally) in the reference SMILE implementation, and so is expected tp +be accepted by existing SMILE consumers. +-} sevenEightSmile :: Bytes -> Builder sevenEightSmile bs0 = case toWord 0 0 bs0 of (0, _) -> mempty (len, w) -> go (len * 8) w <> sevenEightSmile (Bytes.unsafeDrop len bs0) - where + where go :: Int -> Word64 -> Builder go !nBits !w | nBits == 0 = mempty | nBits < 7 = go 7 (unsafeShiftR w (7 - nBits)) go !nBits !w = - let octet = (fromIntegral $ unsafeShiftR w (8*7+1)) .&. 0x7f + let octet = (fromIntegral $ unsafeShiftR w (8 * 7 + 1)) .&. 0x7f in word8 octet <> go (nBits - 7) (unsafeShiftL w 7) toWord :: Int -> Word64 -> Bytes -> (Int, Word64) toWord !i !acc !bs @@ -549,44 +642,51 @@ sevenEightSmile bs0 = case toWord 0 0 bs0 of let b = fromIntegral @Word8 @Word64 $ Bytes.unsafeIndex bs 0 acc' = acc .|. unsafeShiftL b (fromIntegral $ 8 * (7 - i)) in if i < 7 - then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) - else (i, acc) + then toWord (i + 1) acc' (Bytes.unsafeDrop 1 bs) + else (i, acc) --- | Create a builder from two byte sequences. This always results in two --- calls to @memcpy@. This is beneficial when the byte sequences are --- known to be small (less than 256 bytes). +{- | Create a builder from two byte sequences. This always results in two +calls to @memcpy@. This is beneficial when the byte sequences are +known to be small (less than 256 bytes). +-} copy2 :: Bytes -> Bytes -> Builder -copy2 (Bytes (ByteArray srcA# ) (I# soffA# ) (I# slenA# )) - (Bytes (ByteArray srcB# ) (I# soffB# ) (I# slenB# )) = Builder - (\buf0 off0 len0 cs0 s0 -> case len0 <# slen# of - 1# -> case Exts.newByteArray# newSz s0 of - (# s1, buf1 #) -> case Op.copyByteArray# srcA# soffA# buf1 0# slenA# s1 of - s2 -> case Op.copyByteArray# srcB# soffB# buf1 slenA# slenB# s2 of - s3 -> (# s3, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) - _ -> let !s1 = Op.copyByteArray# srcA# soffA# buf0 off0 slenA# s0 - !s2 = Op.copyByteArray# srcB# soffB# buf0 (off0 +# slenA# ) slenB# s1 in - (# s2, buf0, off0 +# slen#, len0 -# slen#, cs0 #) - ) - where - !slen# = slenA# +# slenB# - !(I# newSz) = max (I# slen#) 4080 - --- | Create a builder from a byte sequence. This never calls @memcpy@. --- Instead, it pushes a chunk that references the argument byte sequence. --- This wastes the remaining space in the active chunk, so it may adversely --- affect performance if used carelessly. See 'flush' for a way to mitigate --- this problem. This functions is most beneficial when the byte sequence --- is known to be large (more than 8192 bytes). +copy2 + (Bytes (ByteArray srcA#) (I# soffA#) (I# slenA#)) + (Bytes (ByteArray srcB#) (I# soffB#) (I# slenB#)) = + Builder + ( \buf0 off0 len0 cs0 s0 -> case len0 <# slen# of + 1# -> case Exts.newByteArray# newSz s0 of + (# s1, buf1 #) -> case Op.copyByteArray# srcA# soffA# buf1 0# slenA# s1 of + s2 -> case Op.copyByteArray# srcB# soffB# buf1 slenA# slenB# s2 of + s3 -> (# s3, buf1, slen#, newSz -# slen#, Mutable buf0 off0 cs0 #) + _ -> + let !s1 = Op.copyByteArray# srcA# soffA# buf0 off0 slenA# s0 + !s2 = Op.copyByteArray# srcB# soffB# buf0 (off0 +# slenA#) slenB# s1 + in (# s2, buf0, off0 +# slen#, len0 -# slen#, cs0 #) + ) + where + !slen# = slenA# +# slenB# + !(I# newSz) = max (I# slen#) 4080 + +{- | Create a builder from a byte sequence. This never calls @memcpy@. +Instead, it pushes a chunk that references the argument byte sequence. +This wastes the remaining space in the active chunk, so it may adversely +affect performance if used carelessly. See 'flush' for a way to mitigate +this problem. This functions is most beneficial when the byte sequence +is known to be large (more than 8192 bytes). +-} insert :: Bytes -> Builder -insert (Bytes (ByteArray src# ) (I# soff# ) (I# slen# )) = Builder - (\buf0 off0 _ cs0 s0 -> case Exts.newByteArray# 0# s0 of - (# s1, buf1 #) -> - (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) - ) - --- | Create a builder from a slice of an array of 'Word8'. There is the same --- as 'bytes' but is provided as a convenience for users working with different --- types. +insert (Bytes (ByteArray src#) (I# soff#) (I# slen#)) = + Builder + ( \buf0 off0 _ cs0 s0 -> case Exts.newByteArray# 0# s0 of + (# s1, buf1 #) -> + (# s1, buf1, 0#, 0#, Immutable src# soff# slen# (Mutable buf0 off0 cs0) #) + ) + +{- | Create a builder from a slice of an array of 'Word8'. There is the same +as 'bytes' but is provided as a convenience for users working with different +types. +-} word8Array :: PrimArray Word8 -> Int -> Int -> Builder word8Array (PrimArray arr) off len = bytes (Bytes (ByteArray arr) off len) @@ -661,129 +761,134 @@ word16ArrayBE src@(PrimArray arr) soff0 slen0 = case targetByteOrder of word16ArraySwap :: PrimArray Word16 -> Int -> Int -> Builder word16ArraySwap src soff0 slen0 = fromFunction (slen0 * 2) (go (soff0 * 2) ((soff0 + slen0) * 2)) - where + where go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - PM.writeByteArray dst doff v1 - PM.writeByteArray dst (doff + 1) v0 - go (soff + 2) send dst (doff + 2) - else pure doff + go !soff !send !dst !doff = + if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + PM.writeByteArray dst doff v1 + PM.writeByteArray dst (doff + 1) v0 + go (soff + 2) send dst (doff + 2) + else pure doff word32ArraySwap :: PrimArray Word32 -> Int -> Int -> Builder word32ArraySwap src soff0 slen0 = fromFunction (slen0 * 4) (go (soff0 * 4) ((soff0 + slen0) * 4)) - where + where go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - v2 = PM.indexPrimArray (asWord8s src) (soff + 2) - v3 = PM.indexPrimArray (asWord8s src) (soff + 3) - PM.writeByteArray dst doff v3 - PM.writeByteArray dst (doff + 1) v2 - PM.writeByteArray dst (doff + 2) v1 - PM.writeByteArray dst (doff + 3) v0 - go (soff + 4) send dst (doff + 4) - else pure doff + go !soff !send !dst !doff = + if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + v2 = PM.indexPrimArray (asWord8s src) (soff + 2) + v3 = PM.indexPrimArray (asWord8s src) (soff + 3) + PM.writeByteArray dst doff v3 + PM.writeByteArray dst (doff + 1) v2 + PM.writeByteArray dst (doff + 2) v1 + PM.writeByteArray dst (doff + 3) v0 + go (soff + 4) send dst (doff + 4) + else pure doff word64ArraySwap :: PrimArray Word64 -> Int -> Int -> Builder word64ArraySwap src soff0 slen0 = fromFunction (slen0 * 8) (go (soff0 * 8) ((soff0 + slen0) * 8)) - where + where go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - v2 = PM.indexPrimArray (asWord8s src) (soff + 2) - v3 = PM.indexPrimArray (asWord8s src) (soff + 3) - v4 = PM.indexPrimArray (asWord8s src) (soff + 4) - v5 = PM.indexPrimArray (asWord8s src) (soff + 5) - v6 = PM.indexPrimArray (asWord8s src) (soff + 6) - v7 = PM.indexPrimArray (asWord8s src) (soff + 7) - PM.writeByteArray dst doff v7 - PM.writeByteArray dst (doff + 1) v6 - PM.writeByteArray dst (doff + 2) v5 - PM.writeByteArray dst (doff + 3) v4 - PM.writeByteArray dst (doff + 4) v3 - PM.writeByteArray dst (doff + 5) v2 - PM.writeByteArray dst (doff + 6) v1 - PM.writeByteArray dst (doff + 7) v0 - go (soff + 8) send dst (doff + 8) - else pure doff + go !soff !send !dst !doff = + if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + v2 = PM.indexPrimArray (asWord8s src) (soff + 2) + v3 = PM.indexPrimArray (asWord8s src) (soff + 3) + v4 = PM.indexPrimArray (asWord8s src) (soff + 4) + v5 = PM.indexPrimArray (asWord8s src) (soff + 5) + v6 = PM.indexPrimArray (asWord8s src) (soff + 6) + v7 = PM.indexPrimArray (asWord8s src) (soff + 7) + PM.writeByteArray dst doff v7 + PM.writeByteArray dst (doff + 1) v6 + PM.writeByteArray dst (doff + 2) v5 + PM.writeByteArray dst (doff + 3) v4 + PM.writeByteArray dst (doff + 4) v3 + PM.writeByteArray dst (doff + 5) v2 + PM.writeByteArray dst (doff + 6) v1 + PM.writeByteArray dst (doff + 7) v0 + go (soff + 8) send dst (doff + 8) + else pure doff word128ArraySwap :: PrimArray Word128 -> Int -> Int -> Builder word128ArraySwap src soff0 slen0 = fromFunction (slen0 * 16) (go (soff0 * 16) ((soff0 + slen0) * 16)) - where + where -- TODO: Perhaps we could put byteswapping functions to use -- rather than indexing tons of Word8s. This could be done -- both here and in the other swap functions. There are a -- decent number of tests for these array-swapping functions, -- which makes changing this less scary. go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = if soff < send - then do - let v0 = PM.indexPrimArray (asWord8s src) soff - v1 = PM.indexPrimArray (asWord8s src) (soff + 1) - v2 = PM.indexPrimArray (asWord8s src) (soff + 2) - v3 = PM.indexPrimArray (asWord8s src) (soff + 3) - v4 = PM.indexPrimArray (asWord8s src) (soff + 4) - v5 = PM.indexPrimArray (asWord8s src) (soff + 5) - v6 = PM.indexPrimArray (asWord8s src) (soff + 6) - v7 = PM.indexPrimArray (asWord8s src) (soff + 7) - v8 = PM.indexPrimArray (asWord8s src) (soff + 8) - v9 = PM.indexPrimArray (asWord8s src) (soff + 9) - v10 = PM.indexPrimArray (asWord8s src) (soff + 10) - v11 = PM.indexPrimArray (asWord8s src) (soff + 11) - v12 = PM.indexPrimArray (asWord8s src) (soff + 12) - v13 = PM.indexPrimArray (asWord8s src) (soff + 13) - v14 = PM.indexPrimArray (asWord8s src) (soff + 14) - v15 = PM.indexPrimArray (asWord8s src) (soff + 15) - PM.writeByteArray dst doff v15 - PM.writeByteArray dst (doff + 1) v14 - PM.writeByteArray dst (doff + 2) v13 - PM.writeByteArray dst (doff + 3) v12 - PM.writeByteArray dst (doff + 4) v11 - PM.writeByteArray dst (doff + 5) v10 - PM.writeByteArray dst (doff + 6) v9 - PM.writeByteArray dst (doff + 7) v8 - PM.writeByteArray dst (doff + 8) v7 - PM.writeByteArray dst (doff + 9) v6 - PM.writeByteArray dst (doff + 10) v5 - PM.writeByteArray dst (doff + 11) v4 - PM.writeByteArray dst (doff + 12) v3 - PM.writeByteArray dst (doff + 13) v2 - PM.writeByteArray dst (doff + 14) v1 - PM.writeByteArray dst (doff + 15) v0 - go (soff + 16) send dst (doff + 16) - else pure doff + go !soff !send !dst !doff = + if soff < send + then do + let v0 = PM.indexPrimArray (asWord8s src) soff + v1 = PM.indexPrimArray (asWord8s src) (soff + 1) + v2 = PM.indexPrimArray (asWord8s src) (soff + 2) + v3 = PM.indexPrimArray (asWord8s src) (soff + 3) + v4 = PM.indexPrimArray (asWord8s src) (soff + 4) + v5 = PM.indexPrimArray (asWord8s src) (soff + 5) + v6 = PM.indexPrimArray (asWord8s src) (soff + 6) + v7 = PM.indexPrimArray (asWord8s src) (soff + 7) + v8 = PM.indexPrimArray (asWord8s src) (soff + 8) + v9 = PM.indexPrimArray (asWord8s src) (soff + 9) + v10 = PM.indexPrimArray (asWord8s src) (soff + 10) + v11 = PM.indexPrimArray (asWord8s src) (soff + 11) + v12 = PM.indexPrimArray (asWord8s src) (soff + 12) + v13 = PM.indexPrimArray (asWord8s src) (soff + 13) + v14 = PM.indexPrimArray (asWord8s src) (soff + 14) + v15 = PM.indexPrimArray (asWord8s src) (soff + 15) + PM.writeByteArray dst doff v15 + PM.writeByteArray dst (doff + 1) v14 + PM.writeByteArray dst (doff + 2) v13 + PM.writeByteArray dst (doff + 3) v12 + PM.writeByteArray dst (doff + 4) v11 + PM.writeByteArray dst (doff + 5) v10 + PM.writeByteArray dst (doff + 6) v9 + PM.writeByteArray dst (doff + 7) v8 + PM.writeByteArray dst (doff + 8) v7 + PM.writeByteArray dst (doff + 9) v6 + PM.writeByteArray dst (doff + 10) v5 + PM.writeByteArray dst (doff + 11) v4 + PM.writeByteArray dst (doff + 12) v3 + PM.writeByteArray dst (doff + 13) v2 + PM.writeByteArray dst (doff + 14) v1 + PM.writeByteArray dst (doff + 15) v0 + go (soff + 16) send dst (doff + 16) + else pure doff word256ArraySwap :: PrimArray Word256 -> Int -> Int -> Builder word256ArraySwap src soff0 slen0 = fromFunction (slen0 * 32) (go (soff0 * 32) ((soff0 + slen0) * 32)) - where + where -- TODO: Perhaps we could put byteswapping functions to use -- rather than indexing tons of Word8s. This could be done -- both here and in the other swap functions. There are a -- decent number of tests for these array-swapping functions, -- which makes changing this less scary. go :: Int -> Int -> MutableByteArray s -> Int -> ST s Int - go !soff !send !dst !doff = if soff < send - then do - let loop !i - | i < 32 = do - let v = PM.indexPrimArray (asWord8s src) (soff + i) - PM.writeByteArray dst (doff + (31 - i)) v - loop (i + 1) - | otherwise = pure () - loop 0 - go (soff + 32) send dst (doff + 32) - else pure doff + go !soff !send !dst !doff = + if soff < send + then do + let loop !i + | i < 32 = do + let v = PM.indexPrimArray (asWord8s src) (soff + i) + PM.writeByteArray dst (doff + (31 - i)) v + loop (i + 1) + | otherwise = pure () + loop 0 + go (soff + 32) send dst (doff + 32) + else pure doff asWord8s :: PrimArray a -> PrimArray Word8 asWord8s (PrimArray x) = PrimArray x @@ -791,38 +896,43 @@ asWord8s (PrimArray x) = PrimArray x -- Internal function. Precondition, the referenced slice of the -- byte sequence is UTF-8 encoded text. slicedUtf8TextJson :: ByteArray# -> Int# -> Int# -> Builder -{-# noinline slicedUtf8TextJson #-} -slicedUtf8TextJson !src# !soff0# !slen0# = fromFunction# reqLen# - ( \dst# doff0# s0# -> pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# ) - where +{-# NOINLINE slicedUtf8TextJson #-} +slicedUtf8TextJson !src# !soff0# !slen0# = + fromFunction# + reqLen# + (\dst# doff0# s0# -> pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0#) + where -- We multiply by 6 because, in the worst case, everything might be in the -- unprintable ASCII range. The plus 2 is for the quotes on the ends. - !reqLen# = (6# *# slen0# ) +# 2# + !reqLen# = (6# *# slen0#) +# 2# --- | Constructor for 'Builder' that works on a function with lifted --- arguments instead of unlifted ones. This is just as unsafe as the --- actual constructor. +{- | Constructor for 'Builder' that works on a function with lifted +arguments instead of unlifted ones. This is just as unsafe as the +actual constructor. +-} fromFunction :: Int -> (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder -{-# inline fromFunction #-} +{-# INLINE fromFunction #-} fromFunction (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> let !(I# lenX) = max 4080 (I# req) in - case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> + let !(I# lenX) = max 4080 (I# req) + in case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case unST (f (MutableByteArray buf1) (I# off1)) s1 of (# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) -fromFunction# :: Int# -> (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #) ) -> Builder -{-# inline fromFunction# #-} +fromFunction# :: Int# -> (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)) -> Builder +{-# INLINE fromFunction# #-} fromFunction# req f = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> let !(I# lenX) = max 4080 (I# req) in - case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> + let !(I# lenX) = max 4080 (I# req) + in case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case f buf1 off1 s1 of (# s2, off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) @@ -837,189 +947,217 @@ textUtf8 :: Text -> Builder textUtf8 (I.Text (A.ByteArray b) off len) = bytes (Bytes (ByteArray b) off len) --- | Create a builder from text. The text will be UTF-8 encoded, --- and JSON special characters will be escaped. Additionally, the --- result is surrounded by double quotes. For example: --- --- * @foo ==\> "foo"@ (no escape sequences) --- * @\\_"_\/ ==\> "\\\\_\\"_\/"@ (escapes backslashes and quotes) --- * @hello\world ==> "hello\\u001Bworld"@ (where @\@ is code point 0x1B) +{- | Create a builder from text. The text will be UTF-8 encoded, +and JSON special characters will be escaped. Additionally, the +result is surrounded by double quotes. For example: + +* @foo ==\> "foo"@ (no escape sequences) +* @\\_"_\/ ==\> "\\\\_\\"_\/"@ (escapes backslashes and quotes) +* @hello\world ==> "hello\\u001Bworld"@ (where @\@ is code point 0x1B) +-} shortTextJsonString :: ShortText -> Builder -{-# inline shortTextJsonString #-} +{-# INLINE shortTextJsonString #-} shortTextJsonString a = let !(ByteArray ba) = shortTextToByteArray a !(I# len) = PM.sizeofByteArray (ByteArray ba) in slicedUtf8TextJson ba 0# len textJsonString :: Text -> Builder -{-# inline textJsonString #-} +{-# INLINE textJsonString #-} textJsonString (I.Text (A.ByteArray ba) (I# off) (I# len)) = slicedUtf8TextJson ba off len --- | Encodes an unsigned 64-bit integer as decimal. --- This encoding never starts with a zero unless the --- argument was zero. +{- | Encodes an unsigned 64-bit integer as decimal. +This encoding never starts with a zero unless the +argument was zero. +-} word64Dec :: Word64 -> Builder word64Dec w = fromBounded Nat.constant (Bounded.word64Dec w) --- | Encodes an unsigned 16-bit integer as decimal. --- This encoding never starts with a zero unless the --- argument was zero. +{- | Encodes an unsigned 16-bit integer as decimal. +This encoding never starts with a zero unless the +argument was zero. +-} word32Dec :: Word32 -> Builder word32Dec w = fromBounded Nat.constant (Bounded.word32Dec w) --- | Encodes an unsigned 16-bit integer as decimal. --- This encoding never starts with a zero unless the --- argument was zero. +{- | Encodes an unsigned 16-bit integer as decimal. +This encoding never starts with a zero unless the +argument was zero. +-} word16Dec :: Word16 -> Builder word16Dec w = fromBounded Nat.constant (Bounded.word16Dec w) --- | Encodes an unsigned 8-bit integer as decimal. --- This encoding never starts with a zero unless the --- argument was zero. +{- | Encodes an unsigned 8-bit integer as decimal. +This encoding never starts with a zero unless the +argument was zero. +-} word8Dec :: Word8 -> Builder word8Dec w = fromBounded Nat.constant (Bounded.word8Dec w) --- | Encodes an unsigned machine-sized integer as decimal. --- This encoding never starts with a zero unless the --- argument was zero. +{- | Encodes an unsigned machine-sized integer as decimal. +This encoding never starts with a zero unless the +argument was zero. +-} wordDec :: Word -> Builder wordDec w = fromBounded Nat.constant (Bounded.wordDec w) --- | Encode a double-floating-point number, using decimal notation or --- scientific notation depending on the magnitude. This has undefined --- behavior when representing @+inf@, @-inf@, and @NaN@. It will not --- crash, but the generated numbers will be nonsense. +{- | Encode a double-floating-point number, using decimal notation or +scientific notation depending on the magnitude. This has undefined +behavior when representing @+inf@, @-inf@, and @NaN@. It will not +crash, but the generated numbers will be nonsense. +-} doubleDec :: Double -> Builder doubleDec w = fromBounded Nat.constant (Bounded.doubleDec w) --- | Encodes a signed 64-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encodes a signed 64-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int64Dec :: Int64 -> Builder int64Dec w = fromBounded Nat.constant (Bounded.int64Dec w) --- | Encodes a signed 32-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encodes a signed 32-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int32Dec :: Int32 -> Builder int32Dec w = fromBounded Nat.constant (Bounded.int32Dec w) --- | Encodes a signed 16-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encodes a signed 16-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int16Dec :: Int16 -> Builder int16Dec w = fromBounded Nat.constant (Bounded.int16Dec w) --- | Encodes a signed 8-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encodes a signed 8-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int8Dec :: Int8 -> Builder int8Dec w = fromBounded Nat.constant (Bounded.int8Dec w) --- | Encodes a signed machine-sized integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encodes a signed machine-sized integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} intDec :: Int -> Builder intDec w = fromBounded Nat.constant (Bounded.intDec w) --- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding --- the encoding to 16 digits. This uses uppercase for the alphabetical --- digits. For example, this encodes the number 1022 as @00000000000003FE@. +{- | Encode a 64-bit unsigned integer as hexadecimal, zero-padding +the encoding to 16 digits. This uses uppercase for the alphabetical +digits. For example, this encodes the number 1022 as @00000000000003FE@. +-} word64PaddedUpperHex :: Word64 -> Builder word64PaddedUpperHex w = fromBounded Nat.constant (Bounded.word64PaddedUpperHex w) --- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding --- the encoding to 8 digits. This uses uppercase for the alphabetical --- digits. For example, this encodes the number 1022 as @000003FE@. +{- | Encode a 32-bit unsigned integer as hexadecimal, zero-padding +the encoding to 8 digits. This uses uppercase for the alphabetical +digits. For example, this encodes the number 1022 as @000003FE@. +-} word32PaddedUpperHex :: Word32 -> Builder word32PaddedUpperHex w = fromBounded Nat.constant (Bounded.word32PaddedUpperHex w) --- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding --- the encoding to 4 digits. This uses uppercase for the alphabetical --- digits. For example, this encodes the number 1022 as @03FE@. +{- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding +the encoding to 4 digits. This uses uppercase for the alphabetical +digits. For example, this encodes the number 1022 as @03FE@. +-} word16PaddedUpperHex :: Word16 -> Builder word16PaddedUpperHex w = fromBounded Nat.constant (Bounded.word16PaddedUpperHex w) --- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding --- the encoding to 4 digits. This uses lowercase for the alphabetical --- digits. For example, this encodes the number 1022 as @03fe@. +{- | Encode a 16-bit unsigned integer as hexadecimal, zero-padding +the encoding to 4 digits. This uses lowercase for the alphabetical +digits. For example, this encodes the number 1022 as @03fe@. +-} word16PaddedLowerHex :: Word16 -> Builder word16PaddedLowerHex w = fromBounded Nat.constant (Bounded.word16PaddedLowerHex w) --- | Encode a 16-bit unsigned integer as hexadecimal without leading --- zeroes. This uses lowercase for the alphabetical digits. For --- example, this encodes the number 1022 as @3fe@. +{- | Encode a 16-bit unsigned integer as hexadecimal without leading +zeroes. This uses lowercase for the alphabetical digits. For +example, this encodes the number 1022 as @3fe@. +-} word16LowerHex :: Word16 -> Builder word16LowerHex w = fromBounded Nat.constant (Bounded.word16LowerHex w) --- | Encode a 16-bit unsigned integer as hexadecimal without leading --- zeroes. This uses uppercase for the alphabetical digits. For --- example, this encodes the number 1022 as @3FE@. +{- | Encode a 16-bit unsigned integer as hexadecimal without leading +zeroes. This uses uppercase for the alphabetical digits. For +example, this encodes the number 1022 as @3FE@. +-} word16UpperHex :: Word16 -> Builder word16UpperHex w = fromBounded Nat.constant (Bounded.word16UpperHex w) --- | Encode a 16-bit unsigned integer as hexadecimal without leading --- zeroes. This uses lowercase for the alphabetical digits. For --- example, this encodes the number 1022 as @3FE@. +{- | Encode a 16-bit unsigned integer as hexadecimal without leading +zeroes. This uses lowercase for the alphabetical digits. For +example, this encodes the number 1022 as @3FE@. +-} word8LowerHex :: Word8 -> Builder word8LowerHex w = fromBounded Nat.constant (Bounded.word8LowerHex w) --- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding --- the encoding to 2 digits. This uses uppercase for the alphabetical --- digits. For example, this encodes the number 11 as @0B@. +{- | Encode a 8-bit unsigned integer as hexadecimal, zero-padding +the encoding to 2 digits. This uses uppercase for the alphabetical +digits. For example, this encodes the number 11 as @0B@. +-} word8PaddedUpperHex :: Word8 -> Builder word8PaddedUpperHex w = fromBounded Nat.constant (Bounded.word8PaddedUpperHex w) --- | Encode an ASCII char. --- Precondition: Input must be an ASCII character. This is not checked. +{- | Encode an ASCII char. +Precondition: Input must be an ASCII character. This is not checked. +-} ascii :: Char -> Builder ascii c = fromBoundedOne (Bounded.ascii c) --- | Encode two ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode two ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii2 :: Char -> Char -> Builder ascii2 a b = fromBounded Nat.constant (Bounded.ascii2 a b) --- | Encode three ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode three ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii3 :: Char -> Char -> Char -> Builder ascii3 a b c = fromBounded Nat.constant (Bounded.ascii3 a b c) --- | Encode four ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode four ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii4 :: Char -> Char -> Char -> Char -> Builder ascii4 a b c d = fromBounded Nat.constant (Bounded.ascii4 a b c d) --- | Encode five ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode five ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii5 :: Char -> Char -> Char -> Char -> Char -> Builder ascii5 a b c d e = fromBounded Nat.constant (Bounded.ascii5 a b c d e) --- | Encode six ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode six ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii6 :: Char -> Char -> Char -> Char -> Char -> Char -> Builder ascii6 a b c d e f = fromBounded Nat.constant (Bounded.ascii6 a b c d e f) --- | Encode seven ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode seven ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii7 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder ascii7 a b c d e f g = fromBounded Nat.constant (Bounded.ascii7 a b c d e f g) --- | Encode eight ASCII characters. --- Precondition: Must be an ASCII characters. This is not checked. +{- | Encode eight ASCII characters. +Precondition: Must be an ASCII characters. This is not checked. +-} ascii8 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder ascii8 a b c d e f g h = fromBounded Nat.constant (Bounded.ascii8 a b c d e f g h) @@ -1030,83 +1168,99 @@ char c = fromBounded Nat.constant (Bounded.char c) unST :: ST s a -> State# s -> (# State# s, a #) unST (ST f) = f --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- signed integer in a little-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +signed integer in a little-endian fashion. +-} int64LE :: Int64 -> Builder int64LE w = fromBounded Nat.constant (Bounded.int64LE w) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- signed integer in a little-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +signed integer in a little-endian fashion. +-} int32LE :: Int32 -> Builder int32LE w = fromBounded Nat.constant (Bounded.int32LE w) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- signed integer in a little-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +signed integer in a little-endian fashion. +-} int16LE :: Int16 -> Builder int16LE w = fromBounded Nat.constant (Bounded.int16LE w) --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- signed integer in a big-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +signed integer in a big-endian fashion. +-} int64BE :: Int64 -> Builder int64BE w = fromBounded Nat.constant (Bounded.int64BE w) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- signed integer in a big-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +signed integer in a big-endian fashion. +-} int32BE :: Int32 -> Builder int32BE w = fromBounded Nat.constant (Bounded.int32BE w) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- signed integer in a big-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +signed integer in a big-endian fashion. +-} int16BE :: Int16 -> Builder int16BE w = fromBounded Nat.constant (Bounded.int16BE w) --- | Requires exactly 32 bytes. Dump the octets of a 256-bit --- word in a little-endian fashion. +{- | Requires exactly 32 bytes. Dump the octets of a 256-bit +word in a little-endian fashion. +-} word256LE :: Word256 -> Builder word256LE w = fromBounded Nat.constant (Bounded.word256LE w) --- | Requires exactly 16 bytes. Dump the octets of a 128-bit --- word in a little-endian fashion. +{- | Requires exactly 16 bytes. Dump the octets of a 128-bit +word in a little-endian fashion. +-} word128LE :: Word128 -> Builder word128LE w = fromBounded Nat.constant (Bounded.word128LE w) --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- word in a little-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +word in a little-endian fashion. +-} word64LE :: Word64 -> Builder word64LE w = fromBounded Nat.constant (Bounded.word64LE w) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- word in a little-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +word in a little-endian fashion. +-} word32LE :: Word32 -> Builder word32LE w = fromBounded Nat.constant (Bounded.word32LE w) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- word in a little-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +word in a little-endian fashion. +-} word16LE :: Word16 -> Builder word16LE w = fromBounded Nat.constant (Bounded.word16LE w) --- | Requires exactly 32 bytes. Dump the octets of a 256-bit --- word in a big-endian fashion. +{- | Requires exactly 32 bytes. Dump the octets of a 256-bit +word in a big-endian fashion. +-} word256BE :: Word256 -> Builder word256BE w = fromBounded Nat.constant (Bounded.word256BE w) --- | Requires exactly 16 bytes. Dump the octets of a 128-bit --- word in a big-endian fashion. +{- | Requires exactly 16 bytes. Dump the octets of a 128-bit +word in a big-endian fashion. +-} word128BE :: Word128 -> Builder word128BE w = fromBounded Nat.constant (Bounded.word128BE w) --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- word in a big-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +word in a big-endian fashion. +-} word64BE :: Word64 -> Builder word64BE w = fromBounded Nat.constant (Bounded.word64BE w) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- word in a big-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +word in a big-endian fashion. +-} word32BE :: Word32 -> Builder word32BE w = fromBounded Nat.constant (Bounded.word32BE w) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- word in a big-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +word in a big-endian fashion. +-} word16BE :: Word16 -> Builder word16BE w = fromBounded Nat.constant (Bounded.word16BE w) @@ -1116,11 +1270,14 @@ word8 w = fromBoundedOne (Bounded.word8 w) -- | Prefix a builder with the number of bytes that it requires. consLength :: - Arithmetic.Nat n -- ^ Number of bytes used by the serialization of the length - -> (Int -> Bounded.Builder n) -- ^ Length serialization function - -> Builder -- ^ Builder whose length is measured - -> Builder -{-# inline consLength #-} + -- | Number of bytes used by the serialization of the length + Arithmetic.Nat n -> + -- | Length serialization function + (Int -> Bounded.Builder n) -> + -- | Builder whose length is measured + Builder -> + Builder +{-# INLINE consLength #-} consLength !n buildSize (Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> -- There is actually a little bit of unsoundness here. If the number of -- bytes required to encode the length is greater than 4080, this will @@ -1134,175 +1291,200 @@ consLength !n buildSize (Builder f) = Builder $ \buf0 off0 len0 cs0 s0 -> in case f buf1 (off1 +# lenSz) (len1 -# lenSz) cs1 s1 of (# s2, buf2, off2, len2, cs2 #) -> let !dist = commitDistance1 buf1 (off1 +# lenSz) buf2 off2 cs2 - ST g = UnsafeBounded.pasteST - (buildSize (fromIntegral (I# dist))) - (MutableByteArray buf1) - (I# off1) + ST g = + UnsafeBounded.pasteST + (buildSize (fromIntegral (I# dist))) + (MutableByteArray buf1) + (I# off1) in case g s2 of (# s3, _ #) -> (# s3, buf2, off2, len2, cs2 #) --- | Variant of 'consLength32BE' the encodes the length in --- a little-endian fashion. +{- | Variant of 'consLength32BE' the encodes the length in +a little-endian fashion. +-} consLength32LE :: Builder -> Builder consLength32LE = consLength Nat.constant (\x -> Bounded.word32LE (fromIntegral x)) --- | Prefix a builder with its size in bytes. This size is --- presented as a big-endian 32-bit word. The need to prefix --- a builder with its length shows up a numbers of wire protocols --- including those of PostgreSQL and Apache Kafka. Note the --- equivalence: --- --- > forall (n :: Int) (x :: Builder). --- > let sz = sizeofByteArray (run n (consLength32BE x)) --- > consLength32BE x === word32BE (fromIntegral sz) <> x --- --- However, using 'consLength32BE' is much more efficient here --- since it only materializes the 'ByteArray' once. +{- | Prefix a builder with its size in bytes. This size is +presented as a big-endian 32-bit word. The need to prefix +a builder with its length shows up a numbers of wire protocols +including those of PostgreSQL and Apache Kafka. Note the +equivalence: + +> forall (n :: Int) (x :: Builder). +> let sz = sizeofByteArray (run n (consLength32BE x)) +> consLength32BE x === word32BE (fromIntegral sz) <> x + +However, using 'consLength32BE' is much more efficient here +since it only materializes the 'ByteArray' once. +-} consLength32BE :: Builder -> Builder consLength32BE = consLength Nat.constant (\x -> Bounded.word32BE (fromIntegral x)) --- | Prefix a builder with its size in bytes. This size is --- presented as a big-endian 64-bit word. See 'consLength32BE'. +{- | Prefix a builder with its size in bytes. This size is +presented as a big-endian 64-bit word. See 'consLength32BE'. +-} consLength64BE :: Builder -> Builder consLength64BE = consLength Nat.constant (\x -> Bounded.word64BE (fromIntegral x)) --- | Push the buffer currently being filled onto the chunk list, --- allocating a new active buffer of the requested size. This is --- helpful when a small builder is sandwhiched between two large --- zero-copy builders: --- --- > insert bigA <> flush 1 <> word8 0x42 <> insert bigB --- --- Without @flush 1@, @word8 0x42@ would see the zero-byte active --- buffer that 'insert' returned, decide that it needed more space, --- and allocate a 4080-byte buffer to which only a single byte --- would be written. +{- | Push the buffer currently being filled onto the chunk list, +allocating a new active buffer of the requested size. This is +helpful when a small builder is sandwhiched between two large +zero-copy builders: + +> insert bigA <> flush 1 <> word8 0x42 <> insert bigB + +Without @flush 1@, @word8 0x42@ would see the zero-byte active +buffer that 'insert' returned, decide that it needed more space, +and allocate a 4080-byte buffer to which only a single byte +would be written. +-} flush :: Int -> Builder flush !reqSz = Builder $ \buf0 off0 _ cs0 s0 -> case Exts.newByteArray# sz# s0 of (# sX, bufX #) -> (# sX, bufX, 0#, sz#, Mutable buf0 off0 cs0 #) - where - !(I# sz# ) = max reqSz 0 + where + !(I# sz#) = max reqSz 0 -- ShortText is already UTF-8 encoded. This is a no-op. shortTextToByteArray :: ShortText -> ByteArray shortTextToByteArray x = case TS.toShortByteString x of SBS a -> ByteArray a --- | Encode a signed machine-sized integer with LEB-128. This uses --- zig-zag encoding. +{- | Encode a signed machine-sized integer with LEB-128. This uses +zig-zag encoding. +-} intLEB128 :: Int -> Builder -{-# inline intLEB128 #-} +{-# INLINE intLEB128 #-} intLEB128 = wordLEB128 . toZigzagNative -- | Encode a 32-bit signed integer with LEB-128. This uses zig-zag encoding. int32LEB128 :: Int32 -> Builder -{-# inline int32LEB128 #-} +{-# INLINE int32LEB128 #-} int32LEB128 = word32LEB128 . toZigzag32 -- | Encode a 64-bit signed integer with LEB-128. This uses zig-zag encoding. int64LEB128 :: Int64 -> Builder -{-# inline int64LEB128 #-} +{-# INLINE int64LEB128 #-} int64LEB128 = word64LEB128 . toZigzag64 -- | Encode a machine-sized word with LEB-128. wordLEB128 :: Word -> Builder -{-# inline wordLEB128 #-} +{-# INLINE wordLEB128 #-} wordLEB128 w = fromBounded Nat.constant (Bounded.wordLEB128 w) -- | Encode a 16-bit word with LEB-128. word16LEB128 :: Word16 -> Builder -{-# inline word16LEB128 #-} +{-# INLINE word16LEB128 #-} word16LEB128 w = fromBounded Nat.constant (Bounded.word16LEB128 w) -- | Encode a 32-bit word with LEB-128. word32LEB128 :: Word32 -> Builder -{-# inline word32LEB128 #-} +{-# INLINE word32LEB128 #-} word32LEB128 w = fromBounded Nat.constant (Bounded.word32LEB128 w) -- | Encode a 64-bit word with LEB-128. word64LEB128 :: Word64 -> Builder -{-# inline word64LEB128 #-} +{-# INLINE word64LEB128 #-} word64LEB128 w = fromBounded Nat.constant (Bounded.word64LEB128 w) -- | Encode a machine-sized word with VLQ. wordVlq :: Word -> Builder -{-# inline wordVlq #-} +{-# INLINE wordVlq #-} wordVlq w = fromBounded Nat.constant (Bounded.wordVlq w) -- | Encode a 32-bit word with VLQ. word32Vlq :: Word32 -> Builder -{-# inline word32Vlq #-} +{-# INLINE word32Vlq #-} word32Vlq w = fromBounded Nat.constant (Bounded.word32Vlq w) -- | Encode a 64-bit word with VLQ. word64Vlq :: Word64 -> Builder -{-# inline word64Vlq #-} +{-# INLINE word64Vlq #-} word64Vlq w = fromBounded Nat.constant (Bounded.word64Vlq w) --- | Encode a signed arbitrary-precision integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Encode a signed arbitrary-precision integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} integerDec :: Integer -> Builder integerDec !i | i < 0 = ascii '-' <> naturalDec (naturalFromInteger (negate i)) | otherwise = naturalDec (naturalFromInteger i) --- | Encodes an unsigned arbitrary-precision integer as decimal. --- This encoding never starts with a zero unless the argument was zero. +{- | Encodes an unsigned arbitrary-precision integer as decimal. +This encoding never starts with a zero unless the argument was zero. +-} naturalDec :: Natural -> Builder -naturalDec !n0 = fromEffect - (I# (11# +# (3# *# integerLog2# (naturalToInteger n0)))) - (\marr off -> case n0 of - 0 -> do - PM.writeByteArray marr off (0x30 :: Word8) - pure (off + 1) - _ -> go n0 marr off off - ) - where +naturalDec !n0 = + fromEffect + (I# (11# +# (3# *# integerLog2# (naturalToInteger n0)))) + ( \marr off -> case n0 of + 0 -> do + PM.writeByteArray marr off (0x30 :: Word8) + pure (off + 1) + _ -> go n0 marr off off + ) + where go :: forall s. Natural -> MutableByteArray s -> Int -> Int -> ST s Int go !n !buf !off0 !off = case quotRem n 1_000_000_000 of - (q,r) -> case q of + (q, r) -> case q of 0 -> do off' <- backwardsWordLoop buf off (fromIntegral @Natural @Word r) reverseBytes buf off0 (off' - 1) pure off' _ -> do - off' <- backwardsPasteWordPaddedDec9 - (fromIntegral @Natural @Word r) buf off + off' <- + backwardsPasteWordPaddedDec9 + (fromIntegral @Natural @Word r) + buf + off go q buf off0 off' -- Reverse the bytes in the designated slice. This takes -- an inclusive start offset and an inclusive end offset. reverseBytes :: MutableByteArray s -> Int -> Int -> ST s () -{-# inline reverseBytes #-} -reverseBytes arr begin end = go begin end where - go ixA ixB = if ixA < ixB - then do - a :: Word8 <- PM.readByteArray arr ixA - b :: Word8 <- PM.readByteArray arr ixB - PM.writeByteArray arr ixA b - PM.writeByteArray arr ixB a - go (ixA + 1) (ixB - 1) - else pure () +{-# INLINE reverseBytes #-} +reverseBytes arr begin end = go begin end + where + go ixA ixB = + if ixA < ixB + then do + a :: Word8 <- PM.readByteArray arr ixA + b :: Word8 <- PM.readByteArray arr ixB + PM.writeByteArray arr ixA b + PM.writeByteArray arr ixB a + go (ixA + 1) (ixB - 1) + else pure () backwardsPasteWordPaddedDec9 :: Word -> MutableByteArray s -> Int -> ST s Int backwardsPasteWordPaddedDec9 !w !arr !off = do backwardsPutRem10 - (backwardsPutRem10 $ backwardsPutRem10 $ backwardsPutRem10 $ - backwardsPutRem10 $ backwardsPutRem10 $ backwardsPutRem10 $ - backwardsPutRem10 $ backwardsPutRem10 - (\_ _ _ -> pure ()) - ) arr off w + ( backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 $ + backwardsPutRem10 + (\_ _ _ -> pure ()) + ) + arr + off + w pure (off + 9) backwardsPutRem10 :: - (MutableByteArray s -> Int -> Word -> ST s a) - -> MutableByteArray s -> Int -> Word -> ST s a -{-# inline backwardsPutRem10 #-} + (MutableByteArray s -> Int -> Word -> ST s a) -> + MutableByteArray s -> + Int -> + Word -> + ST s a +{-# INLINE backwardsPutRem10 #-} backwardsPutRem10 andThen arr off dividend = do let quotient = approxDiv10 dividend remainder = dividend - (10 * quotient) @@ -1310,25 +1492,31 @@ backwardsPutRem10 andThen arr off dividend = do andThen arr (off + 1) quotient backwardsWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int -{-# inline backwardsWordLoop #-} -backwardsWordLoop arr off0 x0 = go off0 x0 where - go !off !(x :: Word) = if x > 0 - then do - let (y,z) = quotRem x 10 - PM.writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) - go (off + 1) y - else pure off +{-# INLINE backwardsWordLoop #-} +backwardsWordLoop arr off0 x0 = go off0 x0 + where + go !off !(x :: Word) = + if x > 0 + then do + let (y, z) = quotRem x 10 + PM.writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) + go (off + 1) y + else pure off -- | Replicate a byte the given number of times. replicate :: - Int -- ^ Number of times to replicate the byte - -> Word8 -- ^ Byte to replicate - -> Builder -replicate !len !w = fromEffect len - (\marr off -> do - PM.setByteArray marr off len w - pure (off + len) - ) + -- | Number of times to replicate the byte + Int -> + -- | Byte to replicate + Word8 -> + Builder +replicate !len !w = + fromEffect + len + ( \marr off -> do + PM.setByteArray marr off len w + pure (off + len) + ) -- Based on C code from https://stackoverflow.com/a/5558614 -- For numbers less than 1073741829, this gives a correct answer. @@ -1350,21 +1538,22 @@ approxDiv10 !n = unsafeShiftR (0x1999999A * n) 32 unsafeWordToWord8 :: Word -> Word8 unsafeWordToWord8 (W# w) = W8# (C.wordToWord8# w) --- | This function and the documentation for it are copied from --- Takano Akio's fast-builder library. --- --- @'rebuild' b@ is equivalent to @b@, but it allows GHC to assume --- that @b@ will be run at most once. This can enable various --- optimizations that greately improve performance. --- --- There are two types of typical situations where a use of 'rebuild' --- is often a win: --- --- * When constructing a builder using a recursive function. e.g. --- @rebuild $ foldr ...@. --- * When constructing a builder using a conditional expression. e.g. --- @rebuild $ case x of ... @ +{- | This function and the documentation for it are copied from +Takano Akio's fast-builder library. + +@'rebuild' b@ is equivalent to @b@, but it allows GHC to assume +that @b@ will be run at most once. This can enable various +optimizations that greately improve performance. + +There are two types of typical situations where a use of 'rebuild' +is often a win: + +* When constructing a builder using a recursive function. e.g. + @rebuild $ foldr ...@. +* When constructing a builder using a conditional expression. e.g. + @rebuild $ case x of ... @ +-} rebuild :: Builder -> Builder -{-# inline rebuild #-} +{-# INLINE rebuild #-} rebuild (Builder f) = Builder $ oneShot $ \a -> oneShot $ \b -> oneShot $ \c -> oneShot $ \d -> oneShot $ \e -> f a b c d e diff --git a/src/Data/Bytes/Builder/Avro.hs b/src/Data/Bytes/Builder/Avro.hs index 9c1971b..f86f350 100644 --- a/src/Data/Bytes/Builder/Avro.hs +++ b/src/Data/Bytes/Builder/Avro.hs @@ -1,8 +1,9 @@ -{-# language BangPatterns #-} +{-# LANGUAGE BangPatterns #-} --- | Builders for encoding data with Apache Avro. Most functions in this --- module are just aliases for other functions. Avro uses zig-zag LEB128 --- for all integral types. +{- | Builders for encoding data with Apache Avro. Most functions in this +module are just aliases for other functions. Avro uses zig-zag LEB128 +for all integral types. +-} module Data.Bytes.Builder.Avro ( int , int32 @@ -13,21 +14,22 @@ module Data.Bytes.Builder.Avro , bytes , chunks , text + -- * Maps , map2 ) where -import Data.Int -import Data.Word +import Data.Bytes (Bytes) import Data.Bytes.Builder (Builder) +import Data.Bytes.Chunks (Chunks) +import Data.Int import Data.Text (Text) -import Data.Bytes (Bytes) import Data.WideWord (Word128) -import Data.Bytes.Chunks (Chunks) +import Data.Word import qualified Data.Bytes as Bytes -import qualified Data.Bytes.Chunks as Chunks import qualified Data.Bytes.Builder as B +import qualified Data.Bytes.Chunks as Chunks import qualified Data.Bytes.Text.Utf8 as Utf8 int32 :: Int32 -> Builder @@ -39,19 +41,22 @@ int64 = B.int64LEB128 int :: Int -> Builder int = B.intLEB128 --- | Note: This results in a zigzag encoded number. Avro does not have --- unsigned types. +{- | Note: This results in a zigzag encoded number. Avro does not have +unsigned types. +-} word16 :: Word16 -> Builder word16 = B.int32LEB128 . fromIntegral --- | Note: This results in a zigzag encoded number. Avro does not have --- unsigned types. +{- | Note: This results in a zigzag encoded number. Avro does not have +unsigned types. +-} word32 :: Word32 -> Builder word32 = B.int64LEB128 . fromIntegral --- | Note: This results in a @fixed@ encoded value of length 16. In the --- schema, the type must be @{"type": "fixed", "name": "...", "size": 16}@. --- A big-endian encoding is used. +{- | Note: This results in a @fixed@ encoded value of length 16. In the +schema, the type must be @{"type": "fixed", "name": "...", "size": 16}@. +A big-endian encoding is used. +-} word128 :: Word128 -> Builder word128 = B.word128BE @@ -64,14 +69,19 @@ chunks !b = int (Chunks.length b) <> B.chunks b text :: Text -> Builder text = bytes . Utf8.fromText --- | Encode a map with exactly two key-value pairs. The keys are text. --- This is commonly used to encode the header in an avro file, which has --- a map with two keys: @avro.schema@ and @avro.codec@. +{- | Encode a map with exactly two key-value pairs. The keys are text. +This is commonly used to encode the header in an avro file, which has +a map with two keys: @avro.schema@ and @avro.codec@. +-} map2 :: - Text -- ^ First key - -> Builder -- ^ First value (already encoded) - -> Text -- ^ Second key - -> Builder -- ^ Second value (already encoded) - -> Builder -{-# inline map2 #-} + -- | First key + Text -> + -- | First value (already encoded) + Builder -> + -- | Second key + Text -> + -- | Second value (already encoded) + Builder -> + Builder +{-# INLINE map2 #-} map2 k1 v1 k2 v2 = B.word8 0x04 <> text k1 <> v1 <> text k2 <> v2 <> B.word8 0x00 diff --git a/src/Data/Bytes/Builder/Bounded.hs b/src/Data/Bytes/Builder/Bounded.hs index ed4df12..5703ef3 100644 --- a/src/Data/Bytes/Builder/Bounded.hs +++ b/src/Data/Bytes/Builder/Bounded.hs @@ -1,33 +1,38 @@ -{-# language BangPatterns #-} -{-# language BinaryLiterals #-} -{-# language DataKinds #-} -{-# language KindSignatures #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language NumericUnderscores #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language TypeApplications #-} -{-# language TypeOperators #-} -{-# language UnboxedTuples #-} -{-# language UnliftedFFITypes #-} - --- | The functions in this module are explict about the maximum number --- of bytes they require. +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE UnliftedFFITypes #-} + +{- | The functions in this module are explict about the maximum number +of bytes they require. +-} module Data.Bytes.Builder.Bounded ( -- * Builder Builder + -- * Execute , run , runByteString , pasteGrowST + -- * Combine , empty , append + -- * Bounds Manipulation , weaken , substitute + -- * Encode Integral Types + -- ** Human-Readable , word64Dec , word32Dec @@ -39,25 +44,32 @@ module Data.Bytes.Builder.Bounded , int16Dec , int8Dec , intDec + -- * Unsigned Words + -- ** Wide Words , word128PaddedLowerHex , word128PaddedUpperHex , word256PaddedLowerHex , word256PaddedUpperHex + -- ** 64-bit , word64PaddedLowerHex , word64PaddedUpperHex + -- ** 48-bit , word48PaddedLowerHex + -- ** 32-bit , word32PaddedLowerHex , word32PaddedUpperHex + -- ** 16-bit , word16PaddedLowerHex , word16PaddedUpperHex , word16LowerHex , word16UpperHex + -- ** 8-bit , word8PaddedLowerHex , word8PaddedUpperHex @@ -71,14 +83,18 @@ module Data.Bytes.Builder.Bounded , ascii7 , ascii8 , char + -- ** Native , wordPaddedDec2 , wordPaddedDec3 , wordPaddedDec4 , wordPaddedDec9 + -- ** Machine-Readable + -- *** One , word8 + -- **** Big Endian , word256BE , word128BE @@ -88,6 +104,7 @@ module Data.Bytes.Builder.Bounded , int64BE , int32BE , int16BE + -- **** Little Endian , word256LE , word128LE @@ -97,7 +114,9 @@ module Data.Bytes.Builder.Bounded , int64LE , int32LE , int16LE + -- **** LEB128 + -- | LEB128 encodes an integer in 7-bit units, least significant bits first, -- with the high bit of each output byte set to 1 in all bytes except for -- the final byte. @@ -105,36 +124,38 @@ module Data.Bytes.Builder.Bounded , word16LEB128 , word32LEB128 , word64LEB128 + -- **** VLQ + -- | VLQ (also known as VByte, Varint, VInt) encodes an integer in 7-bit -- units, most significant bits first, with the high bit of each output byte -- set to 1 in all bytes except for the final byte. , wordVlq , word32Vlq , word64Vlq + -- * Encode Floating-Point Types , doubleDec ) where -import Arithmetic.Types (type (<=), type (:=:)) +import Arithmetic.Types (type (:=:), type (<=)) import Control.Monad.Primitive (primitive_) import Control.Monad.ST (ST) -import Control.Monad.ST.Run (runByteArrayST,runIntByteArrayST) +import Control.Monad.ST.Run (runByteArrayST, runIntByteArrayST) import Data.Bits -import Data.Bytes.Builder.Bounded.Unsafe (Builder(..)) import Data.ByteString (ByteString) +import Data.Bytes.Builder.Bounded.Unsafe (Builder (..)) +import Data.Bytes.Types (Bytes (Bytes)) import Data.Char (ord) -import Data.Primitive (MutableByteArray(..),ByteArray,writeByteArray) -import Data.Primitive (readByteArray,newByteArray,unsafeFreezeByteArray) -import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..)) -import Data.WideWord (Word128(Word128),Word256(Word256)) +import Data.Primitive (ByteArray, MutableByteArray (..), newByteArray, readByteArray, unsafeFreezeByteArray, writeByteArray) +import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset (..)) +import Data.WideWord (Word128 (Word128), Word256 (Word256)) import GHC.Exts -import GHC.Int (Int64(I64#),Int32(I32#),Int16(I16#),Int8(I8#)) import GHC.IO (unsafeIOToST) -import GHC.ST (ST(ST)) +import GHC.Int (Int16 (I16#), Int32 (I32#), Int64 (I64#), Int8 (I8#)) +import GHC.ST (ST (ST)) import GHC.TypeLits (type (+)) -import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#)) -import Data.Bytes.Types (Bytes(Bytes)) +import GHC.Word (Word16 (W16#), Word32 (W32#), Word64 (W64#), Word8 (W8#)) import qualified Compat as C @@ -145,48 +166,53 @@ import qualified Data.Bytes as Bytes import qualified Data.Bytes.Builder.Bounded.Unsafe as Unsafe import qualified Data.Primitive as PM --- | Execute the bounded builder. If the size is a constant, --- use @Arithmetic.Nat.constant@ as the first argument to let --- GHC conjure up this value for you. +{- | Execute the bounded builder. If the size is a constant, +use @Arithmetic.Nat.constant@ as the first argument to let +GHC conjure up this value for you. +-} run :: - Arithmetic.Nat n - -> Builder n -- ^ Builder - -> ByteArray -{-# inline run #-} + Arithmetic.Nat n -> + -- | Builder + Builder n -> + ByteArray +{-# INLINE run #-} run n b = runByteArrayST $ do arr <- newByteArray (Nat.demote n) len <- Unsafe.pasteST b arr 0 shrinkMutableByteArray arr len unsafeFreezeByteArray arr --- | Variant of 'run' that puts the result in a pinned buffer and --- packs it up in a 'ByteString'. +{- | Variant of 'run' that puts the result in a pinned buffer and +packs it up in a 'ByteString'. +-} runByteString :: - Arithmetic.Nat n - -> Builder n -- ^ Builder - -> ByteString -{-# inline runByteString #-} + Arithmetic.Nat n -> + -- | Builder + Builder n -> + ByteString +{-# INLINE runByteString #-} runByteString n b = - let (finalLen,r) = runIntByteArrayST $ do + let (finalLen, r) = runIntByteArrayST $ do arr <- PM.newPinnedByteArray (Nat.demote n) len <- Unsafe.pasteST b arr 0 shrinkMutableByteArray arr len arr' <- unsafeFreezeByteArray arr - pure (len,arr') + pure (len, arr') in Bytes.pinnedToByteString (Bytes r 0 finalLen) --- | Paste the builder into the byte array starting at offset zero. --- This reallocates the byte array if it cannot accomodate the builder, --- growing it by the minimum amount necessary. +{- | Paste the builder into the byte array starting at offset zero. +This reallocates the byte array if it cannot accomodate the builder, +growing it by the minimum amount necessary. +-} pasteGrowST :: - Arithmetic.Nat n - -> Builder n - -> MutableByteArrayOffset s - -- ^ Initial buffer, used linearly. Do not reuse this argument. - -> ST s (MutableByteArrayOffset s) - -- ^ Final buffer that accomodated the builder. -{-# inline pasteGrowST #-} -pasteGrowST n b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do + Arithmetic.Nat n -> + Builder n -> + -- | Initial buffer, used linearly. Do not reuse this argument. + MutableByteArrayOffset s -> + -- | Final buffer that accomodated the builder. + ST s (MutableByteArrayOffset s) +{-# INLINE pasteGrowST #-} +pasteGrowST n b !(MutableByteArrayOffset {array = arr0, offset = off0}) = do sz0 <- PM.getSizeofMutableByteArray arr0 let req = Nat.demote n let sz1 = off0 + req @@ -214,15 +240,16 @@ unsafeAppend (Builder f) (Builder g) = Builder $ \arr off0 s0 -> case f arr off0 s0 of (# s1, r #) -> g arr r s1 --- | Weaken the bound on the maximum number of bytes required. For example, --- to use two builders with unequal bounds in a disjunctive setting: --- --- > import qualified Arithmetic.Lte as Lte --- > --- > buildNumber :: Either Double Word64 -> Builder 32 --- > buildNumber = \case --- > Left d -> doubleDec d --- > Right w -> weaken (Lte.constant @19 @32) (word64Dec w) +{- | Weaken the bound on the maximum number of bytes required. For example, +to use two builders with unequal bounds in a disjunctive setting: + +> import qualified Arithmetic.Lte as Lte +> +> buildNumber :: Either Double Word64 -> Builder 32 +> buildNumber = \case +> Left d -> doubleDec d +> Right w -> weaken (Lte.constant @19 @32) (word64Dec w) +-} weaken :: forall m n. (m <= n) -> Builder m -> Builder n weaken !_ (Builder f) = Builder f @@ -230,30 +257,35 @@ weaken !_ (Builder f) = Builder f substitute :: forall m n. (m :=: n) -> Builder m -> Builder n substitute !_ (Builder f) = Builder f --- | Encode a double-floating-point number, using decimal notation or --- scientific notation depending on the magnitude. This has undefined --- behavior when representing @+inf@, @-inf@, and @NaN@. It will not --- crash, but the generated numbers will be nonsense. +{- | Encode a double-floating-point number, using decimal notation or +scientific notation depending on the magnitude. This has undefined +behavior when representing @+inf@, @-inf@, and @NaN@. It will not +crash, but the generated numbers will be nonsense. +-} doubleDec :: Double -> Builder 32 doubleDec (D# d) = Builder (\arr off0 s0 -> doubleDec# d arr off0 s0) --- | Requires up to 19 bytes. Encodes an unsigned 64-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. +{- | Requires up to 19 bytes. Encodes an unsigned 64-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +-} word64Dec :: Word64 -> Builder 19 word64Dec (W64# w) = wordCommonDec# (word64ToWord# w) --- | Requires up to 10 bytes. Encodes an unsigned 32-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. +{- | Requires up to 10 bytes. Encodes an unsigned 32-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +-} word32Dec :: Word32 -> Builder 10 word32Dec (W32# w) = wordCommonDec# (C.word32ToWord# w) --- | Requires up to 5 bytes. Encodes an unsigned 16-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. +{- | Requires up to 5 bytes. Encodes an unsigned 16-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +-} word16Dec :: Word16 -> Builder 5 word16Dec (W16# w) = wordCommonDec# (C.word16ToWord# w) --- | Requires up to 3 bytes. Encodes an unsigned 8-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. +{- | Requires up to 3 bytes. Encodes an unsigned 8-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +-} word8Dec :: Word8 -> Builder 3 word8Dec (W8# w) = -- We unroll the loop when encoding Word8s. This speeds things @@ -264,96 +296,105 @@ word8Dec (W8# w) = -- L1 cache in real applications. word8Dec# (C.word8ToWord# w) --- | Requires up to 19 bytes. Encodes an unsigned machine-sized integer --- as decimal. This encoding never starts with a zero unless the argument --- was zero. +{- | Requires up to 19 bytes. Encodes an unsigned machine-sized integer +as decimal. This encoding never starts with a zero unless the argument +was zero. +-} wordDec :: Word -> Builder 19 wordDec (W# w) = wordCommonDec# w --- | Requires up to 20 bytes. Encodes a signed 64-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Requires up to 20 bytes. Encodes a signed 64-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int64Dec :: Int64 -> Builder 20 int64Dec (I64# w) = intCommonDec# (int64ToInt# w) --- | Requires up to 11 bytes. Encodes a signed 32-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Requires up to 11 bytes. Encodes a signed 32-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int32Dec :: Int32 -> Builder 11 int32Dec (I32# w) = intCommonDec# (C.int32ToInt# w) --- | Requires up to 6 bytes. Encodes a signed 16-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Requires up to 6 bytes. Encodes a signed 16-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int16Dec :: Int16 -> Builder 6 int16Dec (I16# w) = intCommonDec# (C.int16ToInt# w) --- | Requires up to 4 bytes. Encodes a signed 8-bit integer as decimal. --- This encoding never starts with a zero unless the argument was zero. --- Negative numbers are preceded by a minus sign. Positive numbers --- are not preceded by anything. +{- | Requires up to 4 bytes. Encodes a signed 8-bit integer as decimal. +This encoding never starts with a zero unless the argument was zero. +Negative numbers are preceded by a minus sign. Positive numbers +are not preceded by anything. +-} int8Dec :: Int8 -> Builder 4 int8Dec (I8# w) = intCommonDec# (C.int8ToInt# w) --- | Requires up to 20 bytes. Encodes a signed machine-sized integer --- as decimal. This encoding never starts with a zero unless the --- argument was zero. Negative numbers are preceded by a minus sign. --- Positive numbers are not preceded by anything. +{- | Requires up to 20 bytes. Encodes a signed machine-sized integer +as decimal. This encoding never starts with a zero unless the +argument was zero. Negative numbers are preceded by a minus sign. +Positive numbers are not preceded by anything. +-} intDec :: Int -> Builder 20 intDec (I# w) = intCommonDec# w word8Dec# :: Word# -> Builder 3 -{-# noinline word8Dec# #-} +{-# NOINLINE word8Dec# #-} word8Dec# w# = Unsafe.construct $ \arr off0 -> do - let !(I# off0# ) = off0 - !(!x,!ones) = quotRem w 10 - !(hundreds@(W# hundreds# ),tens@(W# tens# )) = quotRem x 10 + let !(I# off0#) = off0 + !(!x, !ones) = quotRem w 10 + !(hundreds@(W# hundreds#), tens@(W# tens#)) = quotRem x 10 writeByteArray arr off0 (fromIntegral (hundreds + 0x30) :: Word8) let !hasHundreds = gtWord# hundreds# 0## - !off1@(I# off1# ) = I# (off0# +# hasHundreds) + !off1@(I# off1#) = I# (off0# +# hasHundreds) writeByteArray arr off1 (fromIntegral (tens + 0x30) :: Word8) - let !off2 = I# (off1# +# (orI# hasHundreds (gtWord# tens# 0## ))) + let !off2 = I# (off1# +# (orI# hasHundreds (gtWord# tens# 0##))) writeByteArray arr off2 (fromIntegral (ones + 0x30) :: Word8) pure (off2 + 1) - where + where w = W# w# -- Requires a number of bytes that is bounded by the size of -- the word. This is only used internally. wordCommonDec# :: Word# -> Builder n -{-# noinline wordCommonDec# #-} -wordCommonDec# w# = Unsafe.construct $ \arr off0 -> if w /= 0 - then internalWordLoop arr off0 (W# w#) - else do - writeByteArray arr off0 (c2w '0') - pure (off0 + 1) - where +{-# NOINLINE wordCommonDec# #-} +wordCommonDec# w# = Unsafe.construct $ \arr off0 -> + if w /= 0 + then internalWordLoop arr off0 (W# w#) + else do + writeByteArray arr off0 (c2w '0') + pure (off0 + 1) + where w = W64# (wordToWord64# w#) internalWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int -{-# inline internalWordLoop #-} +{-# INLINE internalWordLoop #-} internalWordLoop arr off0 x0 = do off1 <- backwardsWordLoop arr off0 x0 reverseBytes arr off0 (off1 - 1) pure off1 backwardsWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int -{-# inline backwardsWordLoop #-} -backwardsWordLoop arr off0 x0 = go off0 x0 where - go !off !(x :: Word) = if x > 0 - then do - let (y,z) = quotRem x 10 - writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) - go (off + 1) y - else pure off +{-# INLINE backwardsWordLoop #-} +backwardsWordLoop arr off0 x0 = go off0 x0 + where + go !off !(x :: Word) = + if x > 0 + then do + let (y, z) = quotRem x 10 + writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) + go (off + 1) y + else pure off -- Requires up to 20 bytes. Can be less depending on what the -- size of the argument is known to be. Unsafe. intCommonDec# :: Int# -> Builder n -{-# noinline intCommonDec# #-} +{-# NOINLINE intCommonDec# #-} intCommonDec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of GT -> internalWordLoop arr off0 (fromIntegral w) EQ -> do @@ -362,7 +403,7 @@ intCommonDec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of LT -> do writeByteArray arr off0 (c2w '-') internalWordLoop arr (off0 + 1) (fromIntegral (negate w)) - where + where w = I64# (intToInt64# w#) -- Convert a number between 0 and 16 to the ASCII @@ -371,10 +412,11 @@ intCommonDec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of -- unneeded bitmask. This actually needs a Word64 -- argument. toHexUpper :: Word -> Word8 -toHexUpper w' = fromIntegral - $ (complement theMask .&. loSolved) - .|. (theMask .&. hiSolved) - where +toHexUpper w' = + fromIntegral $ + (complement theMask .&. loSolved) + .|. (theMask .&. hiSolved) + where w = w' .&. 0xF -- This is all ones if the value was >= 10 theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1 @@ -382,146 +424,161 @@ toHexUpper w' = fromIntegral hiSolved = w + 55 toHexLower :: Word -> Word8 -toHexLower w' = fromIntegral - $ (complement theMask .&. loSolved) - .|. (theMask .&. hiSolved) - where +toHexLower w' = + fromIntegral $ + (complement theMask .&. loSolved) + .|. (theMask .&. hiSolved) + where w = w' .&. 0xF -- This is all ones if the value was >= 10 theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1 loSolved = w + 48 hiSolved = w + 87 --- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 64 digits. This uses --- lowercase for the alphabetical digits. +{- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as +hexadecimal, zero-padding the encoding to 64 digits. This uses +lowercase for the alphabetical digits. +-} word256PaddedLowerHex :: Word256 -> Builder 64 word256PaddedLowerHex (Word256 w192 w128 w64 w0) = - word64PaddedLowerHex w192 - `append` word64PaddedLowerHex w128 - `append` word64PaddedLowerHex w64 - `append` word64PaddedLowerHex w0 - --- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 64 digits. This uses --- uppercase for the alphabetical digits. + word64PaddedLowerHex w192 + `append` word64PaddedLowerHex w128 + `append` word64PaddedLowerHex w64 + `append` word64PaddedLowerHex w0 + +{- | Requires exactly 64 bytes. Encodes a 256-bit unsigned integer as +hexadecimal, zero-padding the encoding to 64 digits. This uses +uppercase for the alphabetical digits. +-} word256PaddedUpperHex :: Word256 -> Builder 64 word256PaddedUpperHex (Word256 w192 w128 w64 w0) = - word64PaddedUpperHex w192 - `append` word64PaddedUpperHex w128 - `append` word64PaddedUpperHex w64 - `append` word64PaddedUpperHex w0 - - --- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 32 digits. This uses --- lowercase for the alphabetical digits. + word64PaddedUpperHex w192 + `append` word64PaddedUpperHex w128 + `append` word64PaddedUpperHex w64 + `append` word64PaddedUpperHex w0 + +{- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as +hexadecimal, zero-padding the encoding to 32 digits. This uses +lowercase for the alphabetical digits. +-} word128PaddedLowerHex :: Word128 -> Builder 32 word128PaddedLowerHex (Word128 w64 w0) = - word64PaddedLowerHex w64 - `append` word64PaddedLowerHex w0 + word64PaddedLowerHex w64 + `append` word64PaddedLowerHex w0 --- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 32 digits. This uses --- uppercase for the alphabetical digits. +{- | Requires exactly 32 bytes. Encodes a 128-bit unsigned integer as +hexadecimal, zero-padding the encoding to 32 digits. This uses +uppercase for the alphabetical digits. +-} word128PaddedUpperHex :: Word128 -> Builder 32 word128PaddedUpperHex (Word128 w64 w0) = - word64PaddedUpperHex w64 - `append` word64PaddedUpperHex w0 - - --- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 16 digits. This uses --- uppercase for the alphabetical digits. For example, this encodes the --- number 1022 as @00000000000003FE@. + word64PaddedUpperHex w64 + `append` word64PaddedUpperHex w0 + +{- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as +hexadecimal, zero-padding the encoding to 16 digits. This uses +uppercase for the alphabetical digits. For example, this encodes the +number 1022 as @00000000000003FE@. +-} word64PaddedUpperHex :: Word64 -> Builder 16 word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# (word64ToWord# w) --- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 16 digits. This uses --- lowercase for the alphabetical digits. For example, this encodes the --- number 1022 as @00000000000003fe@. +{- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as +hexadecimal, zero-padding the encoding to 16 digits. This uses +lowercase for the alphabetical digits. For example, this encodes the +number 1022 as @00000000000003fe@. +-} word64PaddedLowerHex :: Word64 -> Builder 16 word64PaddedLowerHex (W64# w) = word64PaddedLowerHex# (word64ToWord# w) --- | Requires exactly 12 bytes. Discards the upper 16 bits of a --- 64-bit unsigned integer and then encodes the lower 48 bits as --- hexadecimal, zero-padding the encoding to 12 digits. This uses --- lowercase for the alphabetical digits. For example, this encodes the --- number 1022 as @0000000003fe@. +{- | Requires exactly 12 bytes. Discards the upper 16 bits of a +64-bit unsigned integer and then encodes the lower 48 bits as +hexadecimal, zero-padding the encoding to 12 digits. This uses +lowercase for the alphabetical digits. For example, this encodes the +number 1022 as @0000000003fe@. +-} word48PaddedLowerHex :: Word64 -> Builder 12 word48PaddedLowerHex (W64# w) = word48PaddedLowerHex# (word64ToWord# w) --- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 8 digits. This uses --- uppercase for the alphabetical digits. +{- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as +hexadecimal, zero-padding the encoding to 8 digits. This uses +uppercase for the alphabetical digits. +-} word32PaddedUpperHex :: Word32 -> Builder 8 word32PaddedUpperHex (W32# w) = word32PaddedUpperHex# (C.word32ToWord# w) --- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 8 digits. This uses --- lowercase for the alphabetical digits. +{- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as +hexadecimal, zero-padding the encoding to 8 digits. This uses +lowercase for the alphabetical digits. +-} word32PaddedLowerHex :: Word32 -> Builder 8 word32PaddedLowerHex (W32# w) = word32PaddedLowerHex# (C.word32ToWord# w) --- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 4 digits. This uses --- uppercase for the alphabetical digits. --- --- >>> word16PaddedUpperHex 0xab0 --- 0AB0 +{- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as +hexadecimal, zero-padding the encoding to 4 digits. This uses +uppercase for the alphabetical digits. + +>>> word16PaddedUpperHex 0xab0 +0AB0 +-} word16PaddedUpperHex :: Word16 -> Builder 4 word16PaddedUpperHex (W16# w) = word16PaddedUpperHex# (C.word16ToWord# w) --- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 4 digits. This uses --- lowercase for the alphabetical digits. --- --- >>> word16PaddedLowerHex 0xab0 --- 0ab0 +{- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as +hexadecimal, zero-padding the encoding to 4 digits. This uses +lowercase for the alphabetical digits. + +>>> word16PaddedLowerHex 0xab0 +0ab0 +-} word16PaddedLowerHex :: Word16 -> Builder 4 word16PaddedLowerHex (W16# w) = word16PaddedLowerHex# (C.word16ToWord# w) --- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as --- hexadecimal. No leading zeroes are displayed. Letters are presented --- in lowercase. If the number is zero, a single zero digit is used. --- --- >>> word16LowerHex 0xab0 --- ab0 +{- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as +hexadecimal. No leading zeroes are displayed. Letters are presented +in lowercase. If the number is zero, a single zero digit is used. + +>>> word16LowerHex 0xab0 +ab0 +-} word16LowerHex :: Word16 -> Builder 4 word16LowerHex (W16# w) = word16LowerHex# (C.word16ToWord# w) --- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as --- hexadecimal. No leading zeroes are displayed. Letters are presented --- in uppercase. If the number is zero, a single zero digit is used. --- --- >>> word16UpperHex 0xab0 --- AB0 +{- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as +hexadecimal. No leading zeroes are displayed. Letters are presented +in uppercase. If the number is zero, a single zero digit is used. + +>>> word16UpperHex 0xab0 +AB0 +-} word16UpperHex :: Word16 -> Builder 4 word16UpperHex (W16# w) = word16UpperHex# (C.word16ToWord# w) --- | Requires at most 2 bytes. Encodes a 8-bit unsigned integer as --- hexadecimal. No leading zeroes are displayed. If the number is zero, --- a single zero digit is used. +{- | Requires at most 2 bytes. Encodes a 8-bit unsigned integer as +hexadecimal. No leading zeroes are displayed. If the number is zero, +a single zero digit is used. +-} word8LowerHex :: Word8 -> Builder 2 word8LowerHex (W8# w) = word8LowerHex# (C.word8ToWord# w) --- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 2 digits. This uses --- uppercase for the alphabetical digits. +{- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as +hexadecimal, zero-padding the encoding to 2 digits. This uses +uppercase for the alphabetical digits. +-} word8PaddedUpperHex :: Word8 -> Builder 2 word8PaddedUpperHex (W8# w) = word8PaddedUpperHex# (C.word8ToWord# w) --- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as --- hexadecimal, zero-padding the encoding to 2 digits. This uses --- lowercase for the alphabetical digits. +{- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as +hexadecimal, zero-padding the encoding to 2 digits. This uses +lowercase for the alphabetical digits. +-} word8PaddedLowerHex :: Word8 -> Builder 2 word8PaddedLowerHex (W8# w) = word8PaddedLowerHex# (C.word8ToWord# w) -- TODO: Is it actually worth unrolling this loop. I suspect that it -- might not be. Benchmark this. word64PaddedUpperHex# :: Word# -> Builder 16 -{-# noinline word64PaddedUpperHex# #-} +{-# NOINLINE word64PaddedUpperHex# #-} word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 60)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56)) @@ -540,13 +597,13 @@ word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 14) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 15) (toHexUpper (unsafeShiftR w 0)) pure (off + 16) - where + where w = W# w# -- TODO: Is it actually worth unrolling this loop. I suspect that it -- might not be. Benchmark this. word48PaddedLowerHex# :: Word# -> Builder 12 -{-# noinline word48PaddedLowerHex# #-} +{-# NOINLINE word48PaddedLowerHex# #-} word48PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 44)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 40)) @@ -561,13 +618,13 @@ word48PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 10) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 11) (toHexLower w) pure (off + 12) - where + where w = W# w# -- TODO: Is it actually worth unrolling this loop. I suspect that it -- might not be. Benchmark this. word64PaddedLowerHex# :: Word# -> Builder 16 -{-# noinline word64PaddedLowerHex# #-} +{-# NOINLINE word64PaddedLowerHex# #-} word64PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 60)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 56)) @@ -586,11 +643,11 @@ word64PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 14) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 15) (toHexLower (unsafeShiftR w 0)) pure (off + 16) - where + where w = W# w# word32PaddedUpperHex# :: Word# -> Builder 8 -{-# noinline word32PaddedUpperHex# #-} +{-# NOINLINE word32PaddedUpperHex# #-} word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 28)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 24)) @@ -601,11 +658,11 @@ word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 6) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 7) (toHexUpper (unsafeShiftR w 0)) pure (off + 8) - where + where w = W# w# word32PaddedLowerHex# :: Word# -> Builder 8 -{-# noinline word32PaddedLowerHex# #-} +{-# NOINLINE word32PaddedLowerHex# #-} word32PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 28)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 24)) @@ -616,7 +673,7 @@ word32PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 6) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 7) (toHexLower (unsafeShiftR w 0)) pure (off + 8) - where + where w = W# w# -- Not sure if it is beneficial to inline this. We just let @@ -629,7 +686,7 @@ word16PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 3) (toHexUpper (unsafeShiftR w 0)) pure (off + 4) - where + where w = W# w# word16PaddedLowerHex# :: Word# -> Builder 4 @@ -639,7 +696,7 @@ word16PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 3) (toHexLower (unsafeShiftR w 0)) pure (off + 4) - where + where w = W# w# word12PaddedLowerHex# :: Word# -> Builder 3 @@ -648,7 +705,7 @@ word12PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 0)) pure (off + 3) - where + where w = W# w# word12PaddedUpperHex# :: Word# -> Builder 3 @@ -657,42 +714,42 @@ word12PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 0)) pure (off + 3) - where + where w = W# w# -- Definitely want this to inline. It's maybe a dozen instructions total. word8PaddedUpperHex# :: Word# -> Builder 2 -{-# inline word8PaddedUpperHex# #-} +{-# INLINE word8PaddedUpperHex# #-} word8PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 0)) pure (off + 2) - where + where w = W# w# word8PaddedLowerHex# :: Word# -> Builder 2 -{-# inline word8PaddedLowerHex# #-} +{-# INLINE word8PaddedLowerHex# #-} word8PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 0)) pure (off + 2) - where + where w = W# w# word4PaddedLowerHex# :: Word# -> Builder 1 -{-# inline word4PaddedLowerHex# #-} +{-# INLINE word4PaddedLowerHex# #-} word4PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower w) pure (off + 1) - where + where w = W# w# word4PaddedUpperHex# :: Word# -> Builder 1 -{-# inline word4PaddedUpperHex# #-} +{-# INLINE word4PaddedUpperHex# #-} word4PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper w) pure (off + 1) - where + where w = W# w# word16UpperHex# :: Word# -> Builder 4 @@ -701,7 +758,7 @@ word16UpperHex# w# | w <= 0xFF = weaken Lte.constant (word8PaddedUpperHex# w#) | w <= 0xFFF = weaken Lte.constant (word12PaddedUpperHex# w#) | otherwise = word16PaddedUpperHex# w# - where + where w = W# w# word16LowerHex# :: Word# -> Builder 4 @@ -710,7 +767,7 @@ word16LowerHex# w# | w <= 0xFF = weaken Lte.constant (word8PaddedLowerHex# w#) | w <= 0xFFF = weaken Lte.constant (word12PaddedLowerHex# w#) | otherwise = word16PaddedLowerHex# w# - where + where w = W# w# -- Precondition: argument less than 256 @@ -718,17 +775,18 @@ word8LowerHex# :: Word# -> Builder 2 word8LowerHex# w# | w <= 0xF = weaken Lte.constant (word4PaddedLowerHex# w#) | otherwise = weaken Lte.constant (word8PaddedLowerHex# w#) - where + where w = W# w# --- | Encode a number less than 100 as a decimal number, zero-padding it to --- two digits. For example: 0 is encoded as @00@, 5 is encoded as @05@, and --- 73 is encoded as @73@. --- --- Precondition: Argument must be less than 100. Failure to satisfy this --- precondition will not result in a segfault, but the resulting bytes are --- undefined. The implemention uses a heuristic for division that is inaccurate --- for large numbers. +{- | Encode a number less than 100 as a decimal number, zero-padding it to +two digits. For example: 0 is encoded as @00@, 5 is encoded as @05@, and +73 is encoded as @73@. + +Precondition: Argument must be less than 100. Failure to satisfy this +precondition will not result in a segfault, but the resulting bytes are +undefined. The implemention uses a heuristic for division that is inaccurate +for large numbers. +-} wordPaddedDec2 :: Word -> Builder 2 wordPaddedDec2 !w = Unsafe.construct $ \arr off -> do let d1 = approxDiv10 w @@ -737,172 +795,200 @@ wordPaddedDec2 !w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (unsafeWordToWord8 (d2 + 48)) pure (off + 2) --- | Encode a number less than 10000 as a decimal number, zero-padding it to --- two digits. For example: 0 is encoded as @0000@, 5 is encoded as @0005@, --- and 73 is encoded as @0073@. --- --- Precondition: Argument must be less than 10000. Failure to satisfy this --- precondition will not result in a segfault, but the resulting bytes are --- undefined. The implemention uses a heuristic for division that is inaccurate --- for large numbers. +{- | Encode a number less than 10000 as a decimal number, zero-padding it to +two digits. For example: 0 is encoded as @0000@, 5 is encoded as @0005@, +and 73 is encoded as @0073@. + +Precondition: Argument must be less than 10000. Failure to satisfy this +precondition will not result in a segfault, but the resulting bytes are +undefined. The implemention uses a heuristic for division that is inaccurate +for large numbers. +-} wordPaddedDec4 :: Word -> Builder 4 wordPaddedDec4 !w = Unsafe.construct $ \arr off -> do putRem10 - (putRem10 $ putRem10 $ putRem10 - (\_ _ _ -> pure ()) - ) arr (off + 3) w + ( putRem10 $ + putRem10 $ + putRem10 + (\_ _ _ -> pure ()) + ) + arr + (off + 3) + w pure (off + 4) wordPaddedDec3 :: Word -> Builder 3 wordPaddedDec3 !w = Unsafe.construct $ \arr off -> do putRem10 - (putRem10 $ putRem10 - (\_ _ _ -> pure ()) - ) arr (off + 2) w + ( putRem10 $ + putRem10 + (\_ _ _ -> pure ()) + ) + arr + (off + 2) + w pure (off + 3) --- | Encode a number less than 1e9 as a decimal number, zero-padding it to --- nine digits. For example: 0 is encoded as @000000000@ and 5 is encoded as --- @000000005@. --- --- Precondition: Argument must be less than 1e9. Failure to satisfy this --- precondition will not result in a segfault, but the resulting bytes are --- undefined. The implemention uses a heuristic for division that is inaccurate --- for large numbers. +{- | Encode a number less than 1e9 as a decimal number, zero-padding it to +nine digits. For example: 0 is encoded as @000000000@ and 5 is encoded as +@000000005@. + +Precondition: Argument must be less than 1e9. Failure to satisfy this +precondition will not result in a segfault, but the resulting bytes are +undefined. The implemention uses a heuristic for division that is inaccurate +for large numbers. +-} wordPaddedDec9 :: Word -> Builder 9 wordPaddedDec9 !w = Unsafe.construct $ \arr off -> do putRem10 - (putRem10 $ putRem10 $ putRem10 $ putRem10 $ putRem10 $ - putRem10 $ putRem10 $ putRem10 - (\_ _ _ -> pure ()) - ) arr (off + 8) w + ( putRem10 $ + putRem10 $ + putRem10 $ + putRem10 $ + putRem10 $ + putRem10 $ + putRem10 $ + putRem10 + (\_ _ _ -> pure ()) + ) + arr + (off + 8) + w pure (off + 9) putRem10 :: (MutableByteArray s -> Int -> Word -> ST s a) -> MutableByteArray s -> Int -> Word -> ST s a -{-# inline putRem10 #-} +{-# INLINE putRem10 #-} putRem10 andThen arr off dividend = do let quotient = approxDiv10 dividend remainder = dividend - (10 * quotient) writeByteArray arr off (unsafeWordToWord8 (remainder + 48)) andThen arr (off - 1) quotient --- | Encode an ASCII character. --- Precondition: Input must be an ASCII character. This is not checked. +{- | Encode an ASCII character. +Precondition: Input must be an ASCII character. This is not checked. +-} ascii :: Char -> Builder 1 ascii (C# c) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c) - pure (I# (off +# 1# )) + pure (I# (off +# 1#)) --- | Encode two ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. +{- | Encode two ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii2 :: Char -> Char -> Builder 2 ascii2 (C# c0) (C# c1) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - pure (I# (off +# 2# )) + primitive_ (writeCharArray# arr (off +# 1#) c1) + pure (I# (off +# 2#)) --- | Encode three ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. +{- | Encode three ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii3 :: Char -> Char -> Char -> Builder 3 ascii3 (C# c0) (C# c1) (C# c2) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - pure (I# (off +# 3# )) + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + pure (I# (off +# 3#)) --- | Encode four ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. +{- | Encode four ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii4 :: Char -> Char -> Char -> Char -> Builder 4 ascii4 (C# c0) (C# c1) (C# c2) (C# c3) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - primitive_ (writeCharArray# arr (off +# 3# ) c3) - pure (I# (off +# 4# )) - --- | Encode five ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + primitive_ (writeCharArray# arr (off +# 3#) c3) + pure (I# (off +# 4#)) + +{- | Encode five ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii5 :: Char -> Char -> Char -> Char -> Char -> Builder 5 ascii5 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - primitive_ (writeCharArray# arr (off +# 3# ) c3) - primitive_ (writeCharArray# arr (off +# 4# ) c4) - pure (I# (off +# 5# )) - --- | Encode six ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + primitive_ (writeCharArray# arr (off +# 3#) c3) + primitive_ (writeCharArray# arr (off +# 4#) c4) + pure (I# (off +# 5#)) + +{- | Encode six ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii6 :: Char -> Char -> Char -> Char -> Char -> Char -> Builder 6 ascii6 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - primitive_ (writeCharArray# arr (off +# 3# ) c3) - primitive_ (writeCharArray# arr (off +# 4# ) c4) - primitive_ (writeCharArray# arr (off +# 5# ) c5) - pure (I# (off +# 6# )) - --- | Encode seven ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + primitive_ (writeCharArray# arr (off +# 3#) c3) + primitive_ (writeCharArray# arr (off +# 4#) c4) + primitive_ (writeCharArray# arr (off +# 5#) c5) + pure (I# (off +# 6#)) + +{- | Encode seven ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii7 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder 7 ascii7 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) (C# c6) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - primitive_ (writeCharArray# arr (off +# 3# ) c3) - primitive_ (writeCharArray# arr (off +# 4# ) c4) - primitive_ (writeCharArray# arr (off +# 5# ) c5) - primitive_ (writeCharArray# arr (off +# 6# ) c6) - pure (I# (off +# 7# )) - --- | Encode eight ASCII characters. Precondition: Must be an ASCII characters. --- This is not checked. + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + primitive_ (writeCharArray# arr (off +# 3#) c3) + primitive_ (writeCharArray# arr (off +# 4#) c4) + primitive_ (writeCharArray# arr (off +# 5#) c5) + primitive_ (writeCharArray# arr (off +# 6#) c6) + pure (I# (off +# 7#)) + +{- | Encode eight ASCII characters. Precondition: Must be an ASCII characters. +This is not checked. +-} ascii8 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Builder 8 ascii8 (C# c0) (C# c1) (C# c2) (C# c3) (C# c4) (C# c5) (C# c6) (C# c7) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c0) - primitive_ (writeCharArray# arr (off +# 1# ) c1) - primitive_ (writeCharArray# arr (off +# 2# ) c2) - primitive_ (writeCharArray# arr (off +# 3# ) c3) - primitive_ (writeCharArray# arr (off +# 4# ) c4) - primitive_ (writeCharArray# arr (off +# 5# ) c5) - primitive_ (writeCharArray# arr (off +# 6# ) c6) - primitive_ (writeCharArray# arr (off +# 7# ) c7) - pure (I# (off +# 8# )) + primitive_ (writeCharArray# arr (off +# 1#) c1) + primitive_ (writeCharArray# arr (off +# 2#) c2) + primitive_ (writeCharArray# arr (off +# 3#) c3) + primitive_ (writeCharArray# arr (off +# 4#) c4) + primitive_ (writeCharArray# arr (off +# 5#) c5) + primitive_ (writeCharArray# arr (off +# 6#) c6) + primitive_ (writeCharArray# arr (off +# 7#) c7) + pure (I# (off +# 8#)) -- | Encode a machine-sized word with VLQ (also known as VByte, Varint, VInt). wordVlq :: Word -> Builder 10 -{-# inline wordVlq #-} +{-# INLINE wordVlq #-} wordVlq (W# w) = vlqCommon (W# w) -- | Encode a 32-bit word with VLQ (also known as VByte, Varint, VInt). word32Vlq :: Word32 -> Builder 5 -{-# inline word32Vlq #-} +{-# INLINE word32Vlq #-} word32Vlq (W32# w) = vlqCommon (W# (C.word32ToWord# w)) -- | Encode a 64-bit word with VLQ (also known as VByte, Varint, VInt). word64Vlq :: Word64 -> Builder 10 -{-# inline word64Vlq #-} +{-# INLINE word64Vlq #-} word64Vlq (W64# w) = vlqCommon (W# (word64ToWord# w)) -- | Encode a machine-sized word with LEB-128. wordLEB128 :: Word -> Builder 10 -{-# inline wordLEB128 #-} +{-# INLINE wordLEB128 #-} wordLEB128 (W# w) = lebCommon (W# w) -- | Encode a 32-bit word with LEB-128. word16LEB128 :: Word16 -> Builder 3 -{-# inline word16LEB128 #-} +{-# INLINE word16LEB128 #-} word16LEB128 (W16# w) = lebCommon (W# (C.word16ToWord# w)) -- | Encode a 32-bit word with LEB-128. word32LEB128 :: Word32 -> Builder 5 -{-# inline word32LEB128 #-} +{-# INLINE word32LEB128 #-} word32LEB128 (W32# w) = lebCommon (W# (C.word32ToWord# w)) -- | Encode a 64-bit word with LEB-128. word64LEB128 :: Word64 -> Builder 10 -{-# inline word64LEB128 #-} +{-# INLINE word64LEB128 #-} word64LEB128 (W64# w) = lebCommon (W# (word64ToWord# w)) vlqCommon :: Word -> Builder n @@ -913,23 +999,25 @@ vlqCommon !w = case w of in vlqStep startIx w vlqStep :: - Int -- start index, must be in range [0,63] and 7 must divide it evenly - -> Word - -> Builder n + Int -> -- start index, must be in range [0,63] and 7 must divide it evenly + Word -> + Builder n vlqStep !ix !w | ix <= 0 = unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .&. 0b0111_1111)) - | otherwise = unsafeAppend - (unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .|. 0b1000_0000))) - (vlqStep (ix - 7) w) + | otherwise = + unsafeAppend + (unsafeWord8 (unsafeWordToWord8 (unsafeShiftR w ix .|. 0b1000_0000))) + (vlqStep (ix - 7) w) lebCommon :: Word -> Builder n lebCommon !w = case quotRem w 128 of - (q,r) -> case q of + (q, r) -> case q of 0 -> unsafeWord8 (unsafeWordToWord8 r) - _ -> unsafeAppend - (unsafeWord8 (unsafeWordToWord8 (r .|. 0x80))) - (lebCommon q) + _ -> + unsafeAppend + (unsafeWord8 (unsafeWordToWord8 (r .|. 0x80))) + (lebCommon q) -- | Encode a character as UTF-8. This only uses as much space as is required. char :: Char -> Builder 4 @@ -938,63 +1026,62 @@ char c writeByteArray arr off (unsafeWordToWord8 codepoint) pure (off + 1) | codepoint < 0x800 = Unsafe.construct $ \arr off -> do - writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint)) + writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteTwoTwo codepoint)) return (off + 2) | codepoint >= 0xD800 && codepoint < 0xE000 = Unsafe.construct $ \arr off -> do -- Codepoint U+FFFD - writeByteArray arr off (0xEF :: Word8) + writeByteArray arr off (0xEF :: Word8) writeByteArray arr (off + 1) (0xBF :: Word8) writeByteArray arr (off + 2) (0xBD :: Word8) return (off + 3) | codepoint < 0x10000 = Unsafe.construct $ \arr off -> do - writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint)) + writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteThreeTwo codepoint)) writeByteArray arr (off + 2) (unsafeWordToWord8 (byteThreeThree codepoint)) return (off + 3) | otherwise = Unsafe.construct $ \arr off -> do - writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint)) + writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteFourTwo codepoint)) writeByteArray arr (off + 2) (unsafeWordToWord8 (byteFourThree codepoint)) writeByteArray arr (off + 3) (unsafeWordToWord8 (byteFourFour codepoint)) return (off + 4) + where + codepoint :: Word + codepoint = fromIntegral (ord c) - where - codepoint :: Word - codepoint = fromIntegral (ord c) + -- precondition: codepoint is less than 0x800 + byteTwoOne :: Word -> Word + byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000 - -- precondition: codepoint is less than 0x800 - byteTwoOne :: Word -> Word - byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000 + byteTwoTwo :: Word -> Word + byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000 - byteTwoTwo :: Word -> Word - byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000 + -- precondition: codepoint is less than 0x1000 + byteThreeOne :: Word -> Word + byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000 - -- precondition: codepoint is less than 0x1000 - byteThreeOne :: Word -> Word - byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000 + byteThreeTwo :: Word -> Word + byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 - byteThreeTwo :: Word -> Word - byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 + byteThreeThree :: Word -> Word + byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000 - byteThreeThree :: Word -> Word - byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000 + -- precondition: codepoint is less than 0x110000 + byteFourOne :: Word -> Word + byteFourOne w = unsafeShiftR w 18 .|. 0b11110000 - -- precondition: codepoint is less than 0x110000 - byteFourOne :: Word -> Word - byteFourOne w = unsafeShiftR w 18 .|. 0b11110000 + byteFourTwo :: Word -> Word + byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000 - byteFourTwo :: Word -> Word - byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000 + byteFourThree :: Word -> Word + byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 - byteFourThree :: Word -> Word - byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 - - byteFourFour :: Word -> Word - byteFourFour w = (0b00111111 .&. w) .|. 0b10000000 + byteFourFour :: Word -> Word + byteFourFour w = (0b00111111 .&. w) .|. 0b10000000 int64BE :: Int64 -> Builder 8 -int64BE (I64# i) = word64BE (W64# ( wordToWord64# (int2Word# (int64ToInt# i)))) +int64BE (I64# i) = word64BE (W64# (wordToWord64# (int2Word# (int64ToInt# i)))) int32BE :: Int32 -> Builder 4 int32BE (I32# i) = word32BE (W32# (C.wordToWord32# (int2Word# (C.int32ToInt# i)))) @@ -1003,7 +1090,7 @@ int16BE :: Int16 -> Builder 2 int16BE (I16# i) = word16BE (W16# (C.wordToWord16# (int2Word# (C.int16ToInt# i)))) int64LE :: Int64 -> Builder 8 -int64LE (I64# i) = word64LE (W64# ( wordToWord64# (int2Word# (int64ToInt# i)))) +int64LE (I64# i) = word64LE (W64# (wordToWord64# (int2Word# (int64ToInt# i)))) int32LE :: Int32 -> Builder 4 int32LE (I32# i) = word32LE (W32# (C.wordToWord32# (int2Word# (C.int32ToInt# i)))) @@ -1023,8 +1110,9 @@ word256LE (Word256 hi mhi mlo lo) = word64LE lo `append` word64LE mlo `append` w word256BE :: Word256 -> Builder 32 word256BE (Word256 hi mhi mlo lo) = word64BE hi `append` word64BE mhi `append` word64BE mlo `append` word64BE lo --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- word in a little-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +word in a little-endian fashion. +-} word64LE :: Word64 -> Builder 8 word64LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 7) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) @@ -1034,14 +1122,15 @@ word64LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 3) (fromIntegral @Word64 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 8)) - writeByteArray arr (off ) (fromIntegral @Word64 @Word8 w) + writeByteArray arr (off) (fromIntegral @Word64 @Word8 w) pure (off + 8) --- | Requires exactly 8 bytes. Dump the octets of a 64-bit --- word in a big-endian fashion. +{- | Requires exactly 8 bytes. Dump the octets of a 64-bit +word in a big-endian fashion. +-} word64BE :: Word64 -> Builder 8 word64BE w = Unsafe.construct $ \arr off -> do - writeByteArray arr (off ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) + writeByteArray arr (off) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48)) writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40)) writeByteArray arr (off + 3) (fromIntegral @Word64 @Word8 (unsafeShiftR w 32)) @@ -1051,39 +1140,43 @@ word64BE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 7) (fromIntegral @Word64 @Word8 w) pure (off + 8) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- word in a little-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +word in a little-endian fashion. +-} word32LE :: Word32 -> Builder 4 word32LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 3) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8)) - writeByteArray arr (off ) (fromIntegral @Word32 @Word8 w) + writeByteArray arr (off) (fromIntegral @Word32 @Word8 w) pure (off + 4) --- | Requires exactly 4 bytes. Dump the octets of a 32-bit --- word in a big-endian fashion. +{- | Requires exactly 4 bytes. Dump the octets of a 32-bit +word in a big-endian fashion. +-} word32BE :: Word32 -> Builder 4 word32BE w = Unsafe.construct $ \arr off -> do - writeByteArray arr (off ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) + writeByteArray arr (off) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off + 3) (fromIntegral @Word32 @Word8 w) pure (off + 4) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- word in a little-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +word in a little-endian fashion. +-} word16LE :: Word16 -> Builder 2 word16LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) - writeByteArray arr (off ) (fromIntegral @Word16 @Word8 w) + writeByteArray arr (off) (fromIntegral @Word16 @Word8 w) pure (off + 2) --- | Requires exactly 2 bytes. Dump the octets of a 16-bit --- word in a big-endian fashion. +{- | Requires exactly 2 bytes. Dump the octets of a 16-bit +word in a big-endian fashion. +-} word16BE :: Word16 -> Builder 2 word16BE w = Unsafe.construct $ \arr off -> do - writeByteArray arr (off ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) + writeByteArray arr (off) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w) pure (off + 2) @@ -1100,16 +1193,18 @@ unsafeWord8 w = Unsafe.construct $ \arr off -> do -- Reverse the bytes in the designated slice. This takes -- an inclusive start offset and an inclusive end offset. reverseBytes :: MutableByteArray s -> Int -> Int -> ST s () -{-# inline reverseBytes #-} -reverseBytes arr begin end = go begin end where - go ixA ixB = if ixA < ixB - then do - a :: Word8 <- readByteArray arr ixA - b :: Word8 <- readByteArray arr ixB - writeByteArray arr ixA b - writeByteArray arr ixB a - go (ixA + 1) (ixB - 1) - else pure () +{-# INLINE reverseBytes #-} +reverseBytes arr begin end = go begin end + where + go ixA ixB = + if ixA < ixB + then do + a :: Word8 <- readByteArray arr ixA + b :: Word8 <- readByteArray arr ixB + writeByteArray arr ixA b + writeByteArray arr ixB a + go (ixA + 1) (ixB - 1) + else pure () c2w :: Char -> Word8 c2w = fromIntegral . ord @@ -1123,8 +1218,13 @@ shrinkMutableByteArray (MutableByteArray arr) (I# sz) = -- inaccurate. This is very visible when encoding a number like 2.25, which -- is perfectly represented as an IEEE 754 floating point number but is goofed -- up by this function. -doubleDec# :: forall s. - Double# -> MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #) +doubleDec# :: + forall s. + Double# -> + MutableByteArray# s -> + Int# -> + State# s -> + (# State# s, Int# #) doubleDec# d# marr# off# s0 = case unsafeIOToST (c_paste_double marr# off# d#) of ST f -> case f s0 of @@ -1138,5 +1238,6 @@ approxDiv10 !n = unsafeShiftR (0x1999999A * n) 32 unsafeWordToWord8 :: Word -> Word8 unsafeWordToWord8 (W# w) = W8# (C.wordToWord8# w) -foreign import ccall unsafe "bytebuild_paste_double" c_paste_double :: - MutableByteArray# s -> Int# -> Double# -> IO Int +foreign import ccall unsafe "bytebuild_paste_double" + c_paste_double :: + MutableByteArray# s -> Int# -> Double# -> IO Int diff --git a/src/Data/Bytes/Builder/Bounded/Class.hs b/src/Data/Bytes/Builder/Bounded/Class.hs index 9a630d1..29c16e5 100644 --- a/src/Data/Bytes/Builder/Bounded/Class.hs +++ b/src/Data/Bytes/Builder/Bounded/Class.hs @@ -1,8 +1,8 @@ -{-# language DataKinds #-} -{-# language TypeFamilies #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeFamilies #-} module Data.Bytes.Builder.Bounded.Class - ( ToBoundedBuilder(..) + ( ToBoundedBuilder (..) ) where import Data.Int @@ -11,14 +11,15 @@ import Data.Word import qualified Data.Bytes.Builder.Bounded as Bounded import qualified GHC.TypeNats as GHC --- | Variant of To that can be encoded as a builder. Human-readable encodings --- are used when possible. For example, numbers are encoded an ascii-encoded --- decimal characters. UTF-8 is preferred for textual types. For types --- that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes --- are preserved. --- --- The goal of this typeclass is to reduce the size of builders produced --- by quasiquotation. +{- | Variant of To that can be encoded as a builder. Human-readable encodings +are used when possible. For example, numbers are encoded an ascii-encoded +decimal characters. UTF-8 is preferred for textual types. For types +that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes +are preserved. + +The goal of this typeclass is to reduce the size of builders produced +by quasiquotation. +-} class ToBoundedBuilder a where type BoundedBuilderLength a :: GHC.Nat toBuilder :: a -> Bounded.Builder (BoundedBuilderLength a) diff --git a/src/Data/Bytes/Builder/Bounded/Unsafe.hs b/src/Data/Bytes/Builder/Bounded/Unsafe.hs index 3c812bc..c983e04 100644 --- a/src/Data/Bytes/Builder/Bounded/Unsafe.hs +++ b/src/Data/Bytes/Builder/Bounded/Unsafe.hs @@ -1,62 +1,66 @@ -{-# language DataKinds #-} -{-# language GADTSyntax #-} -{-# language KindSignatures #-} -{-# language MagicHash #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTSyntax #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE UnboxedTuples #-} module Data.Bytes.Builder.Bounded.Unsafe ( -- * Types - Builder(..) + Builder (..) + -- * Construct , construct + -- * Run , pasteST , pasteIO ) where import Data.Kind (Type) -import Data.Primitive (MutableByteArray(..)) -import GHC.Exts (Int(I#),RealWorld,Int#,State#,MutableByteArray#) +import Data.Primitive (MutableByteArray (..)) +import GHC.Exts (Int (I#), Int#, MutableByteArray#, RealWorld, State#) import GHC.IO (stToIO) -import GHC.ST (ST(ST)) +import GHC.ST (ST (ST)) import GHC.TypeLits (Nat) --- | A builder parameterized by the maximum number of bytes it uses --- when executed. +{- | A builder parameterized by the maximum number of bytes it uses +when executed. +-} newtype Builder :: Nat -> Type where - Builder :: - (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)) - -- ^ This function takes a buffer, an offset, and a number of remaining bytes. - -- It returns the new offset. - -> Builder n - - --- | Constructor for 'Builder' that works on a function with lifted --- arguments instead of unlifted ones. This is just as unsafe as the --- actual constructor. + Builder :: + -- | This function takes a buffer, an offset, and a number of remaining bytes. + -- It returns the new offset. + (forall s. MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #)) -> + Builder n + +{- | Constructor for 'Builder' that works on a function with lifted +arguments instead of unlifted ones. This is just as unsafe as the +actual constructor. +-} construct :: (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n -{-# inline construct #-} -construct f = Builder - $ \arr off s0 -> +{-# INLINE construct #-} +construct f = Builder $ + \arr off s0 -> case unST (f (MutableByteArray arr) (I# off)) s0 of (# s1, (I# n) #) -> (# s1, n #) --- | This function does not enforce the known upper bound on the --- size. It is up to the user to do this. +{- | This function does not enforce the known upper bound on the +size. It is up to the user to do this. +-} pasteST :: Builder n -> MutableByteArray s -> Int -> ST s Int -{-# inline pasteST #-} +{-# INLINE pasteST #-} pasteST (Builder f) (MutableByteArray arr) (I# off) = ST $ \s0 -> case f arr off s0 of (# s1, r #) -> (# s1, (I# r) #) --- | This function does not enforce the known upper bound on the --- size. It is up to the user to do this. +{- | This function does not enforce the known upper bound on the +size. It is up to the user to do this. +-} pasteIO :: Builder n -> MutableByteArray RealWorld -> Int -> IO Int -{-# inline pasteIO #-} +{-# INLINE pasteIO #-} pasteIO b m off = stToIO (pasteST b m off) unST :: ST s a -> State# s -> (# State# s, a #) unST (ST f) = f - diff --git a/src/Data/Bytes/Builder/Class.hs b/src/Data/Bytes/Builder/Class.hs index bc9440c..fcaf303 100644 --- a/src/Data/Bytes/Builder/Class.hs +++ b/src/Data/Bytes/Builder/Class.hs @@ -1,13 +1,12 @@ {-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE TypeSynonymInstances #-} module Data.Bytes.Builder.Class - ( ToBuilder(..) + ( ToBuilder (..) ) where +import Data.ByteString.Short (ShortByteString) import Data.Bytes (Bytes) import Data.Bytes.Builder (Builder) -import Data.ByteString.Short (ShortByteString) import Data.Int import Data.Primitive.ByteArray (ByteArray) import Data.Text.Short (ShortText) @@ -15,14 +14,15 @@ import Data.Word import qualified Data.Bytes.Builder as Builder --- | Types that can be encoded as a builder. Human-readable encodings --- are used when possible. For example, numbers are encoded an ascii-encoded --- decimal characters. UTF-8 is preferred for textual types. For types --- that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes --- are preserved. --- --- The goal of this typeclass is to reduce the size of builders produced --- by quasiquotation. +{- | Types that can be encoded as a builder. Human-readable encodings +are used when possible. For example, numbers are encoded an ascii-encoded +decimal characters. UTF-8 is preferred for textual types. For types +that represent arbitrary bytes (e.g. Bytes, ByteString), the bytes +are preserved. + +The goal of this typeclass is to reduce the size of builders produced +by quasiquotation. +-} class ToBuilder a where toBuilder :: a -> Builder diff --git a/src/Data/Bytes/Builder/Template.hs b/src/Data/Bytes/Builder/Template.hs index 1fd775c..f2b95dd 100644 --- a/src/Data/Bytes/Builder/Template.hs +++ b/src/Data/Bytes/Builder/Template.hs @@ -9,50 +9,53 @@ module Data.Bytes.Builder.Template import Control.Monad (when) import Data.Bytes.Builder.Class (toBuilder) -import GHC.Ptr (Ptr(Ptr)) +import GHC.Ptr (Ptr (Ptr)) import Language.Haskell.Meta.Parse (parseExp) -import Language.Haskell.TH (Q,Exp) -import Language.Haskell.TH.Lib (integerL,stringPrimL,litE) -import Language.Haskell.TH.Quote (QuasiQuoter(..)) +import Language.Haskell.TH (Exp, Q) +import Language.Haskell.TH.Lib (integerL, litE, stringPrimL) +import Language.Haskell.TH.Quote (QuasiQuoter (..)) -import qualified Data.Bytes.Builder as Builder import qualified Data.ByteString.Short as SBS +import qualified Data.Bytes.Builder as Builder import qualified Data.Text.Short as TS import qualified Language.Haskell.TH as TH --- | A quasiquoter for builders. Haskell expressions are interpolated --- with backticks, and the @ToBuilder@ class is used to convert them --- to builders. Several common escape sequences for whitespace and --- control characters are recongized. Consider the following expression, --- where the binding @partition@ has type @Word32@: --- --- > [templ|[WARN] Partition `partition` has invalid data.\n|] --- --- This expression has type @Builder@ and expands to: --- --- > Builder.cstringLen (Ptr "[WARN] Partition "#, 17) <> --- > Builder.toBuilder partition <> --- > Builder.cstringLen (Ptr " has invalid data.\n"#, 19) --- --- The @ToBuilder@ instance for @Word32@ uses decimal encoding, so this --- would result in the following if @partition@ was 42 (with a newline --- character at the end): --- --- > [WARN] Partition 42 has invalid data. --- --- In the future, a more sophisticated @bbldr@ variant will be added --- that will support expressions where the maximum length of the entire --- builder can be computed at compile time. +{- | A quasiquoter for builders. Haskell expressions are interpolated +with backticks, and the @ToBuilder@ class is used to convert them +to builders. Several common escape sequences for whitespace and +control characters are recongized. Consider the following expression, +where the binding @partition@ has type @Word32@: + +> [templ|[WARN] Partition `partition` has invalid data.\n|] + +This expression has type @Builder@ and expands to: + +> Builder.cstringLen (Ptr "[WARN] Partition "#, 17) <> +> Builder.toBuilder partition <> +> Builder.cstringLen (Ptr " has invalid data.\n"#, 19) + +The @ToBuilder@ instance for @Word32@ uses decimal encoding, so this +would result in the following if @partition@ was 42 (with a newline +character at the end): + +> [WARN] Partition 42 has invalid data. + +In the future, a more sophisticated @bbldr@ variant will be added +that will support expressions where the maximum length of the entire +builder can be computed at compile time. +-} bldr :: QuasiQuoter -bldr = QuasiQuoter - { quoteExp = templExp - , quotePat = notHandled "patterns" - , quoteType = notHandled "types" - , quoteDec = notHandled "declarations" - } - where - notHandled things _ = fail $ - things ++ "are not handled by the byte template quasiquoter" +bldr = + QuasiQuoter + { quoteExp = templExp + , quotePat = notHandled "patterns" + , quoteType = notHandled "types" + , quoteDec = notHandled "declarations" + } + where + notHandled things _ = + fail $ + things ++ "are not handled by the byte template quasiquoter" templExp :: String -> Q Exp templExp inp = do @@ -62,7 +65,7 @@ templExp inp = do Right [] -> fail "empty template" Right v -> pure v let expParts = compile <$> rawParts - foldl1 (\e1 e2 -> [| $e1 <> $e2 |]) expParts + foldl1 (\e1 e2 -> [|$e1 <> $e2|]) expParts checkOverloadedStrings :: Q () checkOverloadedStrings = do @@ -87,40 +90,40 @@ compile (Splice str) = case parseExp str of parse :: String -> Either String Template parse = partsLoop - where + where partsLoop "" = do pure [] - partsLoop ('`':inp) = do + partsLoop ('`' : inp) = do (!spl, !rest) <- spliceLoop inp - (Splice spl:) <$> partsLoop rest + (Splice spl :) <$> partsLoop rest partsLoop inp = do (!lit, !rest) <- litLoop "" inp - (Literal lit:) <$> partsLoop rest + (Literal lit :) <$> partsLoop rest litLoop :: String -> String -> Either String (String, String) litLoop !acc rest@"" = pure (reverse acc, rest) - litLoop !acc rest@('`':_) = pure (reverse acc, rest) - litLoop !acc ('\\':next) = do + litLoop !acc rest@('`' : _) = pure (reverse acc, rest) + litLoop !acc ('\\' : next) = do (c, rest) <- parseEscape next - litLoop (c:acc) rest - litLoop !acc (c:rest) = litLoop (c:acc) rest + litLoop (c : acc) rest + litLoop !acc (c : rest) = litLoop (c : acc) rest spliceLoop :: String -> Either String (String, String) spliceLoop inp = case break (== '`') inp of ([], _) -> Left "internal error" - (hs, '`':rest) -> pure (hs, rest) - (_, _:_) -> Left "internal error" + (hs, '`' : rest) -> pure (hs, rest) + (_, _ : _) -> Left "internal error" (_, []) -> Left "unterminated interpolation" parseEscape :: String -> Either String (Char, String) parseEscape "" = Left "incomplete escape" - parseEscape ('\\':rest) = pure ('\\', rest) - parseEscape ('`':rest) = pure ('`', rest) - parseEscape ('\'':rest) = pure ('\'', rest) - parseEscape ('\"':rest) = pure ('\"', rest) - parseEscape ('0':rest) = pure ('\0', rest) - parseEscape ('a':rest) = pure ('\a', rest) - parseEscape ('b':rest) = pure ('\b', rest) - parseEscape ('f':rest) = pure ('\f', rest) - parseEscape ('n':rest) = pure ('\n', rest) - parseEscape ('r':rest) = pure ('\r', rest) - parseEscape ('t':rest) = pure ('\t', rest) - parseEscape ('v':rest) = pure ('\v', rest) - parseEscape (c:_) = Left $ "unrecognized escape: \\" ++ [c] + parseEscape ('\\' : rest) = pure ('\\', rest) + parseEscape ('`' : rest) = pure ('`', rest) + parseEscape ('\'' : rest) = pure ('\'', rest) + parseEscape ('\"' : rest) = pure ('\"', rest) + parseEscape ('0' : rest) = pure ('\0', rest) + parseEscape ('a' : rest) = pure ('\a', rest) + parseEscape ('b' : rest) = pure ('\b', rest) + parseEscape ('f' : rest) = pure ('\f', rest) + parseEscape ('n' : rest) = pure ('\n', rest) + parseEscape ('r' : rest) = pure ('\r', rest) + parseEscape ('t' : rest) = pure ('\t', rest) + parseEscape ('v' : rest) = pure ('\v', rest) + parseEscape (c : _) = Left $ "unrecognized escape: \\" ++ [c] diff --git a/src/Data/Bytes/Builder/Unsafe.hs b/src/Data/Bytes/Builder/Unsafe.hs index 86bbe22..bcfbdf8 100644 --- a/src/Data/Bytes/Builder/Unsafe.hs +++ b/src/Data/Bytes/Builder/Unsafe.hs @@ -1,55 +1,60 @@ -{-# language BangPatterns #-} -{-# language DuplicateRecordFields #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE UnboxedTuples #-} module Data.Bytes.Builder.Unsafe ( -- * Types - Builder(..) - , BuilderState(..) - , Commits(..) + Builder (..) + , BuilderState (..) + , Commits (..) + -- * Execution , pasteST , pasteIO + -- * Construction , fromEffect + -- * Builder State , newBuilderState , closeBuilderState + -- * Finalization , reverseCommitsOntoChunks , commitsOntoChunks , copyReverseCommits , addCommitsLength + -- * Commit Distance , commitDistance , commitDistance1 + -- * Safe Functions + -- | These functions are actually completely safe, but they are defined -- here because they are used by typeclass instances. Import them from -- @Data.Bytes.Builder@ instead. , stringUtf8 , cstring + -- * Pasting with Preconditions , pasteUtf8TextJson# ) where import Control.Monad.Primitive (primitive_) -import Data.Bytes.Chunks (Chunks(ChunksCons)) -import Data.Bytes.Types (Bytes(Bytes)) +import Data.Bytes.Chunks (Chunks (ChunksCons)) +import Data.Bytes.Types (Bytes (Bytes)) import Data.Char (ord) -import Data.Primitive (MutableByteArray(..),ByteArray(..)) +import Data.Primitive (ByteArray (..), MutableByteArray (..)) import Data.Word (Word8) import Foreign.C.String (CString) -import GHC.Base (unpackCString#,unpackCStringUtf8#) -import GHC.Exts ((-#),(+#),(>#),(>=#),Char(C#)) -import GHC.Exts (Addr#,ByteArray#,MutableByteArray#,Int(I#),Ptr(Ptr)) -import GHC.Exts (RealWorld,IsString,Int#,State#) +import GHC.Base (unpackCString#, unpackCStringUtf8#) +import GHC.Exts (Addr#, ByteArray#, Char (C#), Int (I#), Int#, IsString, MutableByteArray#, Ptr (Ptr), RealWorld, State#, (+#), (-#), (>#), (>=#)) import GHC.IO (stToIO) -import GHC.ST (ST(ST)) +import GHC.ST (ST (ST)) import qualified Compat as C import qualified Data.Bytes.Builder.Bounded as Bounded @@ -58,46 +63,52 @@ import qualified Data.Primitive as PM import qualified GHC.Exts as Exts import qualified Op --- | An unmaterialized sequence of bytes that may be pasted --- into a mutable byte array. +{- | An unmaterialized sequence of bytes that may be pasted +into a mutable byte array. +-} newtype Builder - = Builder (forall s. - MutableByteArray# s -> -- buffer we are currently writing to - Int# -> -- offset into the current buffer - Int# -> -- number of bytes remaining in the current buffer - Commits s -> -- buffers and immutable byte slices that we have already committed - State# s -> - (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -- all the same things - ) - --- | A list of committed chunks along with the chunk currently being --- written to. This is kind of like a non-empty variant of 'Commmits' --- but with the additional invariant that the head chunk is a mutable --- byte array. -data BuilderState s = BuilderState - (MutableByteArray# s) -- buffer we are currently writing to - Int# -- offset into the current buffer - Int# -- number of bytes remaining in the current buffer - !(Commits s) -- buffers and immutable byte slices that are already committed + = Builder + ( forall s. + MutableByteArray# s -> -- buffer we are currently writing to + Int# -> -- offset into the current buffer + Int# -> -- number of bytes remaining in the current buffer + Commits s -> -- buffers and immutable byte slices that we have already committed + State# s -> + (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -- all the same things + ) + +{- | A list of committed chunks along with the chunk currently being +written to. This is kind of like a non-empty variant of 'Commmits' +but with the additional invariant that the head chunk is a mutable +byte array. +-} +data BuilderState s + = BuilderState + (MutableByteArray# s) -- buffer we are currently writing to + Int# -- offset into the current buffer + Int# -- number of bytes remaining in the current buffer + !(Commits s) -- buffers and immutable byte slices that are already committed -- | Create an empty 'BuilderState' with a buffer of the given size. newBuilderState :: Int -> ST s (BuilderState s) -{-# inline newBuilderState #-} -newBuilderState n@(I# n# ) = do +{-# INLINE newBuilderState #-} +newBuilderState n@(I# n#) = do MutableByteArray buf <- PM.newByteArray n pure (BuilderState buf 0# n# Initial) --- | Push the active chunk onto the top of the commits. --- The @BuilderState@ argument must not be reused after being passed --- to this function. That is, its use must be affine. +{- | Push the active chunk onto the top of the commits. +The @BuilderState@ argument must not be reused after being passed +to this function. That is, its use must be affine. +-} closeBuilderState :: BuilderState s -> Commits s closeBuilderState (BuilderState dst off _ cmts) = Mutable dst off cmts --- | Run a builder, performing an in-place update on the state. --- The @BuilderState@ argument must not be reused after being passed --- to this function. That is, its use must be affine. +{- | Run a builder, performing an in-place update on the state. +The @BuilderState@ argument must not be reused after being passed +to this function. That is, its use must be affine. +-} pasteST :: Builder -> BuilderState s -> ST s (BuilderState s) -{-# inline pasteST #-} +{-# INLINE pasteST #-} pasteST (Builder f) (BuilderState buf off len cmts) = ST $ \s0 -> case f buf off len cmts s0 of (# s1, buf1, off1, len1, cmts1 #) -> @@ -105,48 +116,54 @@ pasteST (Builder f) (BuilderState buf off len cmts) = ST $ \s0 -> -- | Variant of 'pasteST' that runs in 'IO'. pasteIO :: Builder -> BuilderState RealWorld -> IO (BuilderState RealWorld) -{-# inline pasteIO #-} +{-# INLINE pasteIO #-} pasteIO b st = stToIO (pasteST b st) instance IsString Builder where - {-# inline fromString #-} + {-# INLINE fromString #-} fromString = stringUtf8 instance Semigroup Builder where - {-# inline (<>) #-} + {-# INLINE (<>) #-} Builder f <> Builder g = Builder $ \buf0 off0 len0 cs0 s0 -> case f buf0 off0 len0 cs0 s0 of (# s1, buf1, off1, len1, cs1 #) -> g buf1 off1 len1 cs1 s1 instance Monoid Builder where - {-# inline mempty #-} + {-# INLINE mempty #-} mempty = Builder $ \buf0 off0 len0 cs0 s0 -> (# s0, buf0, off0, len0, cs0 #) data Commits s = Mutable + -- | Mutable buffer, start index implicitly zero (MutableByteArray# s) - -- ^ Mutable buffer, start index implicitly zero - Int# -- ^ Length (may be smaller than actual length) + -- | Length (may be smaller than actual length) + Int# !(Commits s) | Immutable - ByteArray# -- ^ Immutable chunk - Int# -- ^ Offset into chunk, not necessarily zero - Int# -- ^ Length (may be smaller than actual length) + -- | Immutable chunk + ByteArray# + -- | Offset into chunk, not necessarily zero + Int# + -- | Length (may be smaller than actual length) + Int# !(Commits s) | Initial --- | Add the total number of bytes in the commits to first --- argument. +{- | Add the total number of bytes in the commits to first +argument. +-} addCommitsLength :: Int -> Commits s -> Int addCommitsLength !acc Initial = acc addCommitsLength !acc (Immutable _ _ x cs) = addCommitsLength (acc + I# x) cs addCommitsLength !acc (Mutable _ x cs) = addCommitsLength (acc + I# x) cs --- | Cons the chunks from a list of @Commits@ onto an initial --- @Chunks@ list (this argument is often @ChunksNil@). This reverses --- the order of the chunks, which is desirable since builders assemble --- @Commits@ with the chunks backwards. This performs an in-place shrink --- and freezes any mutable byte arrays it encounters. Consequently, --- these must not be reused. +{- | Cons the chunks from a list of @Commits@ onto an initial +@Chunks@ list (this argument is often @ChunksNil@). This reverses +the order of the chunks, which is desirable since builders assemble +@Commits@ with the chunks backwards. This performs an in-place shrink +and freezes any mutable byte arrays it encounters. Consequently, +these must not be reused. +-} reverseCommitsOntoChunks :: Chunks -> Commits s -> ST s Chunks reverseCommitsOntoChunks !xs Initial = pure xs reverseCommitsOntoChunks !xs (Immutable arr off len cs) = @@ -159,17 +176,18 @@ reverseCommitsOntoChunks !xs (Mutable buf len cs) = case len of arr <- PM.unsafeFreezeByteArray (MutableByteArray buf) reverseCommitsOntoChunks (ChunksCons (Bytes arr 0 (I# len)) xs) cs --- | Variant of 'reverseCommitsOntoChunks' that does not reverse --- the order of the commits. Since commits are built backwards by --- consing, this means that the chunks appended to the front will --- be backwards. Within each chunk, however, the bytes will be in --- the correct order. --- --- Unlike 'reverseCommitsOntoChunks', this function is not tail --- recursive. +{- | Variant of 'reverseCommitsOntoChunks' that does not reverse +the order of the commits. Since commits are built backwards by +consing, this means that the chunks appended to the front will +be backwards. Within each chunk, however, the bytes will be in +the correct order. + +Unlike 'reverseCommitsOntoChunks', this function is not tail +recursive. +-} commitsOntoChunks :: Chunks -> Commits s -> ST s Chunks commitsOntoChunks !xs0 cs0 = go cs0 - where + where go Initial = pure xs0 go (Immutable arr off len cs) = do xs <- go cs @@ -183,53 +201,65 @@ commitsOntoChunks !xs0 cs0 = go cs0 xs <- go cs pure $! ChunksCons (Bytes arr 0 (I# len)) xs --- | Copy the contents of the chunks into a mutable array, reversing --- the order of the chunks. --- Precondition: The destination must have enough space to house the --- contents. This is not checked. +{- | Copy the contents of the chunks into a mutable array, reversing +the order of the chunks. +Precondition: The destination must have enough space to house the +contents. This is not checked. +-} copyReverseCommits :: - MutableByteArray s -- ^ Destination - -> Int -- ^ Destination range successor - -> Commits s -- ^ Source - -> ST s Int -{-# inline copyReverseCommits #-} -copyReverseCommits (MutableByteArray dst) (I# off) cs = ST - (\s0 -> case copyReverseCommits# dst off cs s0 of - (# s1, nextOff #) -> (# s1, I# nextOff #) - ) + -- | Destination + MutableByteArray s -> + -- | Destination range successor + Int -> + -- | Source + Commits s -> + ST s Int +{-# INLINE copyReverseCommits #-} +copyReverseCommits (MutableByteArray dst) (I# off) cs = + ST + ( \s0 -> case copyReverseCommits# dst off cs s0 of + (# s1, nextOff #) -> (# s1, I# nextOff #) + ) copyReverseCommits# :: - MutableByteArray# s - -> Int# - -> Commits s - -> State# s - -> (# State# s, Int# #) + MutableByteArray# s -> + Int# -> + Commits s -> + State# s -> + (# State# s, Int# #) copyReverseCommits# _ off Initial s0 = (# s0, off #) copyReverseCommits# marr prevOff (Mutable arr sz cs) s0 = - let !off = prevOff -# sz in - case Op.copyMutableByteArray# arr 0# marr off sz s0 of - s1 -> copyReverseCommits# marr off cs s1 + let !off = prevOff -# sz + in case Op.copyMutableByteArray# arr 0# marr off sz s0 of + s1 -> copyReverseCommits# marr off cs s1 copyReverseCommits# marr prevOff (Immutable arr soff sz cs) s0 = - let !off = prevOff -# sz in - case Op.copyByteArray# arr soff marr off sz s0 of - s1 -> copyReverseCommits# marr off cs s1 + let !off = prevOff -# sz + in case Op.copyByteArray# arr soff marr off sz s0 of + s1 -> copyReverseCommits# marr off cs s1 --- | Create a builder from a cons-list of 'Char'. These --- must be UTF-8 encoded. +{- | Create a builder from a cons-list of 'Char'. These +must be UTF-8 encoded. +-} stringUtf8 :: String -> Builder -{-# inline stringUtf8 #-} +{-# INLINE stringUtf8 #-} stringUtf8 cs = Builder (goString cs) --- | Create a builder from a @NUL@-terminated 'CString'. This ignores any --- textual encoding, copying bytes until @NUL@ is reached. +{- | Create a builder from a @NUL@-terminated 'CString'. This ignores any +textual encoding, copying bytes until @NUL@ is reached. +-} cstring :: CString -> Builder -{-# inline cstring #-} +{-# INLINE cstring #-} cstring (Ptr cs) = Builder (goCString cs) -goString :: String - -> MutableByteArray# s -> Int# -> Int# -> Commits s - -> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) -{-# noinline goString #-} +goString :: + String -> + MutableByteArray# s -> + Int# -> + Int# -> + Commits s -> + State# s -> + (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) +{-# NOINLINE goString #-} goString [] buf0 off0 len0 cs0 s0 = (# s0, buf0, off0, len0, cs0 #) goString (c : cs) buf0 off0 len0 cs0 s0 = case len0 ># 3# of 1# -> case unST (UnsafeBounded.pasteST (Bounded.char c) (MutableByteArray buf0) (I# off0)) s0 of @@ -245,39 +275,53 @@ goString (c : cs) buf0 off0 len0 cs0 s0 = case len0 ># 3# of -- used Modified UTF-8. {-# RULES "Builder stringUtf8/cstring" forall s a b c d e. - goString (unpackCString# s) a b c d e = goCString s a b c d e + goString (unpackCString# s) a b c d e = + goCString s a b c d e "Builder stringUtf8/cstring-utf8" forall s a b c d e. - goString (unpackCStringUtf8# s) a b c d e = goCString s a b c d e -#-} - -goCString :: Addr# -> MutableByteArray# s -> Int# -> Int# -> Commits s - -> State# s -> (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) + goString (unpackCStringUtf8# s) a b c d e = + goCString s a b c d e + #-} + +goCString :: + Addr# -> + MutableByteArray# s -> + Int# -> + Int# -> + Commits s -> + State# s -> + (# State# s, MutableByteArray# s, Int#, Int#, Commits s #) goCString addr buf0 off0 len0 cs0 s0 = case C.word8ToWord# (Exts.indexWord8OffAddr# addr 0#) of 0## -> (# s0, buf0, off0, len0, cs0 #) w -> case len0 of 0# -> case Exts.newByteArray# 4080# s0 of (# s1, buf1 #) -> case Exts.writeWord8Array# buf1 0# (C.wordToWord8# w) s1 of - s2 -> goCString - (Exts.plusAddr# addr 1# ) buf1 1# (4080# -# 1# ) - (Mutable buf0 off0 cs0) - s2 + s2 -> + goCString + (Exts.plusAddr# addr 1#) + buf1 + 1# + (4080# -# 1#) + (Mutable buf0 off0 cs0) + s2 _ -> case Exts.writeWord8Array# buf0 off0 (C.wordToWord8# w) s0 of - s1 -> goCString (Exts.plusAddr# addr 1# ) buf0 (off0 +# 1# ) (len0 -# 1# ) cs0 s1 + s1 -> goCString (Exts.plusAddr# addr 1#) buf0 (off0 +# 1#) (len0 -# 1#) cs0 s1 fromEffect :: - Int -- ^ Maximum number of bytes the paste function needs - -> (forall s. MutableByteArray s -> Int -> ST s Int) - -- ^ Paste function. Takes a byte array and an offset and returns - -- the new offset and having pasted into the buffer. - -> Builder -{-# inline fromEffect #-} + -- | Maximum number of bytes the paste function needs + Int -> + -- | Paste function. Takes a byte array and an offset and returns + -- the new offset and having pasted into the buffer. + (forall s. MutableByteArray s -> Int -> ST s Int) -> + Builder +{-# INLINE fromEffect #-} fromEffect (I# req) f = Builder $ \buf0 off0 len0 cs0 s0 -> let !(# s1, buf1, off1, len1, cs1 #) = case len0 >=# req of 1# -> (# s0, buf0, off0, len0, cs0 #) - _ -> let !(I# lenX) = max 4080 (I# req) in - case Exts.newByteArray# lenX s0 of - (# sX, bufX #) -> - (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) + _ -> + let !(I# lenX) = max 4080 (I# req) + in case Exts.newByteArray# lenX s0 of + (# sX, bufX #) -> + (# sX, bufX, 0#, lenX, Mutable buf0 off0 cs0 #) in case unST (f (MutableByteArray buf1) (I# off1)) s1 of (# s2, I# off2 #) -> (# s2, buf1, off2, len1 -# (off2 -# off1), cs1 #) @@ -288,24 +332,26 @@ shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s () shrinkMutableByteArray (MutableByteArray arr) (I# sz) = primitive_ (Exts.shrinkMutableByteArray# arr sz) --- | Variant of commitDistance where you get to supply a --- head of the commit list that has not yet been committed. +{- | Variant of commitDistance where you get to supply a +head of the commit list that has not yet been committed. +-} commitDistance1 :: - MutableByteArray# s -- target - -> Int# -- offset into target - -> MutableByteArray# s -- head of array - -> Int# -- offset into head of array - -> Commits s - -> Int# + MutableByteArray# s -> -- target + Int# -> -- offset into target + MutableByteArray# s -> -- head of array + Int# -> -- offset into head of array + Commits s -> + Int# commitDistance1 target offTarget buf0 offBuf cs = case Exts.sameMutableByteArray# target buf0 of 1# -> offBuf -# offTarget _ -> commitDistance target offBuf cs -# offTarget --- | Compute the number of bytes between the last byte and the offset --- specified in a chunk. Precondition: the chunk must exist in the --- list of committed chunks. This relies on mutable byte arrays having --- identity (e.g. it uses @sameMutableByteArray#@). +{- | Compute the number of bytes between the last byte and the offset +specified in a chunk. Precondition: the chunk must exist in the +list of committed chunks. This relies on mutable byte arrays having +identity (e.g. it uses @sameMutableByteArray#@). +-} commitDistance :: MutableByteArray# s -> Int# -> Commits s -> Int# commitDistance !_ !_ Initial = errorWithoutStackTrace "chunkDistance: chunk not found" commitDistance target !n (Immutable _ _ len cs) = @@ -315,48 +361,59 @@ commitDistance target !n (Mutable buf len cs) = 1# -> n +# len _ -> commitDistance target (n +# len) cs --- | Encode (UTF-8 encoded) text as a JSON string, wrapping it in double quotes. --- This escapes all characters with code points below @0x20@. --- --- * Precondition: The slice of the byte argument is UTF-8 encoded text. --- * Precondition: There is enough space in the buffer for the result --- to be written to. A simple way to ensure enough space is to allocate --- @6N + 2@ bytes, where N is the length of the argument. However, the --- caller may use clever heuristics to find a lower upper bound. --- * Result: The next offset in the destination buffer +{- | Encode (UTF-8 encoded) text as a JSON string, wrapping it in double quotes. +This escapes all characters with code points below @0x20@. + +* Precondition: The slice of the byte argument is UTF-8 encoded text. +* Precondition: There is enough space in the buffer for the result + to be written to. A simple way to ensure enough space is to allocate + @6N + 2@ bytes, where N is the length of the argument. However, the + caller may use clever heuristics to find a lower upper bound. +* Result: The next offset in the destination buffer +-} pasteUtf8TextJson# :: - ByteArray# -- ^ source - -> Int# -- ^ source offset - -> Int# -- ^ source length - -> MutableByteArray# s -- ^ destination buffer - -> Int# -- ^ offset into destination buffer - -> State# s -- ^ state token - -> (# State# s, Int# #) -- returns next destination offset -{-# noinline pasteUtf8TextJson# #-} + -- | source + ByteArray# -> + -- | source offset + Int# -> + -- | source length + Int# -> + -- | destination buffer + MutableByteArray# s -> + -- | offset into destination buffer + Int# -> + -- | state token + State# s -> + (# State# s, Int# #) -- returns next destination offset +{-# NOINLINE pasteUtf8TextJson# #-} pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# = let ST f = do let dst = MutableByteArray dst# let doff0 = I# doff0# PM.writeByteArray dst doff0 (c2w '"') - let go !soff !slen !doff = if slen > 0 - then case indexChar8Array (ByteArray src#) soff of - '\\' -> write2 dst doff '\\' '\\' *> go (soff + 1) (slen - 1) (doff + 2) - '\"' -> write2 dst doff '\\' '\"' *> go (soff + 1) (slen - 1) (doff + 2) - c -> if c >= '\x20' - then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1) - else case c of - '\n' -> write2 dst doff '\\' 'n' *> go (soff + 1) (slen - 1) (doff + 2) - '\r' -> write2 dst doff '\\' 'r' *> go (soff + 1) (slen - 1) (doff + 2) - '\t' -> write2 dst doff '\\' 't' *> go (soff + 1) (slen - 1) (doff + 2) - '\b' -> write2 dst doff '\\' 'b' *> go (soff + 1) (slen - 1) (doff + 2) - '\f' -> write2 dst doff '\\' 'f' *> go (soff + 1) (slen - 1) (doff + 2) - _ -> do - write2 dst doff '\\' 'u' - doff' <- UnsafeBounded.pasteST - (Bounded.word16PaddedUpperHex (fromIntegral (c2w c))) - dst (doff + 2) - go (soff + 1) (slen - 1) doff' - else pure doff + let go !soff !slen !doff = + if slen > 0 + then case indexChar8Array (ByteArray src#) soff of + '\\' -> write2 dst doff '\\' '\\' *> go (soff + 1) (slen - 1) (doff + 2) + '\"' -> write2 dst doff '\\' '\"' *> go (soff + 1) (slen - 1) (doff + 2) + c -> + if c >= '\x20' + then PM.writeByteArray dst doff (c2w c) *> go (soff + 1) (slen - 1) (doff + 1) + else case c of + '\n' -> write2 dst doff '\\' 'n' *> go (soff + 1) (slen - 1) (doff + 2) + '\r' -> write2 dst doff '\\' 'r' *> go (soff + 1) (slen - 1) (doff + 2) + '\t' -> write2 dst doff '\\' 't' *> go (soff + 1) (slen - 1) (doff + 2) + '\b' -> write2 dst doff '\\' 'b' *> go (soff + 1) (slen - 1) (doff + 2) + '\f' -> write2 dst doff '\\' 'f' *> go (soff + 1) (slen - 1) (doff + 2) + _ -> do + write2 dst doff '\\' 'u' + doff' <- + UnsafeBounded.pasteST + (Bounded.word16PaddedUpperHex (fromIntegral (c2w c))) + dst + (doff + 2) + go (soff + 1) (slen - 1) doff' + else pure doff doffRes <- go (I# soff0#) (I# slen0#) (doff0 + 1) PM.writeByteArray dst doffRes (c2w '"') pure (doffRes + 1) @@ -364,7 +421,7 @@ pasteUtf8TextJson# src# soff0# slen0# dst# doff0# s0# = in (# s1, dstFinal #) c2w :: Char -> Word8 -{-# inline c2w #-} +{-# INLINE c2w #-} c2w = fromIntegral . ord -- Internal. Write two characters in the ASCII plane to a byte array. @@ -374,5 +431,5 @@ write2 marr ix a b = do PM.writeByteArray marr (ix + 1) (c2w b) indexChar8Array :: ByteArray -> Int -> Char -{-# inline indexChar8Array #-} +{-# INLINE indexChar8Array #-} indexChar8Array (ByteArray b) (I# i) = C# (Exts.indexCharArray# b i) diff --git a/test/Main.hs b/test/Main.hs index 1b206c7..e5d3be2 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,53 +1,50 @@ -{-# language BangPatterns #-} -{-# language NumericUnderscores #-} -{-# language OverloadedStrings #-} -{-# language QuasiQuotes #-} -{-# language ScopedTypeVariables #-} -{-# language TypeApplications #-} - +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} {-# OPTIONS_GHC -fno-warn-orphans #-} import Prelude hiding (replicate) -import Control.Applicative (liftA2) import Control.Monad.ST (runST) import Data.Bytes.Builder import Data.Bytes.Builder.Template (bldr) -import Data.Bytes.Types (MutableBytes(MutableBytes)) -import Data.Char (ord,chr) -import Data.IORef (IORef,newIORef,readIORef,writeIORef) +import Data.Bytes.Types (MutableBytes (MutableBytes)) +import Data.Char (chr, ord) +import Data.IORef (IORef, newIORef, readIORef, writeIORef) import Data.Maybe (fromMaybe) -import Data.Primitive (ByteArray) -import Data.Primitive (PrimArray) +import Data.Primitive (ByteArray, PrimArray) import Data.Text.Short (ShortText) -import Data.WideWord (Word128(Word128),Word256(Word256)) +import Data.WideWord (Word128 (Word128), Word256 (Word256)) import Data.Word import Numeric.Natural (Natural) -import Test.QuickCheck ((===),Arbitrary) +import Test.QuickCheck (Arbitrary, (===)) import Test.QuickCheck.Instances.Natural () -import Test.Tasty (defaultMain,testGroup,TestTree) +import Test.Tasty (TestTree, defaultMain, testGroup) import Test.Tasty.HUnit ((@=?)) import Text.Printf (printf) import qualified Arithmetic.Nat as Nat import qualified Data.Bits as Bits +import qualified Data.ByteString as ByteString +import qualified Data.ByteString.Builder as BB +import qualified Data.ByteString.Lazy.Char8 as LB import qualified Data.Bytes as Bytes import qualified Data.Bytes.Builder as Builder import qualified Data.Bytes.Builder.Bounded as Bounded import qualified Data.Bytes.Chunks as Chunks import qualified Data.Bytes.Text.Ascii as Ascii import qualified Data.Bytes.Text.Latin1 as Latin1 -import qualified Data.ByteString as ByteString -import qualified Data.ByteString.Builder as BB -import qualified Data.ByteString.Lazy.Char8 as LB import qualified Data.List as L import qualified Data.Primitive as PM import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified GHC.Exts as Exts -import qualified Prelude import qualified Test.Tasty.HUnit as THU import qualified Test.Tasty.QuickCheck as TQC +import qualified Prelude import qualified HexWord64 import qualified Word16Tree @@ -56,299 +53,309 @@ main :: IO () main = defaultMain tests tests :: TestTree -tests = testGroup "Tests" - [ testGroup "live" - [ TQC.testProperty "word64Dec" $ \w -> - runConcat 1 (word64Dec w) === pack (show w) - , TQC.testProperty "word64Dec-x3" $ \x y z -> - runConcat 1 (word64Dec x <> word64Dec y <> word64Dec z) - === - pack (show x ++ show y ++ show z) - , TQC.testProperty "int64Dec-x3" $ \x y z -> - runConcat 1 (int64Dec x <> int64Dec y <> int64Dec z) - === - pack (show x ++ show y ++ show z) - , TQC.testProperty "word64BE-x3" $ \x y z -> - runConcat 1 (word64BE x <> word64BE y <> word64BE z) - === - pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z))) - , TQC.testProperty "word256PaddedLowerHex" $ \w -> - Bounded.run Nat.constant (Bounded.word256PaddedLowerHex w) - === - pack (showWord256PaddedLowerHex w) - , TQC.testProperty "word128PaddedUpperHex" $ \w -> - Bounded.run Nat.constant (Bounded.word128PaddedUpperHex w) - === - pack (showWord128PaddedUpperHex w) - , TQC.testProperty "word64PaddedUpperHex" $ \w -> - runConcat 1 (word64PaddedUpperHex w) - === - pack (showWord64PaddedUpperHex w) - , TQC.testProperty "word16PaddedLowerHex" $ \w -> - runConcat 1 (word16PaddedLowerHex w) - === - pack (showWord16PaddedLowerHex w) - , TQC.testProperty "wordPaddedDec2" $ TQC.forAll (TQC.choose (0,99)) $ \w -> - Bounded.run Nat.two (Bounded.wordPaddedDec2 w) - === - pack (zeroPadL 2 (show w)) - , TQC.testProperty "wordPaddedDec4" $ TQC.forAll (TQC.choose (0,9999)) $ \w -> - Bounded.run Nat.constant (Bounded.wordPaddedDec4 w) - === - pack (zeroPadL 4 (show w)) - , TQC.testProperty "wordPaddedDec9" $ TQC.forAll (TQC.choose (0,999999999)) $ \w -> - Bounded.run Nat.constant (Bounded.wordPaddedDec9 w) - === - pack (zeroPadL 9 (show w)) - , TQC.testProperty "word8Dec" $ \w -> - runConcat 1 (word8Dec w) - === - pack (show w) - , TQC.testProperty "consLength32BE" $ \w -> - runConcat 1 (consLength32BE (word8Dec w)) - === - pack ('\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w) - , TQC.testProperty "consLength64BE-uni" $ \w -> - pack - ( '\x00' : '\x00' : '\x00' : '\x00' - : '\x00' : '\x00' : '\x00' : chr (L.length (show w)) - : show w - ) - === - runConcat 1 (consLength64BE (word16Dec w)) - , TQC.testProperty "consLength64BE-multi" $ \w -> - pack - ( '\x00' : '\x00' : '\x00' : '\x00' - : '\x00' : '\x00' : '\x00' : chr (1 + L.length (show w)) - : '\x42' : show w - ) - === - runConcat 1 (consLength64BE (word8 0x42 <> flush 2 <> word16Dec w)) - , THU.testCase "stringUtf8" $ - packUtf8 "¿Cómo estás? I am doing well." @=? - runConcat 1 (stringUtf8 "¿Cómo estás? I am doing well.") - , THU.testCase "doubleDec-A" $ - pack (show (2 :: Int)) @=? runConcat 1 (doubleDec 2.0) - , THU.testCase "doubleDec-B" $ - pack (show (2.5 :: Double)) @=? runConcat 1 (doubleDec 2.5) - , THU.testCase "doubleDec-C" $ - pack ("1e+15") @=? runConcat 1 (doubleDec 1e15) - , THU.testCase "doubleDec-D" $ - pack ("-42") @=? runConcat 1 (doubleDec (-42)) - , THU.testCase "doubleDec-E" $ - AsciiByteArray (pack ("-8.8888888888889e+14")) @=? AsciiByteArray (runConcat 1 (doubleDec (-888888888888888.8888888))) - , THU.testCase "doubleDec-F" $ - pack ("42") @=? runConcat 1 (doubleDec 42) - , THU.testCase "doubleDec-G" $ - pack ("0") @=? runConcat 1 (doubleDec 0) - , THU.testCase "doubleDec-H" $ - pack ("0.5") @=? runConcat 1 (doubleDec 0.5) - , THU.testCase "doubleDec-I" $ - pack ("-0.5") @=? runConcat 1 (doubleDec (-0.5)) - , THU.testCase "doubleDec-J" $ - pack ("999999999") @=? runConcat 1 (doubleDec 999999999) - , THU.testCase "doubleDec-K" $ - pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999)) - , THU.testCase "doubleDec-L" $ - AsciiByteArray (pack ("6.6666666666667e-12")) @=? AsciiByteArray (runConcat 1 (doubleDec (2 / 300_000_000_000))) - , THU.testCase "doubleDec-M" $ - AsciiByteArray (pack ("6.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 6.666666666666667e-10)) - , THU.testCase "doubleDec-N" $ - AsciiByteArray (pack ("5e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 5.0e-10)) - , THU.testCase "doubleDec-O" $ - AsciiByteArray (pack ("1.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.6666666666666669e-10)) - , THU.testCase "doubleDec-P" $ - AsciiByteArray (pack ("1e-09")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-9)) - , THU.testCase "doubleDec-Q" $ - AsciiByteArray (pack ("1e-08")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-8)) - , THU.testCase "shortTextJsonString-A" $ - pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello") - , THU.testCase "shortTextJsonString-B" $ - pack ("\"\\\\_\\\"_/\"") @=? runConcat 1 (shortTextJsonString "\\_\"_/") - , THU.testCase "shortTextJsonString-C" $ - pack ("\"Hi\\r\\nLo\"") @=? runConcat 1 (shortTextJsonString "Hi\r\nLo") - , THU.testCase "shortTextJsonString-D" $ - pack ("\"Hi\\u001BLo\"") @=? runConcat 1 (shortTextJsonString "Hi\ESCLo") - , THU.testCase "word-16-tree" $ - Word16Tree.expectedSmall @=? runConcat 1 - (Word16Tree.encode Word16Tree.exampleSmall) - , THU.testCase "byteArray-small" $ - let a = replicateByte 3 0x50 - b = replicateByte 5 0x51 - in mconcat [a,b] @=? runConcat 1 - ( byteArray a <> byteArray b ) - , THU.testCase "byteArray-big" $ - let a = replicateByte 2105 0x50 - b = replicateByte 725 0x51 - c = replicateByte 900 0x52 - d = replicateByte 800 0x53 - e = replicateByte 700 0x54 - f = replicateByte 950 0x55 - g = replicateByte 975 0x56 - h = replicateByte 3000 0x57 - i = replicateByte 125 0x58 - in mconcat [a,b,c,d,e,f,g,h,i] @=? runConcat 1 - ( byteArray a <> byteArray b <> byteArray c <> - byteArray d <> byteArray e <> byteArray f <> - byteArray g <> byteArray h <> byteArray i +tests = + testGroup + "Tests" + [ testGroup + "live" + [ TQC.testProperty "word64Dec" $ \w -> + runConcat 1 (word64Dec w) === pack (show w) + , TQC.testProperty "word64Dec-x3" $ \x y z -> + runConcat 1 (word64Dec x <> word64Dec y <> word64Dec z) + === pack (show x ++ show y ++ show z) + , TQC.testProperty "int64Dec-x3" $ \x y z -> + runConcat 1 (int64Dec x <> int64Dec y <> int64Dec z) + === pack (show x ++ show y ++ show z) + , TQC.testProperty "word64BE-x3" $ \x y z -> + runConcat 1 (word64BE x <> word64BE y <> word64BE z) + === pack (LB.unpack (BB.toLazyByteString (BB.word64BE x <> BB.word64BE y <> BB.word64BE z))) + , TQC.testProperty "word256PaddedLowerHex" $ \w -> + Bounded.run Nat.constant (Bounded.word256PaddedLowerHex w) + === pack (showWord256PaddedLowerHex w) + , TQC.testProperty "word128PaddedUpperHex" $ \w -> + Bounded.run Nat.constant (Bounded.word128PaddedUpperHex w) + === pack (showWord128PaddedUpperHex w) + , TQC.testProperty "word64PaddedUpperHex" $ \w -> + runConcat 1 (word64PaddedUpperHex w) + === pack (showWord64PaddedUpperHex w) + , TQC.testProperty "word16PaddedLowerHex" $ \w -> + runConcat 1 (word16PaddedLowerHex w) + === pack (showWord16PaddedLowerHex w) + , TQC.testProperty "wordPaddedDec2" $ TQC.forAll (TQC.choose (0, 99)) $ \w -> + Bounded.run Nat.two (Bounded.wordPaddedDec2 w) + === pack (zeroPadL 2 (show w)) + , TQC.testProperty "wordPaddedDec4" $ TQC.forAll (TQC.choose (0, 9999)) $ \w -> + Bounded.run Nat.constant (Bounded.wordPaddedDec4 w) + === pack (zeroPadL 4 (show w)) + , TQC.testProperty "wordPaddedDec9" $ TQC.forAll (TQC.choose (0, 999999999)) $ \w -> + Bounded.run Nat.constant (Bounded.wordPaddedDec9 w) + === pack (zeroPadL 9 (show w)) + , TQC.testProperty "word8Dec" $ \w -> + runConcat 1 (word8Dec w) + === pack (show w) + , TQC.testProperty "consLength32BE" $ \w -> + runConcat 1 (consLength32BE (word8Dec w)) + === pack ('\x00' : '\x00' : '\x00' : chr (L.length (show w)) : show w) + , TQC.testProperty "consLength64BE-uni" $ \w -> + pack + ( '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : chr (L.length (show w)) + : show w ) - , TQC.testProperty "word16ArrayLE" $ \(xs :: [Word16]) -> - let ys = Exts.fromList xs :: PrimArray Word16 - in runConcat 1 (foldMap word16LE xs) - === - runConcat 1 (word16ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word16ArrayBE" $ \(xs :: [Word16]) -> - let ys = Exts.fromList xs :: PrimArray Word16 - in runConcat 1 (foldMap word16BE xs) - === - runConcat 1 (word16ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word32ArrayLE" $ \(xs :: [Word32]) -> - let ys = Exts.fromList xs :: PrimArray Word32 - in runConcat 1 (foldMap word32LE xs) - === - runConcat 1 (word32ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word32ArrayBE" $ \(xs :: [Word32]) -> - let ys = Exts.fromList xs :: PrimArray Word32 - in runConcat 1 (foldMap word32BE xs) - === - runConcat 1 (word32ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word64ArrayLE" $ \(xs :: [Word64]) -> - let ys = Exts.fromList xs :: PrimArray Word64 - in runConcat 1 (foldMap word64LE xs) - === - runConcat 1 (word64ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word64ArrayBE" $ \(xs :: [Word64]) -> - let ys = Exts.fromList xs :: PrimArray Word64 - in runConcat 1 (foldMap word64BE xs) - === - runConcat 1 (word64ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word128ArrayLE" $ \(xs :: [Word128]) -> - let ys = Exts.fromList xs :: PrimArray Word128 - in runConcat 1 (foldMap word128LE xs) - === - runConcat 1 (word128ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word128ArrayBE" $ \(xs :: [Word128]) -> - let ys = Exts.fromList xs :: PrimArray Word128 - in runConcat 1 (foldMap word128BE xs) - === - runConcat 1 (word128ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word256ArrayLE" $ \(xs :: [Word256]) -> - let ys = Exts.fromList xs :: PrimArray Word256 - in runConcat 1 (foldMap word256LE xs) - === - runConcat 1 (word256ArrayLE ys 0 (Prelude.length xs)) - , TQC.testProperty "word256ArrayBE" $ \(xs :: [Word256]) -> - let ys = Exts.fromList xs :: PrimArray Word256 - in runConcat 1 (foldMap word256BE xs) - === - runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs)) - , TQC.testProperty "word64Vlq" $ \(x :: Word64) -> - runConcat 1 (word64Vlq x) - === - naiveVlq (fromIntegral x) - , TQC.testProperty "word64LEB128" $ \(x :: Word64) -> - runConcat 1 (word64LEB128 x) - === - naiveLeb128 (fromIntegral x) - , TQC.testProperty "naturalDec-A" $ \(x :: Natural) -> - runConcat 1 (naturalDec x) - === - pack (show x) - , TQC.testProperty "naturalDec-B" $ \(x :: Natural) -> - let y = 1234567892345678934678987654321 * x in - runConcat 1 (naturalDec y) - === - pack (show y) - , testGroup "leb128-encoding" - [ THU.testCase "16" $ - Chunks.concat (run 16 (word64LEB128 16)) - @=? - Latin1.fromString "\x10" - , THU.testCase "1000000" $ - Chunks.concat (run 16 (word64LEB128 1000000)) - @=? - Exts.fromList [0xc0,0x84,0x3d] - , THU.testCase "deadbeef-smile" $ do - let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" - (Chunks.concat . run 16) (sevenEightSmile inp) - @=?Latin1.fromString "\x6F\x2B\x37\x6E\x0F" - ] - , testGroup "seven/eight encoding" - [ THU.testCase "deadbeef" $ do - let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" - (Chunks.concat . run 16) (sevenEightRight inp) - @=? Latin1.fromString "\x6F\x2B\x37\x6E\x78" - , THU.testCase "deadbeef-smile" $ do - let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" - (Chunks.concat . run 16) (sevenEightSmile inp) - @=?Latin1.fromString "\x6F\x2B\x37\x6E\x0F" - ] - ] - , testGroup "alternate" - [ TQC.testProperty "HexWord64" $ \x y -> - runConcat 1 - ( fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x) - <> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y) - ) - === - pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y) - ] - , testGroup "putMany" - [ THU.testCase "A" $ do - ref <- newIORef [] - let txt = "hello_world_are_you_listening" :: [Char] - putMany 7 ascii txt (bytesOntoRef ref) - res <- readIORef ref - id $ - [ map c2w "hello_" - , map c2w "world_" - , map c2w "are_yo" - , map c2w "u_list" - , map c2w "ening" - ] @=? map Exts.toList (Exts.toList res) - ] - , testGroup "putManyConsLength" - [ THU.testCase "A" $ do - ref <- newIORef [] - let txt = "hello_world_are_you_listening" :: [Char] - putManyConsLength Nat.constant - (\n -> Bounded.word16BE (fromIntegral n)) - 16 ascii txt (bytesOntoRef ref) - res <- readIORef ref - id $ - [ 0x00 : 0x0A : map c2w "hello_worl" - , 0x00 : 0x0A : map c2w "d_are_you_" - , 0x00 : 0x09 : map c2w "listening" - ] @=? map Exts.toList (Exts.toList res) - ] - , testGroup "bytes templates" - [ THU.testCase "A" $ do - let name = Just ("foo" :: ShortText) - msgBuilder = [bldr|Hello `fromMaybe "World" name`!\n|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "Hello foo!\n" @=? msg - , THU.testCase "B" $ do - let one = "foo" :: ShortText - two = "bar" :: String - msgBuilder = [bldr|`one``two`|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "foobar" @=? msg - , THU.testCase "C" $ do - let msgBuilder = [bldr|a backtick for you: \`|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "a backtick for you: `" @=? msg - , THU.testCase "D" $ do - let i = 137 :: Int - msgBuilder = [bldr|there are `i` lights!|] - msg = Chunks.concat . Builder.run 200 $ msgBuilder - in Ascii.fromString "there are 137 lights!" @=? msg + === runConcat 1 (consLength64BE (word16Dec w)) + , TQC.testProperty "consLength64BE-multi" $ \w -> + pack + ( '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : '\x00' + : chr (1 + L.length (show w)) + : '\x42' + : show w + ) + === runConcat 1 (consLength64BE (word8 0x42 <> flush 2 <> word16Dec w)) + , THU.testCase "stringUtf8" $ + packUtf8 "¿Cómo estás? I am doing well." + @=? runConcat 1 (stringUtf8 "¿Cómo estás? I am doing well.") + , THU.testCase "doubleDec-A" $ + pack (show (2 :: Int)) @=? runConcat 1 (doubleDec 2.0) + , THU.testCase "doubleDec-B" $ + pack (show (2.5 :: Double)) @=? runConcat 1 (doubleDec 2.5) + , THU.testCase "doubleDec-C" $ + pack ("1e+15") @=? runConcat 1 (doubleDec 1e15) + , THU.testCase "doubleDec-D" $ + pack ("-42") @=? runConcat 1 (doubleDec (-42)) + , THU.testCase "doubleDec-E" $ + AsciiByteArray (pack ("-8.8888888888889e+14")) @=? AsciiByteArray (runConcat 1 (doubleDec (-888888888888888.8888888))) + , THU.testCase "doubleDec-F" $ + pack ("42") @=? runConcat 1 (doubleDec 42) + , THU.testCase "doubleDec-G" $ + pack ("0") @=? runConcat 1 (doubleDec 0) + , THU.testCase "doubleDec-H" $ + pack ("0.5") @=? runConcat 1 (doubleDec 0.5) + , THU.testCase "doubleDec-I" $ + pack ("-0.5") @=? runConcat 1 (doubleDec (-0.5)) + , THU.testCase "doubleDec-J" $ + pack ("999999999") @=? runConcat 1 (doubleDec 999999999) + , THU.testCase "doubleDec-K" $ + pack ("-99999999") @=? runConcat 1 (doubleDec (-99999999)) + , THU.testCase "doubleDec-L" $ + AsciiByteArray (pack ("6.6666666666667e-12")) @=? AsciiByteArray (runConcat 1 (doubleDec (2 / 300_000_000_000))) + , THU.testCase "doubleDec-M" $ + AsciiByteArray (pack ("6.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 6.666666666666667e-10)) + , THU.testCase "doubleDec-N" $ + AsciiByteArray (pack ("5e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 5.0e-10)) + , THU.testCase "doubleDec-O" $ + AsciiByteArray (pack ("1.6666666666667e-10")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.6666666666666669e-10)) + , THU.testCase "doubleDec-P" $ + AsciiByteArray (pack ("1e-09")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-9)) + , THU.testCase "doubleDec-Q" $ + AsciiByteArray (pack ("1e-08")) @=? AsciiByteArray (runConcat 1 (doubleDec 1.0e-8)) + , THU.testCase "shortTextJsonString-A" $ + pack ("\"hello\"") @=? runConcat 1 (shortTextJsonString "hello") + , THU.testCase "shortTextJsonString-B" $ + pack ("\"\\\\_\\\"_/\"") @=? runConcat 1 (shortTextJsonString "\\_\"_/") + , THU.testCase "shortTextJsonString-C" $ + pack ("\"Hi\\r\\nLo\"") @=? runConcat 1 (shortTextJsonString "Hi\r\nLo") + , THU.testCase "shortTextJsonString-D" $ + pack ("\"Hi\\u001BLo\"") @=? runConcat 1 (shortTextJsonString "Hi\ESCLo") + , THU.testCase "word-16-tree" $ + Word16Tree.expectedSmall + @=? runConcat + 1 + (Word16Tree.encode Word16Tree.exampleSmall) + , THU.testCase "byteArray-small" $ + let a = replicateByte 3 0x50 + b = replicateByte 5 0x51 + in mconcat [a, b] + @=? runConcat + 1 + (byteArray a <> byteArray b) + , THU.testCase "byteArray-big" $ + let a = replicateByte 2105 0x50 + b = replicateByte 725 0x51 + c = replicateByte 900 0x52 + d = replicateByte 800 0x53 + e = replicateByte 700 0x54 + f = replicateByte 950 0x55 + g = replicateByte 975 0x56 + h = replicateByte 3000 0x57 + i = replicateByte 125 0x58 + in mconcat [a, b, c, d, e, f, g, h, i] + @=? runConcat + 1 + ( byteArray a + <> byteArray b + <> byteArray c + <> byteArray d + <> byteArray e + <> byteArray f + <> byteArray g + <> byteArray h + <> byteArray i + ) + , TQC.testProperty "word16ArrayLE" $ \(xs :: [Word16]) -> + let ys = Exts.fromList xs :: PrimArray Word16 + in runConcat 1 (foldMap word16LE xs) + === runConcat 1 (word16ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word16ArrayBE" $ \(xs :: [Word16]) -> + let ys = Exts.fromList xs :: PrimArray Word16 + in runConcat 1 (foldMap word16BE xs) + === runConcat 1 (word16ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word32ArrayLE" $ \(xs :: [Word32]) -> + let ys = Exts.fromList xs :: PrimArray Word32 + in runConcat 1 (foldMap word32LE xs) + === runConcat 1 (word32ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word32ArrayBE" $ \(xs :: [Word32]) -> + let ys = Exts.fromList xs :: PrimArray Word32 + in runConcat 1 (foldMap word32BE xs) + === runConcat 1 (word32ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word64ArrayLE" $ \(xs :: [Word64]) -> + let ys = Exts.fromList xs :: PrimArray Word64 + in runConcat 1 (foldMap word64LE xs) + === runConcat 1 (word64ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word64ArrayBE" $ \(xs :: [Word64]) -> + let ys = Exts.fromList xs :: PrimArray Word64 + in runConcat 1 (foldMap word64BE xs) + === runConcat 1 (word64ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word128ArrayLE" $ \(xs :: [Word128]) -> + let ys = Exts.fromList xs :: PrimArray Word128 + in runConcat 1 (foldMap word128LE xs) + === runConcat 1 (word128ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word128ArrayBE" $ \(xs :: [Word128]) -> + let ys = Exts.fromList xs :: PrimArray Word128 + in runConcat 1 (foldMap word128BE xs) + === runConcat 1 (word128ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word256ArrayLE" $ \(xs :: [Word256]) -> + let ys = Exts.fromList xs :: PrimArray Word256 + in runConcat 1 (foldMap word256LE xs) + === runConcat 1 (word256ArrayLE ys 0 (Prelude.length xs)) + , TQC.testProperty "word256ArrayBE" $ \(xs :: [Word256]) -> + let ys = Exts.fromList xs :: PrimArray Word256 + in runConcat 1 (foldMap word256BE xs) + === runConcat 1 (word256ArrayBE ys 0 (Prelude.length xs)) + , TQC.testProperty "word64Vlq" $ \(x :: Word64) -> + runConcat 1 (word64Vlq x) + === naiveVlq (fromIntegral x) + , TQC.testProperty "word64LEB128" $ \(x :: Word64) -> + runConcat 1 (word64LEB128 x) + === naiveLeb128 (fromIntegral x) + , TQC.testProperty "naturalDec-A" $ \(x :: Natural) -> + runConcat 1 (naturalDec x) + === pack (show x) + , TQC.testProperty "naturalDec-B" $ \(x :: Natural) -> + let y = 1234567892345678934678987654321 * x + in runConcat 1 (naturalDec y) + === pack (show y) + , testGroup + "leb128-encoding" + [ THU.testCase "16" $ + Chunks.concat (run 16 (word64LEB128 16)) + @=? Latin1.fromString "\x10" + , THU.testCase "1000000" $ + Chunks.concat (run 16 (word64LEB128 1000000)) + @=? Exts.fromList [0xc0, 0x84, 0x3d] + , THU.testCase "deadbeef-smile" $ do + let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" + (Chunks.concat . run 16) (sevenEightSmile inp) + @=? Latin1.fromString "\x6F\x2B\x37\x6E\x0F" + ] + , testGroup + "seven/eight encoding" + [ THU.testCase "deadbeef" $ do + let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" + (Chunks.concat . run 16) (sevenEightRight inp) + @=? Latin1.fromString "\x6F\x2B\x37\x6E\x78" + , THU.testCase "deadbeef-smile" $ do + let inp = Latin1.fromString "\xDE\xAD\xBE\xEF" + (Chunks.concat . run 16) (sevenEightSmile inp) + @=? Latin1.fromString "\x6F\x2B\x37\x6E\x0F" + ] + ] + , testGroup + "alternate" + [ TQC.testProperty "HexWord64" $ \x y -> + runConcat + 1 + ( fromBounded Nat.constant (HexWord64.word64PaddedUpperHex x) + <> fromBounded Nat.constant (HexWord64.word64PaddedUpperHex y) + ) + === pack (showWord64PaddedUpperHex x <> showWord64PaddedUpperHex y) + ] + , testGroup + "putMany" + [ THU.testCase "A" $ do + ref <- newIORef [] + let txt = "hello_world_are_you_listening" :: [Char] + putMany 7 ascii txt (bytesOntoRef ref) + res <- readIORef ref + id $ + [ map c2w "hello_" + , map c2w "world_" + , map c2w "are_yo" + , map c2w "u_list" + , map c2w "ening" + ] + @=? map Exts.toList (Exts.toList res) + ] + , testGroup + "putManyConsLength" + [ THU.testCase "A" $ do + ref <- newIORef [] + let txt = "hello_world_are_you_listening" :: [Char] + putManyConsLength + Nat.constant + (\n -> Bounded.word16BE (fromIntegral n)) + 16 + ascii + txt + (bytesOntoRef ref) + res <- readIORef ref + id $ + [ 0x00 : 0x0A : map c2w "hello_worl" + , 0x00 : 0x0A : map c2w "d_are_you_" + , 0x00 : 0x09 : map c2w "listening" + ] + @=? map Exts.toList (Exts.toList res) + ] + , testGroup + "bytes templates" + [ THU.testCase "A" $ do + let name = Just ("foo" :: ShortText) + msgBuilder = [bldr|Hello `fromMaybe "World" name`!\n|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "Hello foo!\n" @=? msg + , THU.testCase "B" $ do + let one = "foo" :: ShortText + two = "bar" :: String + msgBuilder = [bldr|`one``two`|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "foobar" @=? msg + , THU.testCase "C" $ do + let msgBuilder = [bldr|a backtick for you: \`|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "a backtick for you: `" @=? msg + , THU.testCase "D" $ do + let i = 137 :: Int + msgBuilder = [bldr|there are `i` lights!|] + msg = Chunks.concat . Builder.run 200 $ msgBuilder + in Ascii.fromString "there are 137 lights!" @=? msg + ] ] - ] bytesOntoRef :: - IORef [PM.ByteArray] - -> MutableBytes Exts.RealWorld - -> IO () + IORef [PM.ByteArray] -> + MutableBytes Exts.RealWorld -> + IO () bytesOntoRef !ref (MutableBytes buf off len) = do rs <- readIORef ref dst <- PM.newByteArray len @@ -391,9 +398,10 @@ newtype AsciiByteArray = AsciiByteArray ByteArray deriving (Eq) instance Show AsciiByteArray where - show (AsciiByteArray b) = if Bytes.all (\w -> w >= 32 && w < 127) (Bytes.fromByteArray b) - then Latin1.toString (Bytes.fromByteArray b) - else show (show b) + show (AsciiByteArray b) = + if Bytes.all (\w -> w >= 32 && w < 127) (Bytes.fromByteArray b) + then Latin1.toString (Bytes.fromByteArray b) + else show (show b) instance Arbitrary Word128 where arbitrary = liftA2 Word128 TQC.arbitrary TQC.arbitrary @@ -409,29 +417,30 @@ zeroPadL n s naiveLeb128 :: Natural -> ByteArray naiveLeb128 x = Bytes.toByteArray (Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x))) - where + where go !xs !n = - let (q,r) = quotRem n 128 + let (q, r) = quotRem n 128 r' = fromIntegral @Natural @Word8 r - w = if q == 0 - then r' - else Bits.setBit r' 7 + w = + if q == 0 + then r' + else Bits.setBit r' 7 xs' = w : xs - in if q == 0 + in if q == 0 then L.reverse xs' else go xs' q naiveVlq :: Natural -> ByteArray naiveVlq x = Bytes.toByteArray (Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x))) - where + where go !xs !n = - let (q,r) = quotRem n 128 + let (q, r) = quotRem n 128 r' = fromIntegral @Natural @Word8 r w = case xs of [] -> r' _ -> Bits.setBit r' 7 xs' = w : xs - in if q == 0 + in if q == 0 then xs' else go xs' q