Skip to content

Commit

Permalink
Release 0.3.16.0 with Avro module and a few minor additions
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewthad committed Jan 29, 2024
1 parent 50dbfc1 commit a5cdbf9
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 2 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ Note: Prior to version 0.3.4.0, this library was named
`small-bytearray-builder` is now just a compatibility shim
to ease the migration process.

## 0.3.16.0 -- 2024-01-29

* Add `wordPaddedDec3`.
* Add `Data.Bytes.Builder.Avro`.
* Add `word16LEB128`.
* Stop accepting versions of text lower than 2.0.

## 0.3.15.0 -- 2024-01-05

* Add `Data.Bytes.Builder.Unsafe.pasteUtf8TextJson#` for users who need
Expand Down
5 changes: 3 additions & 2 deletions bytebuild.cabal
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cabal-version: 2.2
name: bytebuild
version: 0.3.15.0
version: 0.3.16.0
synopsis: Build byte arrays
description:
This is similar to the builder facilities provided by
Expand Down Expand Up @@ -37,6 +37,7 @@ flag checked
library
exposed-modules:
Data.Bytes.Builder
Data.Bytes.Builder.Avro
Data.Bytes.Builder.Class
Data.Bytes.Builder.Template
Data.Bytes.Builder.Unsafe
Expand All @@ -58,7 +59,7 @@ library
, primitive-offset >=0.2 && <0.3
, run-st >=0.1.2 && <0.2
, template-haskell >=2.16
, text >=1.2 && <2.2
, text >=2.0 && <2.2
, text-short >=0.1.3 && <0.2
, wide-word >=0.1.0.9 && <0.2
, zigzag
Expand Down
6 changes: 6 additions & 0 deletions src/Data/Bytes/Builder.hs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ module Data.Bytes.Builder
, int32LEB128
, int64LEB128
, wordLEB128
, word16LEB128
, word32LEB128
, word64LEB128
-- **** VLQ
Expand Down Expand Up @@ -1219,6 +1220,11 @@ wordLEB128 :: Word -> Builder
{-# inline wordLEB128 #-}
wordLEB128 w = fromBounded Nat.constant (Bounded.wordLEB128 w)

-- | LEB-128 encoding of a 16-bit word. This wraps the bounded builder
-- 'Bounded.word16LEB128' as an unbounded 'Builder'.
word16LEB128 :: Word16 -> Builder
{-# inline word16LEB128 #-}
word16LEB128 w16 = fromBounded Nat.constant $ Bounded.word16LEB128 w16

-- | Encode a 32-bit word with LEB-128.
word32LEB128 :: Word32 -> Builder
{-# inline word32LEB128 #-}
Expand Down
77 changes: 77 additions & 0 deletions src/Data/Bytes/Builder/Avro.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
{-# language BangPatterns #-}

-- | Builders for encoding data with Apache Avro. Most functions in this
-- module are just aliases for other functions. Avro uses zig-zag LEB128
-- for all integral types.
module Data.Bytes.Builder.Avro
( int
, int32
, int64
, word16
, word32
, word128
, bytes
, chunks
, text
-- * Maps
, map2
) where

import Data.Int
import Data.Word
import Data.Bytes.Builder (Builder)
import Data.Text (Text)
import Data.Bytes (Bytes)
import Data.WideWord (Word128)
import Data.Bytes.Chunks (Chunks)

import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Chunks as Chunks
import qualified Data.Bytes.Builder as B
import qualified Data.Bytes.Text.Utf8 as Utf8

-- | Encode a 32-bit signed integer. This is an alias for
-- 'B.int32LEB128', which produces the zig-zag LEB128 form that Avro uses
-- for integral types.
int32 :: Int32 -> Builder
int32 i = B.int32LEB128 i

-- | Encode a 64-bit signed integer. This is an alias for
-- 'B.int64LEB128'.
int64 :: Int64 -> Builder
int64 i = B.int64LEB128 i

-- | Encode a machine-sized signed integer. This is an alias for
-- 'B.intLEB128'.
int :: Int -> Builder
int i = B.intLEB128 i

-- | Encode a 16-bit word. Avro has no unsigned types, so the value is
-- first widened to a signed 32-bit integer and then zigzag encoded like
-- any other signed number.
word16 :: Word16 -> Builder
word16 w = B.int32LEB128 (fromIntegral w)

-- | Encode a 32-bit word. Avro has no unsigned types, so the value is
-- first widened to a signed 64-bit integer and then zigzag encoded like
-- any other signed number.
word32 :: Word32 -> Builder
word32 w = B.int64LEB128 (fromIntegral w)

-- | Encode a 128-bit word as a @fixed@ value of length 16, using a
-- big-endian byte order. The corresponding schema type must be
-- @{"type": "fixed", "name": "...", "size": 16}@.
word128 :: Word128 -> Builder
word128 w = B.word128BE w

-- | Encode a byte sequence: the length (encoded with 'int') followed by
-- the bytes themselves.
bytes :: Bytes -> Builder
bytes !payload = lengthPrefix <> B.bytes payload
  where
  lengthPrefix = int (Bytes.length payload)

-- | Encode chunked bytes: the total length (encoded with 'int') followed
-- by the contents of all the chunks.
chunks :: Chunks -> Builder
chunks !payload = lengthPrefix <> B.chunks payload
  where
  lengthPrefix = int (Chunks.length payload)

-- | Encode text: the text is converted to its UTF-8 bytes and then
-- encoded with 'bytes', so the length prefix counts bytes.
text :: Text -> Builder
text t = bytes (Utf8.fromText t)

-- | Encode a map that has exactly two key-value pairs, with text keys.
-- The typical use is the header of an avro file, whose metadata map has
-- the two keys @avro.schema@ and @avro.codec@. The values must already
-- be encoded by the caller; they are emitted verbatim.
map2 ::
     Text -- ^ First key
  -> Builder -- ^ First value (already encoded)
  -> Text -- ^ Second key
  -> Builder -- ^ Second value (already encoded)
  -> Builder
{-# inline map2 #-}
map2 k1 v1 k2 v2 = blockCount <> entries <> endOfMap
  where
  -- 0x04 is the zig-zag encoding of 2, the number of pairs that follow.
  blockCount = B.word8 0x04
  entries = text k1 <> v1 <> text k2 <> v2
  -- A count of zero marks the end of the map.
  endOfMap = B.word8 0x00
15 changes: 15 additions & 0 deletions src/Data/Bytes/Builder/Bounded.hs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ module Data.Bytes.Builder.Bounded
, char
-- ** Native
, wordPaddedDec2
, wordPaddedDec3
, wordPaddedDec4
, wordPaddedDec9
-- ** Machine-Readable
Expand Down Expand Up @@ -102,6 +103,7 @@ module Data.Bytes.Builder.Bounded
-- with the high bit of each output byte set to 1 in all bytes except for
-- the final byte.
, wordLEB128
, word16LEB128
, word32LEB128
, word64LEB128
-- **** VLQ
Expand Down Expand Up @@ -788,6 +790,14 @@ wordPaddedDec4 !w = Unsafe.construct $ \arr off -> do
) arr (off + 3) w
pure (off + 4)

-- | Encode a number less than 1000 as a decimal number, zero-padding it
-- to three digits. For example: 0 is encoded as @000@ and 5 is encoded as
-- @005@. The input is not range-checked by this function.
wordPaddedDec3 :: Word -> Builder 3
wordPaddedDec3 !w = Unsafe.construct $ \dst ix -> do
  -- Start at the last of the three bytes. Each nested 'putRem10'
  -- presumably writes one digit and passes the rest to its continuation;
  -- the innermost continuation does nothing.
  putRem10 (putRem10 (putRem10 (\_ _ _ -> pure ()))) dst (ix + 2) w
  pure (ix + 3)

-- | Encode a number less than 1e9 as a decimal number, zero-padding it to
-- nine digits. For example: 0 is encoded as @000000000@ and 5 is encoded as
-- @000000005@.
Expand Down Expand Up @@ -923,6 +933,11 @@ wordLEB128 :: Word -> Builder 10
{-# inline wordLEB128 #-}
wordLEB128 (W# w) = lebCommon (W# w)

-- | Encode a 16-bit word with LEB-128. The bound of three bytes covers
-- the widest input: 16 bits at 7 payload bits per output byte.
word16LEB128 :: Word16 -> Builder 3
{-# inline word16LEB128 #-}
-- Widen to a machine word so that the shared 'lebCommon' encoder is reused.
word16LEB128 (W16# w) = lebCommon (W# (C.word16ToWord# w))

-- | Encode a 32-bit word with LEB-128.
word32LEB128 :: Word32 -> Builder 5
{-# inline word32LEB128 #-}
Expand Down

0 comments on commit a5cdbf9

Please sign in to comment.