From 625b0506a538061c84e5d4cdabbbac0c5c326f5a Mon Sep 17 00:00:00 2001 From: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> Date: Sun, 15 Oct 2023 20:57:01 -0400 Subject: [PATCH] Breaking: Remove support for types not in v3 Core (#40) * Remove support for non v3 types * fix test deps --- Project.toml | 5 +- README.md | 2 +- src/SmallZarrGroups.jl | 2 - src/ZArray.jl | 12 +- src/char-utf32.jl | 87 -------- src/zarr-meta-parsing.jl | 97 +-------- src/zarr-meta-writing.jl | 25 --- test/Artifacts.toml | 6 +- test/Fixture-README.md | 5 +- test/Project.toml | 1 - test/experimental/test_chunking.jl | 1 - test/runtests.jl | 1 - test/test_char-utf32.jl | 339 ----------------------------- test/test_edge-cases.jl | 1 - test/test_read-write-fixture.jl | 10 - test/test_zarr-meta-parsing.jl | 227 +------------------ test/test_zarr-meta-writing.jl | 19 -- 17 files changed, 15 insertions(+), 825 deletions(-) delete mode 100644 src/char-utf32.jl delete mode 100644 test/test_char-utf32.jl diff --git a/Project.toml b/Project.toml index 839fc87..04826ab 100644 --- a/Project.toml +++ b/Project.toml @@ -11,8 +11,6 @@ Blosc_jll = "0b7ba130-8d10-5ba8-a3d6-c5182647fed9" CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" -StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" -StaticStrings = "4db0a0c5-418a-4e1d-8806-cb305fe13294" StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" @@ -20,12 +18,11 @@ ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" [compat] AbstractTrees = "0.4" ArgCheck = "2" +Base64 = "1.8" Blosc_jll = "1.21" CodecZlib = "0.7" DataStructures = "0.18" JSON3 = "1" -StaticArraysCore = "1" -StaticStrings = "0.2" StructArrays = "0.6" TranscodingStreams = "0.9, 0.10" ZipArchives = "0.3, 0.4, 0.5, 1" diff --git a/README.md b/README.md index aa356d6..9462225 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In memory hierarchy of arrays and attributes loaded from disk or to be saved to 1. If you need to store huge datasets that cannot fit uncompressed in memory consider using https://github.com/JuliaIO/HDF5.jl or https://github.com/JuliaIO/Zarr.jl 1. If you just want to serialize arbitrary Julia data consider using https://github.com/JuliaIO/JLD2.jl or https://github.com/invenia/JLSO.jl -2. Numpy datetime64 (“M”) and timedelta64 (“m”) data types are read as Int64. +2. Only Numpy types "b i u f c V" are supported. 3. Zarr filters are not supported. ## Overview diff --git a/src/SmallZarrGroups.jl b/src/SmallZarrGroups.jl index 137f017..6a776c4 100644 --- a/src/SmallZarrGroups.jl +++ b/src/SmallZarrGroups.jl @@ -10,8 +10,6 @@ export ZGroup export attrs export children -include("char-utf32.jl") - include("ZArray.jl") include("ZGroup.jl") diff --git a/src/ZArray.jl b/src/ZArray.jl index 07759b5..fe8d734 100644 --- a/src/ZArray.jl +++ b/src/ZArray.jl @@ -1,5 +1,4 @@ -using StaticArraysCore -using StaticStrings + const DEFAULT_COMPRESSOR = JSON3.read("""{ "blocksize": 0, @@ -26,22 +25,15 @@ const ZDataTypes = Union{ ComplexF16, ComplexF32, ComplexF64, - StaticString, - SVector{N, CharUTF32} where N, NTuple{N, UInt8} where N, } function isvalidtype(T::Type)::Bool isbitstype(T) && ( - (T <: ZDataTypes) || - (T <: NamedTuple) && all(_isvalidfieldtype, fieldtypes(T)) + (T <: ZDataTypes) ) end -function _isvalidfieldtype(T::Type)::Bool - isvalidtype(T) || (T <: SArray) && isvalidtype(eltype(T)) -end - """ Create a ZArray. diff --git a/src/char-utf32.jl b/src/char-utf32.jl deleted file mode 100644 index d3396c9..0000000 --- a/src/char-utf32.jl +++ /dev/null @@ -1,87 +0,0 @@ -""" -Native endian utf-32 character. -Also represents too high invalid code points from 0x10_FFFF to 0xFFFF_FFFF -Use `isvalid(c::CharUTF32)` to see if the character can be used in a valid UTF-8 string. - -Has a defined fixed total order when compared to `Char`. -`CharUTF32` with value below or equal to 0x1F_FFFF are converted to `Char` when compared to `Char`. -`CharUTF32` with value above 0x1F_FFFF cannot be converted to `Char`. They are ordered after all `Char`. -""" -struct CharUTF32 <: AbstractChar - value::UInt32 - CharUTF32(value::UInt32) = new(value) -end - -function Base.codepoint(c::CharUTF32) - # Not throwing errors if the code point is too high. - # This enables fallbacks like `isascii` to work correctly. - # c.value > 0x1F_FFFF && throw(Base.InvalidCharError(c)) - c.value -end - -Base.typemax(::Type{CharUTF32}) = CharUTF32(typemax(UInt32)) -Base.typemin(::Type{CharUTF32}) = CharUTF32(typemin(UInt32)) - -Base.IteratorSize(::Type{CharUTF32}) = Base.HasShape{0}() - -Base.isless(x::CharUTF32, y::CharUTF32) = isless(x.value, y.value) - -# values above 0x1F_FFFF are after all Char. -function Base.isless(x::Char, y::CharUTF32) - if y.value ≤ 0x1F_FFFF - isless(x, Char(y.value)) - else - true - end -end -function Base.isless(y::CharUTF32, x::Char) - if y.value ≤ 0x1F_FFFF - isless(Char(y.value),x) - else - false - end -end - -Base.:(==)(x::CharUTF32, y::CharUTF32) = x.value == y.value -function Base.:(==)(x::Char, y::CharUTF32) - if y.value ≤ 0x1F_FFFF - Char(y.value) == x - else - false - end -end -function Base.:(==)(y::CharUTF32, x::Char) - if y.value ≤ 0x1F_FFFF - Char(y.value) == x - else - false - end -end - -function Base.hash(c::CharUTF32, h::UInt) - if c.value ≤ 0x1F_FFFF - hash(Char(c.value), h) - else - hash(c.value, h) - end -end - -function Base.write(io::IO, c::CharUTF32) - write(io, UInt32(c)) -end - -function Base.read(io::IO, ::Type{CharUTF32}) - CharUTF32(read(io, UInt32)) -end - -function Base.show(io::IO, c::CharUTF32) - if c.value ≤ 0x1F_FFFF - print(io, typeof(c), '(') - show(io, Char(c)) - print(io, ')') - else - print(io, typeof(c), '(', ) - show(io, c.value) - print(io, ')') - end -end \ No newline at end of file diff --git a/src/zarr-meta-parsing.jl b/src/zarr-meta-parsing.jl index 743d0c2..5720e69 100644 --- a/src/zarr-meta-parsing.jl +++ b/src/zarr-meta-parsing.jl @@ -1,8 +1,6 @@ # Parse zarr array meta data descriptions. using ArgCheck -using StaticArraysCore -using StaticStrings import JSON3 import Base64 @@ -57,10 +55,8 @@ function parse_zarr_type(typestr::String; silence_warnings=false)::ParsedType numthings = parse(Int,rstrip(!isdigit, typestr[3:end])) units = lstrip(isdigit, typestr[3:end])[begin+1:end-1] @argcheck byteorder in "<>|" - @argcheck typechar in "biufcmMSUV" + @argcheck typechar in "biufcV" @argcheck numthings ≥ 0 - @argcheck (typechar in "mM") ⊻ isempty(units) - @argcheck units in ("","Y","M","W","D","h","m","s","ms","μs","us","ns","ps","fs","as") # actual number of bytes if typechar == 'b' @argcheck numthings == 1 @@ -118,39 +114,6 @@ function parse_zarr_type(typestr::String; silence_warnings=false)::ParsedType byteorder = in_native_order ? (1:numthings) : [numthings÷2:-1:1; numthings:-1:numthings÷2+1;], alignment = ALIGNMENT_LOOKUP[tz], ) - elseif (typechar == 'm') | (typechar == 'M') - @argcheck byteorder in "<>" - @argcheck numthings == 8 - silence_warnings || @warn "timedelta64 and datatime64 not supported, converting to Int64" - in_native_order = (byteorder == NATIVE_ORDER) - tz = trailing_zeros(numthings) - return ParsedType(; - julia_type = Int64, - julia_size = 8, - byteorder = in_native_order ? (1:8) : (8:-1:1), - alignment = ALIGNMENT_LOOKUP[4], - ) - elseif typechar == 'S' - return ParsedType(; - julia_type = StaticString{numthings}, - julia_size = numthings, - byteorder = 1:numthings, - alignment = 0, - ) - elseif typechar == 'U' - @argcheck (byteorder in "<>") || iszero(numthings) - in_native_order = (byteorder == NATIVE_ORDER) || iszero(numthings) - _byteorder = if in_native_order - collect(1:numthings*4) - else - collect(Iterators.flatten((4+4i,3+4i,2+4i,1+4i) for i in 0:numthings-1)) - end - return ParsedType(; - julia_type = SVector{numthings, CharUTF32}, - julia_size = numthings*4, - byteorder = _byteorder, - alignment = iszero(numthings) ? 0 : 2, - ) elseif typechar == 'V' return ParsedType(; julia_type = NTuple{numthings, UInt8}, @@ -158,6 +121,8 @@ function parse_zarr_type(typestr::String; silence_warnings=false)::ParsedType byteorder = 1:numthings, alignment = 0, ) + else + error("Unreachable") end end @@ -165,61 +130,7 @@ end Parse a structured zarr typestr """ function parse_zarr_type(descr::JSON3.Array; silence_warnings=false)::ParsedType - current_byte = 0 - max_alignment = 0 - byteorder = Int[] - feldnames = Symbol[] - feldtypes = Type[] - for feld in descr - name::String = feld[1] - parsed_type::ParsedType = if length(feld) == 3 - # Parse static array field. - @argcheck feld[3] isa JSON3.Array - shape::Vector{Int} = collect(Int,feld[3]) - el_type = parse_zarr_type(feld[2]; silence_warnings) - el_size = el_type.julia_size - zarr_el_size = el_type.zarr_size - array_byteorder = Vector{Int}(undef, el_type.zarr_size*prod(shape)) - # This thing converts a row major linear index to a column major index. - # This is needed because numpy static arrays are always in row major order - # and Julia static arrays are always in column major order. - converter_thing = PermutedDimsArray(LinearIndices(Tuple(shape)),reverse(1:length(shape))) - for i in 1:length(converter_thing) - column_major_idx_0::Int = converter_thing[i] - 1 - local byte_offset::Int = column_major_idx_0*el_size - array_byteorder[(1+zarr_el_size*(i-1)):(zarr_el_size*i)] .= el_type.byteorder .+ byte_offset - end - ParsedType(; - julia_type = SArray{Tuple{shape...,}, el_type.julia_type, length(shape), prod(shape)}, - julia_size = el_size*prod(shape), - zarr_size = length(byteorder), - byteorder = array_byteorder, - alignment = el_type.alignment, - ) - elseif length(feld) == 2 - parse_zarr_type(feld[2]; silence_warnings) - else - error("field must have 2 or three elements") - end - push!(feldnames, Symbol(name)) - push!(feldtypes, parsed_type.julia_type) - alignment = parsed_type.alignment - max_alignment = max(max_alignment, alignment) - num_padding = 2^alignment - mod1(current_byte,2^alignment) - current_byte += num_padding - @assert iszero(mod(current_byte, 2^alignment)) - append!(byteorder, parsed_type.byteorder .+ current_byte) - current_byte += parsed_type.julia_size - end - num_padding = 2^max_alignment - mod1(current_byte,2^max_alignment) - current_byte += num_padding - ParsedType(; - julia_type = NamedTuple{(feldnames...,), Tuple{feldtypes...,}}, - julia_size = current_byte, - zarr_size = length(byteorder), - byteorder, - alignment = max_alignment, - ) + error("Structured types not supported") end diff --git a/src/zarr-meta-writing.jl b/src/zarr-meta-writing.jl index 02c796a..a6df005 100644 --- a/src/zarr-meta-writing.jl +++ b/src/zarr-meta-writing.jl @@ -34,33 +34,8 @@ function write_type(io::IO, t::Type) print(io, "\"", NATIVE_ORDER, "c8\"") elseif t <: ComplexF64 print(io, "\"", NATIVE_ORDER, "c16\"") - elseif t <: StaticString - print(io, "\"|S", sizeof(t), "\"") elseif t <: (NTuple{N,UInt8} where N) print(io, "\"|V", sizeof(t), "\"") - elseif t <: (SVector{N,CharUTF32} where N) - print(io, "\"", NATIVE_ORDER, "U", sizeof(t)>>2, "\"") - elseif t <: NamedTuple - print(io, "[") - need_comma = false - for (fname, ftype) in zip(fieldnames(t), fieldtypes(t)) - if need_comma - print(io, ", ") - end - print(io, "[\"", fname, "\", ") - if (ftype <: SArray) && !(eltype(ftype) <: CharUTF32) - write_type(io, eltype(ftype)) - print(io, ", [") - join(io, size(ftype), ", ") - print(io, "]") - else - write_type(io, ftype) - end - print(io, "]") - need_comma = true - end - print(io, "]") - else error("type $t cannot be saved in a zarr array") end diff --git a/test/Artifacts.toml b/test/Artifacts.toml index 990a004..dd70ac3 100644 --- a/test/Artifacts.toml +++ b/test/Artifacts.toml @@ -1,6 +1,6 @@ [fixture] -git-tree-sha1 = "487f11e78ae48112d036c7b146c767e7753b37e6" +git-tree-sha1 = "326a434121863402506a04ef77d42b049ccbcd1a" [[fixture.download]] - sha256 = "4f0cccaaaa303fb876a7c5c3d6005f18cceeec7e73fb90a846600be72e020ed4" - url = "https://github.com/medyan-dev/SmallZarrGroups.jl/releases/download/v0.6.6/fixture.tar.gz" + sha256 = "59bbccec99379cdd4b4e6c7aba4e15cac146138d0c291fc4127955868ce3c934" + url = "https://github.com/medyan-dev/SmallZarrGroups.jl/releases/download/v0.6.6/fixture2.tar.gz" diff --git a/test/Fixture-README.md b/test/Fixture-README.md index 92032f6..5ed9a74 100644 --- a/test/Fixture-README.md +++ b/test/Fixture-README.md @@ -16,13 +16,14 @@ Add the file to the "fixture" directory, and a description to this file. Then run ```julia # This is the url that the artifact will be available from: -url_to_upload_to = "https://github.com/medyan-dev/SmallZarrGroups.jl/releases/download/v0.6.6/fixture.tar.gz" +tar_name = "fixture2.tar.gz" +url_to_upload_to = "https://github.com/medyan-dev/SmallZarrGroups.jl/releases/download/v0.6.6/"*tar_name # This is the path to the Artifacts.toml we will manipulate artifact_toml = "Artifacts.toml" fixture_hash = create_artifact() do artifact_dir cp(fixture_dir, artifact_dir; force=true) end -tar_hash = archive_artifact(fixture_hash, "fixture.tar.gz") +tar_hash = archive_artifact(fixture_hash, tar_name) bind_artifact!(artifact_toml, "fixture", fixture_hash; force=true, download_info = [(url_to_upload_to, tar_hash)] ) diff --git a/test/Project.toml b/test/Project.toml index 7abe62a..52bed82 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -7,6 +7,5 @@ PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SmallZarrGroups = "d423b6e5-1c84-4ae2-8d2d-b903aee15ac7" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" -StaticStrings = "4db0a0c5-418a-4e1d-8806-cb305fe13294" StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/experimental/test_chunking.jl b/test/experimental/test_chunking.jl index e977844..5914a00 100644 --- a/test/experimental/test_chunking.jl +++ b/test/experimental/test_chunking.jl @@ -1,6 +1,5 @@ using SmallZarrGroups using DataStructures: SortedDict, OrderedDict -using StaticArrays using Test #These are tests for experimental features that are not stable API yet diff --git a/test/runtests.jl b/test/runtests.jl index 5ddd8ab..4fca50d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,6 @@ using Random Random.seed!(1234) -include("test_char-utf32.jl") include("test_zarr-meta-parsing.jl") include("test_zarr-meta-writing.jl") include("test_simple-usage.jl") diff --git a/test/test_char-utf32.jl b/test/test_char-utf32.jl deleted file mode 100644 index f7c52b4..0000000 --- a/test/test_char-utf32.jl +++ /dev/null @@ -1,339 +0,0 @@ -# Based on tests in Julia "test/char.jl: # This file is a part of Julia. License is MIT: https://julialang.org/license" - -using SmallZarrGroups -using Test - -const C32 = SmallZarrGroups.CharUTF32 - -@testset "CharUTF32" begin - - -@testset "basic properties" begin - @test typemax(C32) == C32(0xffffffff) - @test typemin(C32) == C32(0) - @test ndims(C32) == 0 - @test getindex(C32('a'), 1) == C32('a') - @test_throws BoundsError getindex(C32('a'), 2) - # This is current behavior, but it seems questionable - @test getindex(C32('a'), 1, 1, 1) == C32('a') - @test_throws BoundsError getindex(C32('a'), 1, 1, 2) - - @test C32('b') + 1 == C32('c') - @test typeof(C32('b') + 1) == C32 - @test 1 + C32('b') == C32('c') - @test typeof(1 + C32('b')) == C32 - @test C32('b') - 1 == C32('a') - @test typeof(C32('b') - 1) == C32 - - @test widen(C32('a')) === C32('a') -end - -@testset "ASCII conversion to/from Integer" begin - numberchars = C32.(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']) - lowerchars = C32.(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']) - upperchars = C32.(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']) - plane1_playingcards = C32.(['🂠', '🂡', '🂢', '🂣', '🂤', '🂥', '🂦', '🂧', '🂨', '🂩', '🂪', '🂫', '🂬', '🂭', '🂮']) - plane2_cjkpart1 = C32.(['𠀀', '𠀁', '𠀂', '𠀃', '𠀄', '𠀅', '𠀆', '𠀇', '𠀈', '𠀉', '𠀊', '𠀋', '𠀌', '𠀍', '𠀎', '𠀏']) - - testarrays = [numberchars; lowerchars; upperchars; plane1_playingcards; plane2_cjkpart1] - - #Integer(x::C32) = Int(x) - #tests ASCII 48 - 57 - counter = 48 - for x in numberchars - @test Integer(x) == counter - @test Char(x) == x - @test Integer(Char(x)) == counter - counter += 1 - end - - #tests ASCII 65 - 90 - counter = 65 - for x in upperchars - @test Integer(x) == counter - @test Char(x) == x - @test Integer(Char(x)) == counter - counter += 1 - end - - #tests ASCII 97 - 122 - counter = 97 - for x in lowerchars - @test Integer(x) == counter - @test Char(x) == x - @test Integer(Char(x)) == counter - counter += 1 - end - - #tests Unicode plane 1: 127136 - 127150 - counter = 127136 - for x in plane1_playingcards - @test Integer(x) == counter - @test Char(x) == x - @test Integer(Char(x)) == counter - counter += 1 - end - - #tests Unicode plane 2: 131072 - 131087 - counter = 131072 - for x in plane2_cjkpart1 - @test Integer(x) == counter - @test Char(x) == x - @test Integer(Char(x)) == counter - counter += 1 - end - - for x = 1:9 - @test convert(C32, Float16(x)) == convert(C32, Float32(x)) == convert(C32, Float64(x)) == C32(x) == Char(x) - end - - for x in testarrays - @test size(x) == () - @test_throws BoundsError size(x,0) - @test size(x,1) == 1 - end - - #ndims(c::Char) = 0 - for x in testarrays - @test ndims(x) == 0 - end - - #length(c::Char) = 1 - for x in testarrays - @test length(x) == 1 - end - - #lastindex(c::Char) = 1 - for x in testarrays - @test lastindex(x) == 1 - end - - #getindex(c::Char) = c - for x in testarrays - @test getindex(x) == x - @test getindex(x, CartesianIndex()) == x - end - - #first(c::Char) = c - for x in testarrays - @test first(x) == x - end - - #last(c::Char) = c - for x in testarrays - @test last(x) == x - end - - #eltype(c::C32) = C32 - for x in testarrays - @test eltype(x) == C32 - end - - #iterate(c::Char) - for x in testarrays - @test iterate(x)[1] == x - @test iterate(x, iterate(x)[2]) == nothing - end - - #isless(x::Char, y::Integer) = isless(UInt32(x), y) - for T in (Char, C32) - for x in upperchars - @test isless(x, T(91)) == true - end - - for x in lowerchars - @test isless(x, T(123)) == true - end - - for x in numberchars - @test isless(x, T(66)) == true - end - - for x in plane1_playingcards - @test isless(x, T(127151)) == true - end - - for x in plane2_cjkpart1 - @test isless(x, T(131088)) == true - end - - #isless(x::Integer, y::Char) = isless(x, UInt32(y)) - for x in upperchars - @test isless(T(64), x) == true - end - - for x in lowerchars - @test isless(T(96), x) == true - end - - for x in numberchars - @test isless(T(47), x) == true - end - - for x in plane1_playingcards - @test isless(T(127135), x) == true - end - - for x in plane2_cjkpart1 - @test isless(T(131071), x) == true - end - end - - @test !isequal(C32('x'), 120) - @test convert(Signed, C32('A')) === Int32(65) - @test convert(Unsigned, C32('A')) === UInt32(65) -end - -@testset "issue #14573" begin - array = C32.(['a', 'b', 'c']) + [1, 2, 3] - @test array == ['b', 'd', 'f'] - @test eltype(array) == C32 - - array = [1, 2, 3] + C32.(['a', 'b', 'c']) - @test array == ['b', 'd', 'f'] - @test eltype(array) == C32 - - array = C32.(['a', 'b', 'c']) - [0, 1, 2] - @test array == ['a', 'a', 'a'] - @test eltype(array) == C32 -end - -@testset "sprint, repr" begin - @test sprint(show, "text/plain", C32('$')) == "SmallZarrGroups.CharUTF32('\$'): ASCII/Unicode U+0024 (category Sc: Symbol, currency)" - @test sprint(show, "text/plain", C32('$'), context=:compact => true) == "CharUTF32('\$')" - @test repr(C32('$')) == "SmallZarrGroups.CharUTF32('\$')" -end - -@testset "reading and writing" begin - # writes 4 bytes per char, in native endian byte order. - test_chars = [C32('a'), C32('\U0010ffff')] - for a in test_chars - local iob = IOBuffer() - @test write(iob, C32('a')) == 4 - seekstart(iob) - @test read(iob, C32) == 'a' - seekstart(iob) - @test read(iob, UInt32) == UInt32('a') - end -end - -@testset "abstractchar" begin - @test C32('x') === C32(UInt32('x')) - @test convert(C32, 2.0) == Char(2) - - @test isascii(C32('x')) - @test C32('x') < 'y' - @test C32('x') == 'x' === Char(C32('x')) === convert(Char, C32('x')) - @test C32('x')^3 == "xxx" - @test repr(C32('x')) == "SmallZarrGroups.CharUTF32('x')" - @test string(C32('x')) == "x" - @test length(C32('x')) == 1 - @test !isempty(C32('x')) - @test eltype(C32) == C32 - @test_throws EOFError read(IOBuffer("x"), C32) - @test_throws MethodError ncodeunits(C32('x')) - @test hash(C32('x'), UInt(10)) == hash('x', UInt(10)) - @test Base.IteratorSize(C32) == Base.HasShape{0}() - @test convert(C32, 1) == Char(1) -end - -@testset "broadcasting of Char" begin - @test identity.(C32('a')) == 'a' - @test C32('a') .* [C32('b'), C32('c')] == ["ab", "ac"] -end - -@testset "code point format of U+ syntax (PR 33291)" begin - @test repr("text/plain", C32('\n')) == "SmallZarrGroups.CharUTF32('\\n'): ASCII/Unicode U+000A (category Cc: Other, control)" - @test isascii(C32('\n')) - @test isvalid(C32('\n')) - @test repr("text/plain", C32('/')) == "SmallZarrGroups.CharUTF32('/'): ASCII/Unicode U+002F (category Po: Punctuation, other)" - @test isascii(C32('/')) - @test isvalid(C32('/')) - @test repr("text/plain", C32('\u10e')) == "SmallZarrGroups.CharUTF32('Ď'): Unicode U+010E (category Lu: Letter, uppercase)" - @test !isascii(C32('\u10e')) - @test isvalid(C32('\u10e')) - @test repr("text/plain", C32('\u3a2c')) == "SmallZarrGroups.CharUTF32('㨬'): Unicode U+3A2C (category Lo: Letter, other)" - @test !isascii(C32('\u3a2c')) - @test isvalid(C32('\u3a2c')) - @test repr("text/plain", C32('\udf00')) == "SmallZarrGroups.CharUTF32('\\udf00'): Unicode U+DF00 (category Cs: Other, surrogate)" - @test !isascii(C32('\udf00')) - @test !isvalid(C32('\udf00')) - @test repr("text/plain", C32('\U001f428')) == "SmallZarrGroups.CharUTF32('🐨'): Unicode U+1F428 (category So: Symbol, other)" - @test !isascii(C32('\U001f428')) - @test isvalid(C32('\U001f428')) - @test repr("text/plain", C32('\U010f321')) == "SmallZarrGroups.CharUTF32('\\U10f321'): Unicode U+10F321 (category Co: Other, private use)" - @test !isascii(C32('\U010f321')) - @test isvalid(C32('\U010f321')) - @test repr("text/plain", C32(0x00_10_ff_ff)) == "SmallZarrGroups.CharUTF32('\\U10ffff'): Unicode U+10FFFF (category Cn: Other, not assigned)" - @test !isascii(C32(0x00_10_ff_ff)) - @test isvalid(C32(0x00_10_ff_ff)) - @test repr("text/plain", C32(0x00_1f_ff_ff)) == "SmallZarrGroups.CharUTF32('\\U1fffff'): Unicode U+1FFFFF (category In: Invalid, too high)" - @test !isascii(C32(0x00_1f_ff_ff)) - @test !isvalid(C32(0x00_1f_ff_ff)) - @test repr("text/plain", C32(0x00_20_00_00)) == "SmallZarrGroups.CharUTF32(0x00200000): Unicode U+200000 (category In: Invalid, too high)" - @test !isascii(C32(0x00_20_00_00)) - @test !isvalid(C32(0x00_20_00_00)) - @test repr("text/plain", C32(0xff_ff_ff_ff)) == "SmallZarrGroups.CharUTF32(0xffffffff): Unicode U+FFFFFFFF (category In: Invalid, too high)" - @test !isascii(C32(0xff_ff_ff_ff)) - @test !isvalid(C32(0xff_ff_ff_ff)) -end - -@testset "errors on converting to Char" begin - @test Char(C32('a')) === 'a' - @test Char(C32(0x1F_FF_FF)) === Char(0x1F_FF_FF) - @test_throws Base.CodePointError{UInt32}(0x00200000) Char(C32(0x00200000)) - @test_throws Base.CodePointError{UInt32}(0xFFFFFFFF) Char(C32(0xFFFFFFFF)) - @test String([C32('a'),C32('b')]) == "ab" - @test_throws Base.CodePointError{UInt32}(0x00200000) String([C32('a'),C32(0x00200000)]) -end - -@testset "total ordering" begin - local test_values = sort(Union{Char,C32}[ - Char(0), - C32(0), - Char(0x57), - C32(0x57), - reinterpret(Char, 0x00_00_00_01), - Char(0x1F_FF_FF), - reinterpret(Char, 0x1F_FF_FF_01), - C32(0x1F_FF_FF), - C32(0x00_20_00_00), - C32(0xff_ff_ff_ff), - reinterpret(Char, 0xff_ff_ff_ff), - "\xc0\x80"[1], - reinterpret(Char, 0x57_00_00_01), - C32('\U001f428'), - '\U001f428', - ]) - local n = length(test_values) - for i in 1:n - for j in 1:n - x = test_values[i] - y = test_values[j] - @test isequal(x, y) === isequal(y, x) - if isequal(x, y) - @test hash(x) === hash(y) - end - @test isequal(x, y) + isless(x, y) + isless(y, x) == 1 - if i < j - @test isequal(x, y) | isless(x, y) - else - @test isequal(x, y) | isless(y, x) - end - end - end - for i in 1:n - for j in 1:n - for k in 1:n - x = test_values[i] - y = test_values[j] - z = test_values[k] - if isless(x, y) && isless(y, z) - isless(x, z) - end - end - end - end -end - -end # @testset "CharUTF32" \ No newline at end of file diff --git a/test/test_edge-cases.jl b/test/test_edge-cases.jl index 2c515f8..49f548e 100644 --- a/test/test_edge-cases.jl +++ b/test/test_edge-cases.jl @@ -1,6 +1,5 @@ using SmallZarrGroups using DataStructures: SortedDict, OrderedDict -using StaticArrays using Test @testset "saving and loading attrs on root" begin diff --git a/test/test_read-write-fixture.jl b/test/test_read-write-fixture.jl index 38c3147..b52f986 100644 --- a/test/test_read-write-fixture.jl +++ b/test/test_read-write-fixture.jl @@ -1,5 +1,4 @@ using SmallZarrGroups -using StaticArrays using DataStructures: SortedDict, OrderedDict using Test using Pkg.Artifacts @@ -110,13 +109,4 @@ end SmallZarrGroups.save_dir(path, g) disk_load_compare(zarr, path) end -end - -@testset "UTF32 SVector zarr-python compatibility" begin - g = ZGroup() - g["a"] = [SA[SmallZarrGroups.CharUTF32('🐨'),SmallZarrGroups.CharUTF32('🐨')]] - mktempdir() do path - SmallZarrGroups.save_dir(path, g) - disk_load_compare(zarr, path) - end end \ No newline at end of file diff --git a/test/test_zarr-meta-parsing.jl b/test/test_zarr-meta-parsing.jl index 71c5e14..1e53410 100644 --- a/test/test_zarr-meta-parsing.jl +++ b/test/test_zarr-meta-parsing.jl @@ -1,8 +1,6 @@ using SmallZarrGroups using JSON3 using Test -using StaticArrays -using StaticStrings "Character for native byte order" const NATIVE_ORDER = (ENDIAN_BOM == 0x04030201) ? '<' : '>' @@ -19,8 +17,6 @@ const OTHER_ORDER = (ENDIAN_BOM == 0x04030201) ? '>' : '<' alignment = 0, ) tests = [ - "S0"=>StaticString{0}, - "U0"=>SVector{0,SmallZarrGroups.CharUTF32}, "V0"=>NTuple{0,UInt8}, ] for pair in tests @@ -42,7 +38,6 @@ const OTHER_ORDER = (ENDIAN_BOM == 0x04030201) ? '>' : '<' "b1"=>Bool, "i1"=>Int8, "u1"=>UInt8, - "S1"=>StaticString{1}, "V1"=>NTuple{1,UInt8}, ] for pair in tests @@ -105,28 +100,6 @@ const OTHER_ORDER = (ENDIAN_BOM == 0x04030201) ? '>' : '<' ) end end - @testset "datetime types" begin - tests = [ - "M8[ns]", - "m8[ns]", - "M8[D]", - "m8[D]", - ] - for teststr in tests - @test SmallZarrGroups.parse_zarr_type(NATIVE_ORDER*teststr; silence_warnings=true) == SmallZarrGroups.ParsedType( - julia_type = Int64, - julia_size = 8, - byteorder = 1:8, - alignment = 3, - ) - @test SmallZarrGroups.parse_zarr_type(OTHER_ORDER*teststr; silence_warnings=true) == SmallZarrGroups.ParsedType( - julia_type = Int64, - julia_size = 8, - byteorder = 8:-1:1, - alignment = 3, - ) - end - end @testset "static bytes types" begin staticstringtype(t,n) = SmallZarrGroups.ParsedType( julia_type = t{n}, @@ -134,211 +107,15 @@ const OTHER_ORDER = (ENDIAN_BOM == 0x04030201) ? '>' : '<' byteorder = 1:n, alignment = 0, ) - for (typestr, t) in ("S"=>StaticString, "V"=>(NTuple{N,UInt8} where N)) + for (typestr, t) in ("V" => (NTuple{N,UInt8} where N),) for n in 0:1050 for order in "<>|" @test SmallZarrGroups.parse_zarr_type(order*typestr*string(n)) == staticstringtype(t,n) end end end - @test SmallZarrGroups.parse_zarr_type("|S100000") == staticstringtype(StaticString,100000) @test SmallZarrGroups.parse_zarr_type("|V100000") == staticstringtype((NTuple{N,UInt8} where N),100000) end - @testset "static 32bit char vector" begin - @test SmallZarrGroups.parse_zarr_type(NATIVE_ORDER*"U1") == SmallZarrGroups.ParsedType( - julia_type = SVector{1,SmallZarrGroups.CharUTF32}, - julia_size = 4, - byteorder = 1:4, - alignment = 2, - ) - @test SmallZarrGroups.parse_zarr_type(NATIVE_ORDER*"U2") == SmallZarrGroups.ParsedType( - julia_type = SVector{2,SmallZarrGroups.CharUTF32}, - julia_size = 8, - byteorder = 1:8, - alignment = 2, - ) - @test SmallZarrGroups.parse_zarr_type(NATIVE_ORDER*"U3") == SmallZarrGroups.ParsedType( - julia_type = SVector{3,SmallZarrGroups.CharUTF32}, - julia_size = 12, - byteorder = 1:12, - alignment = 2, - ) - @test SmallZarrGroups.parse_zarr_type(NATIVE_ORDER*"U3000") == SmallZarrGroups.ParsedType( - julia_type = SVector{3000,SmallZarrGroups.CharUTF32}, - julia_size = 12000, - byteorder = 1:12000, - alignment = 2, - ) - @test SmallZarrGroups.parse_zarr_type(OTHER_ORDER*"U1") == SmallZarrGroups.ParsedType( - julia_type = SVector{1,SmallZarrGroups.CharUTF32}, - julia_size = 4, - byteorder = 4:-1:1, - alignment = 2, - ) - @test SmallZarrGroups.parse_zarr_type(OTHER_ORDER*"U2") == SmallZarrGroups.ParsedType( - julia_type = SVector{2,SmallZarrGroups.CharUTF32}, - julia_size = 8, - byteorder = [4,3,2,1,8,7,6,5], - alignment = 2, - ) - @test SmallZarrGroups.parse_zarr_type(OTHER_ORDER*"U3") == SmallZarrGroups.ParsedType( - julia_type = SVector{3,SmallZarrGroups.CharUTF32}, - julia_size = 12, - byteorder = [4,3,2,1,8,7,6,5,12,11,10,9], - alignment = 2, - ) - @test SmallZarrGroups.parse_zarr_type(OTHER_ORDER*"U5") == SmallZarrGroups.ParsedType( - julia_type = SVector{5,SmallZarrGroups.CharUTF32}, - julia_size = 20, - byteorder = [4,3,2,1,8,7,6,5,12,11,10,9,16,15,14,13,20,19,18,17], - alignment = 2, - ) - end -end - - -@testset "structured type parsing with no shape" begin - read_parse(s) = SmallZarrGroups.parse_zarr_type(JSON3.read(s)) - @testset "zero fields" begin - @test read_parse("[]") == SmallZarrGroups.ParsedType( - julia_type = NamedTuple{(), Tuple{}}, - julia_size = 0, - zarr_size = 0, - byteorder = [], - alignment = 0, - ) - end - @testset "one field" begin - @test read_parse("""[["r", "|u1"]]""") == SmallZarrGroups.ParsedType( - julia_type = @NamedTuple{r::UInt8}, - julia_size = 1, - zarr_size = 1, - byteorder = [1], - alignment = 0, - ) - @test read_parse("""[["g", "$(NATIVE_ORDER)u8"]]""") == SmallZarrGroups.ParsedType( - julia_type = @NamedTuple{g::UInt64}, - julia_size = 8, - zarr_size = 8, - byteorder = 1:8, - alignment = 3, - ) - @test read_parse("""[["g", "$(OTHER_ORDER)u8"]]""") == SmallZarrGroups.ParsedType( - julia_type = @NamedTuple{g::UInt64}, - julia_size = 8, - zarr_size = 8, - byteorder = 8:-1:1, - alignment = 3, - ) - end - @testset "simple structs" begin - @test read_parse("""[["r", "|u1"], ["g", "|u1"], ["b", "|u1"]]""") == SmallZarrGroups.ParsedType( - julia_type = @NamedTuple{r::UInt8,g::UInt8,b::UInt8}, - julia_size = 3, - zarr_size = 3, - byteorder = 1:3, - alignment = 0, - ) - end - @testset "struct alignment" begin - @test read_parse("""[["r", "|u1"], ["g", "$(NATIVE_ORDER)u2"], ["b", "|u1"]]""") == SmallZarrGroups.ParsedType( - julia_type = @NamedTuple{r::UInt8,g::UInt16,b::UInt8}, - julia_size = 6, - zarr_size = 4, - byteorder = [1,3,4,5], - alignment = 1, - ) - @test read_parse("""[["r", "|u1"], ["g", "|u1"], ["b", "$(NATIVE_ORDER)u2"]]""") == SmallZarrGroups.ParsedType( - julia_type = @NamedTuple{r::UInt8,g::UInt8,b::UInt16}, - julia_size = 4, - zarr_size = 4, - byteorder = 1:4, - alignment = 1, - ) - @test read_parse("""[["r", "|u1"], ["g", "$(NATIVE_ORDER)u4"], ["b", "$(OTHER_ORDER)u2"]]""") == SmallZarrGroups.ParsedType( - julia_type = @NamedTuple{r::UInt8,g::UInt32,b::UInt16}, - julia_size = 12, - zarr_size = 7, - byteorder = [1,5,6,7,8,10,9], - alignment = 2, - ) - end - @testset "nested structs" begin - @test read_parse("""[["foo", "$(NATIVE_ORDER)f4"], ["bar", [["baz", "$(NATIVE_ORDER)f4"], ["qux", "$(NATIVE_ORDER)i4"]]]]""") == SmallZarrGroups.ParsedType( - julia_type = NamedTuple{(:foo, :bar), Tuple{Float32, @NamedTuple{baz::Float32, qux::Int32}}}, - julia_size = 12, - zarr_size = 12, - byteorder = 1:12, - alignment = 2, - ) - end -end - -@testset "structured type parsing with shape" begin - read_parse(s) = SmallZarrGroups.parse_zarr_type(JSON3.read(s)) - @testset "zarr example" begin - @test read_parse("""[["x", "$(NATIVE_ORDER)f4"], ["y", "$(NATIVE_ORDER)f4"], ["z", "$(NATIVE_ORDER)f4", [2, 2]]]""") == SmallZarrGroups.ParsedType( - julia_type = NamedTuple{(:x, :y, :z), Tuple{Float32, Float32, SMatrix{2,2,Float32,4}}}, - julia_size = 24, - zarr_size = 24, - byteorder = [1,2,3,4, 5,6,7,8, 9,10,11,12, 17,18,19,20, 13,14,15,16, 21,22,23,24,], - alignment = 2, - ) - end - @testset "zero dimensions" begin - @test read_parse("""[["z", "|u1", []]]""") == SmallZarrGroups.ParsedType( - julia_type = NamedTuple{(:z,), Tuple{SArray{Tuple{},UInt8,0,1}}}, - julia_size = 1, - zarr_size = 1, - byteorder = [1], - alignment = 0, - ) - end - @testset "zero size" begin - @test read_parse("""[["z", "|u1", [0]]]""") == SmallZarrGroups.ParsedType( - julia_type = NamedTuple{(:z,), Tuple{SArray{Tuple{0,},UInt8,1,0}}}, - julia_size = 0, - zarr_size = 0, - byteorder = [], - alignment = 0, - ) - end - @testset "one size" begin - @test read_parse("""[["z", "|u1", [1]]]""") == SmallZarrGroups.ParsedType( - julia_type = NamedTuple{(:z,), Tuple{SArray{Tuple{1,},UInt8,1,1}}}, - julia_size = 1, - zarr_size = 1, - byteorder = [1], - alignment = 0, - ) - end - @testset "non square matrix" begin - @test read_parse("""[["z", "|u1", [2,3]]]""") == SmallZarrGroups.ParsedType( - julia_type = NamedTuple{(:z,), Tuple{SArray{Tuple{2,3},UInt8,2,6}}}, - julia_size = 6, - zarr_size = 6, - byteorder = [1,3,5,2,4,6], - alignment = 0, - ) - end - @testset "non square matrix alignment" begin - @test read_parse("""[["z", [["z", "|u1"],["b", " [0x04, 0x10], ("BBB=", "$(OTHER_ORDER)f2") => [0x10, 0x04], - ("BBBBCC==", JSON3.read("""[["r", "|u1"], ["g", "$(NATIVE_ORDER)u2"], ["b", "|u1"]]""")) => [0x04,0x00,0x10,0x41,0x08,0x00], - ("BBBBCC==", JSON3.read("""[["r", "|u1"], ["g", "$(OTHER_ORDER)u2"], ["b", "|u1"]]""")) => [0x04,0x00,0x41,0x10,0x08,0x00], (0, "$(NATIVE_ORDER)f2") => [0x00, 0x00], (1, "$(NATIVE_ORDER)u2") => [0x01, 0x00], diff --git a/test/test_zarr-meta-writing.jl b/test/test_zarr-meta-writing.jl index 6c6528d..98d6475 100644 --- a/test/test_zarr-meta-writing.jl +++ b/test/test_zarr-meta-writing.jl @@ -1,8 +1,6 @@ using SmallZarrGroups using JSON3 using Test -using StaticArrays -using StaticStrings "Character for native byte order" const NATIVE_ORDER = (ENDIAN_BOM == 0x04030201) ? '<' : '>' @@ -27,24 +25,7 @@ const OTHER_ORDER = (ENDIAN_BOM == 0x04030201) ? '>' : '<' ComplexF32 => '"'*NATIVE_ORDER*"c8\"", ComplexF64 => '"'*NATIVE_ORDER*"c16\"", NTuple{0,UInt8} => '"'*"|V0\"", - StaticString{0} => '"'*"|S0\"", NTuple{55,UInt8} => '"'*"|V55\"", - StaticString{34} => '"'*"|S34\"", - SVector{0,SmallZarrGroups.CharUTF32} => '"'*NATIVE_ORDER*"U0\"", - SVector{27,SmallZarrGroups.CharUTF32} => '"'*NATIVE_ORDER*"U27\"", - @NamedTuple{} => "[]", - @NamedTuple{r::UInt8} => """[["r", "|u1"]]""", - @NamedTuple{g::UInt64} => """[["g", "$(NATIVE_ORDER)u8"]]""", - @NamedTuple{r::UInt8,g::UInt8,b::UInt8} => """[["r", "|u1"], ["g", "|u1"], ["b", "|u1"]]""", - @NamedTuple{r::UInt8,g::SVector{27,SmallZarrGroups.CharUTF32},b::UInt8} => """[["r", "|u1"], ["g", "$(NATIVE_ORDER)U27"], ["b", "|u1"]]""", - NamedTuple{(:foo, :bar), Tuple{Float32, @NamedTuple{baz::Float32, qux::Int32}}} => """[["foo", "$(NATIVE_ORDER)f4"], ["bar", [["baz", "$(NATIVE_ORDER)f4"], ["qux", "$(NATIVE_ORDER)i4"]]]]""", - NamedTuple{(:x, :y, :z), Tuple{Float32, Float32, SMatrix{2,2,Float32,4}}} => """[["x", "$(NATIVE_ORDER)f4"], ["y", "$(NATIVE_ORDER)f4"], ["z", "$(NATIVE_ORDER)f4", [2, 2]]]""", - NamedTuple{(:z,), Tuple{SArray{Tuple{},UInt8,0,1}}} => """[["z", "|u1", []]]""", - NamedTuple{(:z,), Tuple{SArray{Tuple{0,},UInt8,1,0}}} => """[["z", "|u1", [0]]]""", - NamedTuple{(:z,), Tuple{SArray{Tuple{1,},UInt8,1,1}}} => """[["z", "|u1", [1]]]""", - NamedTuple{(:z,), Tuple{SArray{Tuple{2,3},UInt8,2,6}}} => """[["z", "|u1", [2, 3]]]""", - NamedTuple{(:z,), Tuple{SArray{Tuple{2,3},NamedTuple{(:z,:b,), Tuple{UInt8,UInt16}},2,6}}} => """[["z", [["z", "|u1"], ["b", " """[["z", "|u1", [2, 3, 2]]]""", ] for (type, str) in tests @test sprint(SmallZarrGroups.write_type,type) == str