Skip to content

Commit

Permalink
Merge pull request #35 from mkitti/master
Browse files Browse the repository at this point in the history
Add Zstandard
  • Loading branch information
bogovicj authored Dec 22, 2023
2 parents dc3d3fa + 90bee24 commit 2072f38
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 4 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ This implementation currently supports the following Zarr features
<dt>Arbitrary meta-data</dt>
<dd>stored as JSON for both groups and datasets.</dd>
<dt>Compression</dt>
<dd>currently, only the most relevant compression schemes (Blosc, GZip, Zlib, and BZ2) are supported, we can add others later as necessary.</dd>
<dd>Currently, only the most relevant compression schemes (Zstandard, Blosc, GZip, Zlib, and BZ2) are supported; others can be added later as necessary.</dd>
<dt>Primitive types as little and big endian</dt>
<dd>so far, I have tested unsigned and signed integers with 1, 2, 4 and 8 bytes, and floats with 4 and 8 bytes. The behavior for other types is untested because I did not have meaningful examples. Complex numbers should be mapped into the best matching primitive real type. Other numpy data types such as strings, timedeltas, objects, dates, or others should come out as uncompressed bytes.</dd>
</dl>
Expand Down
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,11 @@
<groupId>org.janelia.saalfeldlab</groupId>
<artifactId>n5-blosc</artifactId>
</dependency>
<dependency>
<groupId>org.janelia</groupId>
<artifactId>n5-zstandard</artifactId>
<version>1.0.2</version>
</dependency>

<!-- Test dependencies -->
<dependency>
Expand Down
39 changes: 39 additions & 0 deletions src/main/java/org/janelia/saalfeldlab/n5/zarr/ZarrCompressor.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.janelia.saalfeldlab.n5.GzipCompression;
import org.janelia.saalfeldlab.n5.RawCompression;
import org.janelia.saalfeldlab.n5.blosc.BloscCompression;
import org.janelia.scicomp.n5.zstandard.ZstandardCompression;

import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
Expand All @@ -55,6 +56,7 @@ public interface ZarrCompressor {

/* stream-based initialization because Java interfaces cannot contain static initializer blocks */
public static Map<String, Class<? extends ZarrCompressor>> registry = Stream.of(
new SimpleImmutableEntry<>("zstd", Zstandard.class),
new SimpleImmutableEntry<>("blosc", Blosc.class),
new SimpleImmutableEntry<>("zlib", Zlib.class),
new SimpleImmutableEntry<>("gzip", Gzip.class),
Expand All @@ -75,6 +77,8 @@ public static ZarrCompressor fromCompression(final Compression compression) {
return useZlib != null && useZlib ? new Zlib((GzipCompression)compression) : new Gzip((GzipCompression)compression);
} else if (compression instanceof Bzip2Compression) {
return new Bz2((Bzip2Compression)compression);
} else if (compression instanceof ZstandardCompression) {
return new Zstandard((ZstandardCompression)compression);
} else {
return new Raw();
}
Expand All @@ -85,8 +89,40 @@ public static ZarrCompressor fromCompression(final Compression compression) {

public Compression getCompression();

/**
 * {@link ZarrCompressor} backed by {@link ZstandardCompression}, carrying
 * the id {@code "zstd"}.
 *
 * <p>The instance stores the compression level and a worker count. The
 * worker count is declared {@code transient}; presumably this keeps it out
 * of the serialized compressor metadata (TODO confirm against the
 * serializer configuration).
 */
public static class Zstandard implements ZarrCompressor {

	/* Never read by code in this class, hence the suppressed warning. */
	@SuppressWarnings("unused")
	private final String id = "zstd";

	private final int level;

	private final transient int nbWorkers;

	/**
	 * Creates a compressor with an explicit level and worker count.
	 *
	 * @param level the level handed to {@link ZstandardCompression}
	 * @param nbWorkers the worker count; {@code 0} means it is never
	 *            forwarded to the created {@link ZstandardCompression}
	 */
	public Zstandard(final int level, final int nbWorkers) {
		this.level = level;
		this.nbWorkers = nbWorkers;
	}

	/**
	 * Creates a compressor with the given level and a worker count of
	 * {@code 0}.
	 *
	 * @param level the level handed to {@link ZstandardCompression}
	 */
	public Zstandard(final int level) {
		this(level, 0);
	}

	/**
	 * Creates a compressor that copies level and worker count from an
	 * existing {@link ZstandardCompression}.
	 *
	 * @param compression the compression whose settings are copied
	 */
	public Zstandard(final ZstandardCompression compression) {
		this(compression.getLevel(), compression.getNbWorkers());
	}

	/**
	 * Builds a new {@link ZstandardCompression} from the stored settings.
	 *
	 * @return a fresh compression created with {@code level}; the worker
	 *         count is applied only when it is non-zero
	 */
	@Override
	public Compression getCompression() {
		final ZstandardCompression zstd = new ZstandardCompression(level);
		if (nbWorkers == 0) {
			/* leave the worker count at whatever the constructor set */
			return zstd;
		}
		zstd.setNbWorkers(nbWorkers);
		return zstd;
	}

}

public static class Blosc implements ZarrCompressor {

@SuppressWarnings("unused")
private final String id = "blosc";
private final String cname;
private final int clevel;
Expand Down Expand Up @@ -147,6 +183,7 @@ public BloscCompression getCompression() {

public static class Zlib implements ZarrCompressor {

@SuppressWarnings("unused")
private final String id = "zlib";
private final int level;

Expand Down Expand Up @@ -174,6 +211,7 @@ public GzipCompression getCompression() {

public static class Gzip implements ZarrCompressor {

@SuppressWarnings("unused")
private final String id = "gzip";
private final int level;

Expand Down Expand Up @@ -201,6 +239,7 @@ public GzipCompression getCompression() {

public static class Bz2 implements ZarrCompressor {

@SuppressWarnings("unused")
private final String id = "bz2";
private final int level;

Expand Down
5 changes: 5 additions & 0 deletions src/test/java/org/janelia/saalfeldlab/n5/zarr/N5ZarrTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
import org.janelia.saalfeldlab.n5.StringDataBlock;
import org.janelia.saalfeldlab.n5.blosc.BloscCompression;
import org.janelia.saalfeldlab.n5.imglib2.N5Utils;
import org.janelia.scicomp.n5.zstandard.ZstandardCompression;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
Expand Down Expand Up @@ -169,6 +170,10 @@ protected Compression[] getCompressions() {
new GzipCompression(5, true),
new BloscCompression(),
new BloscCompression("lz4", 6, BloscCompression.BITSHUFFLE, 0, 4),
new ZstandardCompression(),
new ZstandardCompression(0),
new ZstandardCompression(-1),
//add new compressions here
new RawCompression()
};
}
Expand Down
2 changes: 1 addition & 1 deletion src/test/python/zarr-nested-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from pathlib import Path
import numpy as np
import zarr
from numcodecs import Zlib, GZip, BZ2
from numcodecs import Zlib, GZip, BZ2, Zstd
import sys
import os

Expand Down
17 changes: 15 additions & 2 deletions src/test/python/zarr-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from pathlib import Path
import numpy as np
import zarr
from numcodecs import Zlib, GZip, BZ2
from numcodecs import Zlib, GZip, BZ2, Zstd
import sys
import os

Expand Down Expand Up @@ -194,6 +194,13 @@
data=array_30x20_c,
chunks=(7, 13),
overwrite=True)
# Zstandard fixture: stores array_30x20_c under the name '30x20_c_u8_zstd'
# as big-endian unsigned 8-byte integers ('>u8'), in 7x13 chunks,
# compressed with numcodecs Zstd at level 1.
# NOTE(review): array_30x20_c is defined outside this view; overwrite=True
# presumably replaces an existing array of the same name — confirm.
group.array(
name='30x20_c_u8_zstd',
dtype='>u8',
compressor=Zstd(level=1),
data=array_30x20_c,
chunks=(7, 13),
overwrite=True)

group.array(
name='3x2_c_u4_f1',
Expand Down Expand Up @@ -244,4 +251,10 @@
data=array_3x2_str_c,
chunks=(2, 2),
overwrite=True)

# Zstandard fixture for string data: stores array_3x2_str_c under the name
# '3x2_c_str_zstd' with dtype=str, in 2x2 chunks, compressed with
# numcodecs Zstd at level 1.
# NOTE(review): array_3x2_str_c is defined outside this view; overwrite=True
# presumably replaces an existing array of the same name — confirm.
group.array(
name='3x2_c_str_zstd',
dtype=str,
compressor=Zstd(level=1),
data=array_3x2_str_c,
chunks=(2, 2),
overwrite=True)

0 comments on commit 2072f38

Please sign in to comment.