diff --git a/pom.xml b/pom.xml index 182e90c..b9616ba 100644 --- a/pom.xml +++ b/pom.xml @@ -147,6 +147,7 @@ org.janelia.saalfeldlab n5 + 3.3.1-SNAPSHOT net.imglib2 @@ -162,6 +163,10 @@ ${n5-zstandard.version} + commons-codec + commons-codec + + org.apache.commons commons-compress diff --git a/pyproject.toml b/pyproject.toml index 2e54cc3..cf7f962 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ zarr = "^2.15.0" numcodecs = "^0.11.0" pathlib = "^1.0.1" tensorstore = "^0.1.64" +crc32c = "^2.7.1" [build-system] diff --git a/src/main/java/org/janelia/saalfeldlab/n5/zarr/DType.java b/src/main/java/org/janelia/saalfeldlab/n5/zarr/DType.java index 15c903a..b77b51e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/zarr/DType.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/zarr/DType.java @@ -478,6 +478,9 @@ public int getNBits() { } public byte[] createFillBytes(final String fill_value) { + if (fill_value == null) { + return createFillBytes("0"); + } final byte[] fillBytes = new byte[nBytes]; final ByteBuffer fillBuffer = ByteBuffer.wrap(fillBytes); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/zarr/ZArrayAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/zarr/ZArrayAttributes.java index 62522ee..cd77fd5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/zarr/ZArrayAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/zarr/ZArrayAttributes.java @@ -138,7 +138,7 @@ public ZarrDatasetAttributes getDatasetAttributes() { dtype, compressor.getCompression(), isRowMajor, - fill_value.getAsString(), + fill_value.isJsonNull() ? null : fill_value.getAsString(), dimensionSeparator); } @@ -243,7 +243,7 @@ public ZArrayAttributes deserialize(JsonElement json, Type typeOfT, JsonDeserial context.deserialize( obj.get("chunks"), int[].class), dType, // fix context.deserialize( obj.get("compressor"), ZarrCompressor.class), // fix - obj.get("fill_value").getAsString(), + obj.get("fill_value").isJsonNull() ? null : obj.get("fill_value").getAsString(), obj.get("order").getAsCharacter(), sepElem != null ? sepElem.getAsString() : ".", filters); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/zarr/v3/ZarrV3DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/zarr/v3/ZarrV3DatasetAttributes.java index 45ff1ee..e8d7647 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/zarr/v3/ZarrV3DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/zarr/v3/ZarrV3DatasetAttributes.java @@ -153,7 +153,7 @@ else if (codecs == null) return out; } - private static Codec[] prependArrayToBytes(Codec.ArrayToBytes arrayToBytes, Codec[] codecs) { + private static Codec[] prependArrayToBytes(Codec.ArrayCodec arrayToBytes, Codec[] codecs) { final Codec[] out = new Codec[codecs.length + 1]; out[0] = arrayToBytes; @@ -238,7 +238,7 @@ public HashMap asMap() { map.put(FILL_VALUE_KEY, fillValue); // map.put(CODECS_KEY, codecsToZarrCompressors(getCodecs())); - map.put(CODECS_KEY, prependArrayToBytes(getArrayToBytesCodec(), getCodecs())); + map.put(CODECS_KEY, prependArrayToBytes(getArrayCodec(), getCodecs())); return map; } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/zarr/v3/ZarrV3KeyValueWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/zarr/v3/ZarrV3KeyValueWriter.java index 0cd1b3a..8a61dd6 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/zarr/v3/ZarrV3KeyValueWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/zarr/v3/ZarrV3KeyValueWriter.java @@ -190,12 +190,12 @@ protected ZarrV3DatasetAttributes createZArrayAttributes(final DatasetAttributes dType, "0", datasetAttributes.getCompression(), - prependArrayToBytes(datasetAttributes.getArrayToBytesCodec(), datasetAttributes.getCodecs())); + prependArrayToBytes(datasetAttributes.getArrayCodec(), datasetAttributes.getCodecs())); return zArrayAttributes; } - private static Codec[] prependArrayToBytes(Codec.ArrayToBytes arrayToBytes, Codec[] codecs) { + private static Codec[] prependArrayToBytes(Codec.ArrayCodec arrayToBytes, Codec[] codecs) { final Codec[] out = new Codec[codecs.length + 1]; out[0] = arrayToBytes; diff --git a/src/test/java/org/janelia/saalfeldlab/n5/zarr/N5ZarrTest.java b/src/test/java/org/janelia/saalfeldlab/n5/zarr/N5ZarrTest.java index cd0f74b..7f3b3e6 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/zarr/N5ZarrTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/zarr/N5ZarrTest.java @@ -464,10 +464,19 @@ private static > void assertIsSequence( @Test public void testReadZarrPython() throws IOException, InterruptedException { - final String testZarrDirPath = tempN5Location(); - + final String testZarrDirPath = tempN5Location(); + //TODO: decided what to do with it for windows + String testZarrDirPathForPython; + + if (System.getProperty("os.name").startsWith("Windows")) + testZarrDirPathForPython = testZarrDirPath.substring(1); + else + testZarrDirPathForPython = testZarrDirPath; + + System.err.println("For Python: " + testZarrDirPathForPython); + /* create test data with python */ - if (!runPythonTest("zarr-test.py", testZarrDirPath)) { + if (!runPythonTest("zarr-test.py", testZarrDirPathForPython)) { System.out.println("Couldn't run Python test, skipping compatibility test with Python."); return; } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/zarr/TensorstoreTest.java b/src/test/java/org/janelia/saalfeldlab/n5/zarr/TensorstoreTest.java index 08341c3..dd6d513 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/zarr/TensorstoreTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/zarr/TensorstoreTest.java @@ -5,8 +5,13 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStreamReader; import java.lang.ProcessBuilder.Redirect; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.ShortBuffer; import java.nio.file.FileSystems; import java.nio.file.Files; import java.nio.file.Path; @@ -15,14 +20,26 @@ import java.util.HashSet; import java.util.List; import java.util.concurrent.TimeUnit; +//import java.util.zip.CRC32; -import org.apache.commons.io.FileUtils; +import org.apache.commons.codec.digest.PureJavaCrc32C; +import org.janelia.saalfeldlab.n5.Compression; +import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; +import org.janelia.saalfeldlab.n5.GzipCompression; +import org.janelia.saalfeldlab.n5.N5Reader; + +import org.apache.commons.io.FileUtils; + import org.janelia.saalfeldlab.n5.N5Writer; +import org.janelia.saalfeldlab.n5.RawCompression; +import org.janelia.saalfeldlab.n5.ShortArrayDataBlock; +import org.janelia.saalfeldlab.n5.blosc.BloscCompression; import org.janelia.saalfeldlab.n5.imglib2.N5Utils; import org.janelia.saalfeldlab.n5.zarr.v3.ZarrV3KeyValueWriter; +import org.janelia.scicomp.n5.zstandard.ZstandardCompression; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -35,8 +52,10 @@ import net.imglib2.type.numeric.integer.LongType; import net.imglib2.type.numeric.integer.UnsignedByteType; import net.imglib2.type.numeric.integer.UnsignedIntType; +import net.imglib2.type.numeric.integer.UnsignedLongType; import net.imglib2.type.numeric.real.DoubleType; import net.imglib2.type.numeric.real.FloatType; +import net.imglib2.view.RandomAccessiblePair.RandomAccess; import net.imglib2.view.Views; public class TensorstoreTest { @@ -44,7 +63,7 @@ public class TensorstoreTest { private String testZarrBaseName = "tensorstore_tests"; private static enum Version { - zarr2, zarr3 + zarr2, zarr3, n5 }; private HashSet paths; @@ -112,6 +131,63 @@ private boolean runPythonTest(final String script, final String containerPath, f } + public long runPythonChecksum(final String containerPath, final Version version) throws InterruptedException { + try { + List pythonArgs = new ArrayList<>(); + String checksumResult = ""; + pythonArgs.addAll(Arrays.asList( + "poetry", "run", "python", "src/test/python/tensorstore_checksum.py", containerPath, version.toString() + )); + System.out.println(String.join(" ", pythonArgs)); + + final ProcessBuilder pb = new ProcessBuilder(pythonArgs.toArray(new String[0])); + + // pb.redirectOutput(ProcessBuilder.Redirect.INHERIT); + pb.redirectError(ProcessBuilder.Redirect.INHERIT); + final Process process = pb.start(); + + + // Wait for the process to complete or timeout after 10 seconds + final boolean timedOut = !process.waitFor(10, TimeUnit.SECONDS); + + if (timedOut) + System.err.println("The Python process timed out!"); + + + final int exitCode = process.exitValue(); + + if (exitCode == 0) { + System.out.println("Python checksum process completed successfully."); + } else { + System.err.println("Python checksum process failed with exit code: " + exitCode); + } + + InputStreamReader ISReader = new InputStreamReader(process.getInputStream()); + BufferedReader BReader = new BufferedReader(ISReader); + + String line; + while((line=BReader.readLine()) !=null) { + System.out.println(line); + checksumResult = line; + } + + System.out.println("Python Checksum Result String: " + checksumResult); + + String[] splits = checksumResult.split(" "); + String numStr = splits[splits.length-1]; + + long checksum = Long.parseLong(numStr); + System.out.println("Python Checksum Result Long: " + checksum); + + process.destroy(); + return checksum; + + } catch (IOException e) { + e.printStackTrace(); + throw new RuntimeException("An error occurred while running the Python checksum process", e); + } + } + private static > void assertIsSequence( final RandomAccessibleInterval source, final T ref) { @@ -139,24 +215,111 @@ private static > void assertIsSequence( } @Test - public void testReadTensorstoreZarr2() throws IOException, InterruptedException { + public void testReadTensorstoreZarr3() throws Exception { + testReadTensorstore(Version.zarr3); + } + + public void testReadTensorstoreZarr2() throws IOException, InterruptedException { testReadTensorstore(Version.zarr2); } @Test - public void testReadTensorstoreZarr3() throws IOException, InterruptedException { + public void testReadTensorstoreN5() throws Exception { + testReadTensorstore(Version.n5); + } + + @Test + public void testReadTensorstoreChecksumZarr2() throws IOException, InterruptedException{ + testReadChecksum(Version.zarr2); + } - testReadTensorstore(Version.zarr3); + public void testReadChecksum(Version version) throws IOException, InterruptedException { + + ZarrKeyValueWriter n5Zarr = new ZarrKeyValueWriter(new FileSystemKeyValueAccess(FileSystems.getDefault()), tempN5Location(), new GsonBuilder(), false, false, "/", false); + + final String testZarrDatasetName = String.join("/", testZarrBaseName, version.toString()); + + n5Zarr.createDataset( + testZarrDatasetName, + new long[]{1, 2, 3}, + new int[]{1, 2, 3}, + DataType.UINT16, + new GzipCompression(4) //new GzipCompression() + //ZarrCompressor.fromCompression(new GzipCompression(5)) + //ZarrCompressor.fromCompression(new BloscCompression("blosc", BloscCompression.BITSHUFFLE,5,0, 1)) + ); + + + final DatasetAttributes attributes = n5Zarr.getDatasetAttributes(testZarrDatasetName); + final short[] shortBlock = new short[]{1, 2, 3, 4, 5, 6}; + ByteBuffer byteBuffer = ByteBuffer.allocate(shortBlock.length * 2); + byteBuffer.order(ByteOrder.nativeOrder()); + byteBuffer.asShortBuffer().put(shortBlock); + byte[] barray = byteBuffer.array(); + + PureJavaCrc32C crc32c = new PureJavaCrc32C(); + crc32c.update(barray, 0, barray.length); + + /* + for(byte b: barray) { + System.out.println("byte: " + b); + }*/ + + long javaChecksum = crc32c.getValue(); + if (javaChecksum == -1) { + System.out.println("Couldn't run Checksum Java test, skipping compatibility test."); + return; + } + //System.out.println("Checksum from Java: " + javaChecksum); + + //final ShortBuffer sBuffer = ShortBuffer.wrap(shortBlock); + final ShortArrayDataBlock dataBlock = new ShortArrayDataBlock(new int[]{1, 2, 3}, new long[]{0, 0, 0}, shortBlock); + n5Zarr.writeBlock(testZarrDatasetName, attributes, dataBlock); + //System.out.println("Checksum from Java Path: "+ n5Zarr.getURI().getPath() + "\\test\\tensorstore"); + + // pythonZarrPath + //final String testZarrDirPath = "C:\\Users\\chend\\AppData\\Local\\Temp\\zarr3-tensorstore-test_python_o0dnjj3f.zarr\\tensorstore_tests\\zarr2\\3x2_f_u4"; + final String testZarrDirPath =(n5Zarr.getURI().getPath().substring(1) + testZarrBaseName + "/" + version); + + + //TODO: decided what to do with it for windows + String testZarrDirPathForPython = testZarrDirPath; + + /* + * if (System.getProperty("os.name").startsWith("Windows")) + * testZarrDirPathForPython = testZarrDirPath.substring(1); else + * testZarrDirPathForPython = testZarrDirPath; + */ + + System.err.println("For Python: " + testZarrDirPathForPython); + + long pythonChecksum = runPythonChecksum(testZarrDirPathForPython, version); + if (pythonChecksum == -1) { + System.out.println("Couldn't run Checksum Python test, skipping compatibility test with Python."); + return; + } + System.out.println("\n---------------------------------------"); + System.out.println("Checksum from Python: " + pythonChecksum); + System.out.println("Checksum from Python Path: " + testZarrDirPath); + + System.out.println("Checksum from Java: " + javaChecksum); + System.out.println("Checksum from Java Path: "+ n5Zarr.getURI().getPath().substring(1) + "test/tensorstore"); + + // Compare checksums + if (pythonChecksum == javaChecksum) { + System.out.println("Checksums match!"); + } else { + System.err.println("Checksums do not match!"); + } } + private static String versionFlag(Version version) { - return "--" + version; } public void testReadTensorstore(Version version) throws IOException, InterruptedException { - final String testZarrDirPath = tempN5Location(); //TODO: decided what to do with it for windows String testZarrDirPathForPython; @@ -187,6 +350,7 @@ public void testReadTensorstore(Version version) throws IOException, Interrupted /* groups */ final String testZarrDatasetName = String.join("/", testZarrBaseName, version.toString()); + final String testZarrGroupName = testZarrDatasetName; assertTrue(n5Zarr.exists(testZarrDatasetName)); assertFalse(n5Zarr.datasetExists(testZarrDatasetName)); @@ -199,84 +363,151 @@ public void testReadTensorstore(Version version) throws IOException, Interrupted assertArrayEquals(datasetAttributesC.getBlockSize(), new int[]{3, 2}); assertEquals(DataType.INT64, datasetAttributesC.getDataType()); + final DatasetAttributes datasetAttributesF = n5Zarr.getDatasetAttributes(testZarrDatasetName + "/3x2_f_i8"); + assertArrayEquals(datasetAttributesF.getDimensions(), new long[]{2, 3}); + assertArrayEquals(datasetAttributesF.getBlockSize(), new int[]{2, 3}); + assertEquals(DataType.INT64, datasetAttributesF.getDataType()); // TODO test after we implement this /* N5 array parameter mapping */ - // assertArrayEquals( - // n5Zarr.getAttribute(testZarrDatasetName + "/3x2_c_i8", "dimensions", long[].class), - // new long[]{3, 2}); - // assertArrayEquals( - // n5Zarr.getAttribute(testZarrDatasetName + "/3x2_c_i8", "blockSize", int[].class), - // new int[]{3, 2}); - // assertEquals( - // n5Zarr.getAttribute(testZarrDatasetName + "/3x2_c_i8", "dataType", DataType.class), - // DataType.INT64); - + assertArrayEquals( + n5Zarr.getAttribute(testZarrDatasetName + "/3x2_c_i8", "dimensions", long[].class), + new long[]{3, 2}); + assertArrayEquals( + n5Zarr.getAttribute(testZarrDatasetName + "/3x2_c_i8", "blockSize", int[].class), + new int[]{3, 2}); + assertEquals( + n5Zarr.getAttribute(testZarrDatasetName + "/3x2_c_i8", "dataType", DataType.class), + DataType.INT64); + + /* LE uint8 in C and F order */ final UnsignedByteType refUnsignedByte = new UnsignedByteType(); - assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/3x2_c_u1"), refUnsignedByte); - /* int64 in C order */ + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/3x2_c_u1"), refUnsignedByte); + assertIsSequence( + Views.permute( + (RandomAccessibleInterval)N5Utils.open(n5Zarr, testZarrGroupName + "/3x2_f_u1"), + 0, + 1), + refUnsignedByte); + + /* LE int64 in C and F order */ final LongType refLong = new LongType(); + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/3x2_c_i8"), refLong); - assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_i8"), refLong); + assertIsSequence( + Views.permute( + (RandomAccessibleInterval)N5Utils.open(n5Zarr, testZarrDatasetName + "/3x2_f_i8"), + 0, + 1), + refLong); - /* int32 in C order */ + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_i8"), refLong); + assertIsSequence( + Views.permute( + (RandomAccessibleInterval)N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_f_i8"), + 0, + 1), + refLong); + + /* BE int32 in C and F order */ final UnsignedIntType refUnsignedInt = new UnsignedIntType(); + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/3x2_c_u4"), refUnsignedInt); - assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_u4"), refUnsignedInt); + assertIsSequence( + Views.permute( + (RandomAccessibleInterval)N5Utils.open(n5Zarr, testZarrGroupName + "/3x2_f_u4"), + 0, + 1), + refUnsignedInt); - /* float64 in C order */ + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_u4"), refUnsignedInt); + assertIsSequence( + Views.permute( + (RandomAccessibleInterval)N5Utils.open(n5Zarr, testZarrGroupName + "/30x20_f_u4"), + 0, + 1), + refUnsignedInt); + + /* LE float64 in C and F order */ final DoubleType refDouble = new DoubleType(); + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/3x2_c_f8"), refDouble); - assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_f8"), refDouble); + assertIsSequence( + Views.permute( + (RandomAccessibleInterval)N5Utils.open(n5Zarr, testZarrGroupName + "/3x2_f_f8"), + 0, + 1), + refDouble); - /* float32 in C order */ + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_f8"), refDouble); + assertIsSequence( + Views.permute( + (RandomAccessibleInterval)N5Utils.open(n5Zarr, testZarrGroupName + "/30x20_f_f8"), + 0, + 1), + refDouble); + + /* BE float32 in C and F order */ final FloatType refFloat = new FloatType(); - assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/3x2_c_f4"), refFloat); - assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_f4"), refFloat); + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/3x2_c_f4"), refFloat); + assertIsSequence( + Views.permute( + (RandomAccessibleInterval)N5Utils.open(n5Zarr, testZarrGroupName + "/3x2_f_f4"), + 0, + 1), + refFloat); - // /* compressors */ - // final UnsignedLongType refUnsignedLong = new UnsignedLongType(); - // - // assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_u8_zlib"), refUnsignedLong); - // assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_u8_gzip"), refUnsignedLong); - // assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_u8_bz2"), refUnsignedLong); - // - // /* fill value 1 */ - // String datasetName = testZarrDatasetName + "/3x2_c_u4_f1"; - // - // final RandomAccessibleInterval a3x2_c_bu4_f1 = N5Utils.open(n5Zarr, datasetName); - // assertIsSequence(a3x2_c_bu4_f1, refUnsignedInt); - // - // DatasetAttributes attributes = n5Zarr.getDatasetAttributes(datasetName); - // final long[] shape = attributes.getDimensions(); - // Arrays.setAll(shape, i -> shape[i] + 5); - // n5Zarr.setAttribute(datasetName, "dimensions", shape); - // - // final RandomAccessibleInterval a3x2_c_bu4_f1_after = N5Utils.open(n5Zarr, datasetName); - // assertIsSequence(Views.interval(a3x2_c_bu4_f1_after, a3x2_c_bu4_f1), refUnsignedInt); - // final RandomAccess ra = a3x2_c_bu4_f1_after.randomAccess(); - // - // /* fill value NaN */ - // datasetName = testZarrDatasetName + "/3x2_c_f4_fnan"; - // - // final RandomAccessibleInterval a3x2_c_lf4_fnan = N5Utils.open(n5Zarr, datasetName); - // assertIsSequence(a3x2_c_lf4_fnan, refFloat); - // - // attributes = n5Zarr.getDatasetAttributes(datasetName); - // final long[] shapef = attributes.getDimensions(); - // Arrays.setAll(shapef, i -> shapef[i] + 5); - // n5Zarr.setAttribute(datasetName, "dimensions", shapef); - // - // final RandomAccessibleInterval a3x2_c_lf4_fnan_after = N5Utils.open(n5Zarr, datasetName); - // assertIsSequence(Views.interval(a3x2_c_lf4_fnan_after, a3x2_c_lf4_fnan), refFloat); - // final RandomAccess raf = a3x2_c_lf4_fnan_after.randomAccess(); - // raf.setPosition(shapef[0] - 5, 0); - // assertTrue(Float.isNaN(raf.get().getRealFloat())); - // raf.setPosition(shapef[1] - 5, 1); - // assertTrue(Float.isNaN(raf.get().getRealFloat())); - + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_f4"), refFloat); + assertIsSequence( + Views.permute( + (RandomAccessibleInterval)N5Utils.open(n5Zarr, testZarrGroupName + "/30x20_f_f4"), + 0, + 1), + refFloat); + + /* compressors */ + final UnsignedLongType refUnsignedLong = new UnsignedLongType(); + + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_u8_zlib"), refUnsignedLong); + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_u8_gzip"), refUnsignedLong); + assertIsSequence(N5Utils.open(n5Zarr, testZarrDatasetName + "/30x20_c_u8_bz2"), refUnsignedLong); + + /* fill value 1 */ + String datasetName = testZarrGroupName + "/3x2_c_u4_f1"; + + final RandomAccessibleInterval a3x2_c_bu4_f1 = N5Utils.open(n5Zarr, datasetName); + assertIsSequence(a3x2_c_bu4_f1, refUnsignedInt); + + DatasetAttributes attributes = n5Zarr.getDatasetAttributes(datasetName); + final long[] shape = attributes.getDimensions(); + Arrays.setAll(shape, i -> shape[i] + 5); + n5Zarr.setAttribute(datasetName, "dimensions", shape); + + final RandomAccessibleInterval a3x2_c_bu4_f1_after = N5Utils.open(n5Zarr, datasetName); + assertIsSequence(Views.interval(a3x2_c_bu4_f1_after, a3x2_c_bu4_f1), refUnsignedInt); + final net.imglib2.RandomAccess ra = a3x2_c_bu4_f1_after.randomAccess(); + + /* fill value NaN */ + datasetName = testZarrDatasetName + "/3x2_c_f4_fnan"; + + final RandomAccessibleInterval a3x2_c_lf4_fnan = N5Utils.open(n5Zarr, datasetName); + assertIsSequence(a3x2_c_lf4_fnan, refFloat); + + attributes = n5Zarr.getDatasetAttributes(datasetName); + final long[] shapef = attributes.getDimensions(); + Arrays.setAll(shapef, i -> shapef[i] + 5); + n5Zarr.setAttribute(datasetName, "dimensions", shapef); + + final RandomAccessibleInterval a3x2_c_lf4_fnan_after = N5Utils.open(n5Zarr, datasetName); + assertIsSequence(Views.interval(a3x2_c_lf4_fnan_after, a3x2_c_lf4_fnan), refFloat); + final net.imglib2.RandomAccess raf = a3x2_c_lf4_fnan_after.randomAccess(); + raf.setPosition(shapef[0] - 5, 0); + assertTrue(Float.isNaN(raf.get().getRealFloat())); + raf.setPosition(shapef[1] - 5, 1); + assertTrue(Float.isNaN(raf.get().getRealFloat())); + + } - } diff --git a/src/test/python/tensorstore_checksum.py b/src/test/python/tensorstore_checksum.py new file mode 100644 index 0000000..c28c58a --- /dev/null +++ b/src/test/python/tensorstore_checksum.py @@ -0,0 +1,155 @@ +import logging +import tensorstore as ts +from crc32c import crc32c +import numpy as np +import sys +import os +import json + +logger = logging.getLogger(__name__) + +def zarr3_read_and_checksum_array(store_path): + try: + spec = { + 'driver': 'zarr3', + 'kvstore': { + 'driver': 'file', + 'path': store_path + } + } + + store = ts.open(spec).result() + array = store.read().result() + + flat_array = array.flatten() + + checksum = crc32c(flat_array) + + print(f"Checksum for the dataset at {store_path}: {checksum}") + + return checksum + + except Exception as e: + logger.error(f"Error occurred while reading array and calculating checksum: {e}") + raise + +def zarr2_read_and_checksum_array(store_path): + try: + fix_zarray_filters(store_path) + + spec = { + 'driver': 'zarr', + 'kvstore': { + 'driver': 'file', + 'path': store_path + } + } + + store = ts.open(spec).result() + array = store.read().result() + + flat_array = array.flatten() + + #print("Python flat_array: ") + #print(flat_array) + + checksum = crc32c(flat_array) + + print(f"Checksum for the dataset at {store_path}: {checksum}") + + return checksum + + except Exception as e: + logger.error(f"Error occurred while reading array and calculating checksum: {e}") + raise + +def n5_read_and_checksum_array(store_path): + try: + spec = { + 'driver': 'n5', + 'kvstore': { + 'driver': 'file', + 'path': store_path + } + } + + store = ts.open(spec).result() + array = store.read().result() + + flat_array = array.flatten() + + checksum = crc32c(flat_array) + + print(f"Checksum for the dataset at {store_path}: {checksum}") + + return checksum + + except Exception as e: + logger.error(f"Error occurred while reading array and calculating checksum: {e}") + raise + +# Function to load and fix the .zarray metadata +def fix_zarray_filters(store_path): + zarray_path = os.path.join(store_path, ".zarray") + + # Check if .zarray file exists + if not os.path.exists(zarray_path): + raise FileNotFoundError(f"Could not find .zarray file at {zarray_path}") + + # Open the .zarray file and load its content + with open(zarray_path, "r") as zarray_file: + zarray_data = json.load(zarray_file) + + # Fix the filters if they are an empty list + if "filters" in zarray_data and zarray_data["filters"] == []: + print(zarray_data) + zarray_data["filters"] = None + + # Save the updated .zarray file + with open(zarray_path, "w") as zarray_file: + json.dump(zarray_data, zarray_file) + print(f"Fixed filters in .zarray file at {zarray_path}") + else: + print(f"Did not change filters in file at {zarray_path}") + +def main(store_path, *args): + # Determine whether to use N5 or Zarr2 or Zarr3 + args = [arg.lower() for arg in args] + valid_options = ['--zarr3', '--zarr2', '--n5', '--info'] + use_zarr3 = '--zarr3' in args + use_n5 = '--n5' in args + + if any(arg.startswith('--') and arg not in valid_options for arg in args): + raise Exception("Invalid option provided. Valid options are '--zarr3' or '--zarr2' or '--n5' or '--info'.") + + format = 3 if use_zarr3 else 2 if not use_n5 else 'n5' + + try: + if format == 3: + checksum = zarr3_read_and_checksum_array(store_path) + print(f"Final checksum: {checksum}") + elif format == 2: + checksum = zarr2_read_and_checksum_array(store_path) + print(f"Final checksum: {checksum}") + elif format == 'n5': + checksum = n5_read_and_checksum_array(store_path) + print(f"Final checksum: {checksum}") + + except Exception as e: + logger.error(f"Main processing failed: {e}") + sys.exit(3) + +if __name__ == '__main__': + logging.basicConfig(level=logging.INFO) + + if len(sys.argv) < 2: + logger.error("Usage: python tensorstore_checksum.py ") + sys.exit(1) + + #store_path = sys.argv[1] + + try: + main(*sys.argv[1:]) + except Exception as e: + logger.error(f"Main processing failed with exception: {e}", exc_info=True) + sys.exit(3) diff --git a/src/test/python/tensorstore_test.py b/src/test/python/tensorstore_test.py index 568e942..cb95f96 100644 --- a/src/test/python/tensorstore_test.py +++ b/src/test/python/tensorstore_test.py @@ -140,6 +140,14 @@ def ts_create_zarr2_test(zarr2_path, data=None, chunk_shape=None, compression=No if data is None: data = np.arange(np.prod(chunk_shape)).reshape(chunk_shape) + + # Determine the order + if data.flags['C_CONTIGUOUS']: + order = 'C' + elif data.flags['F_CONTIGUOUS']: + order = 'F' + else: + raise ValueError("Data is neither C-contiguous nor F-contiguous.") # TensorStore Zarr2 dtype dtype_str = np.dtype(data.dtype).str @@ -156,7 +164,7 @@ def ts_create_zarr2_test(zarr2_path, data=None, chunk_shape=None, compression=No 'dtype': dtype_str, 'compressor': None, 'fill_value': fill_value, - 'order': 'C' + 'order': order # TODO: Fix F or C order, based on data via data.flags['C_CONTIGUOUS'] or data.flags['F_CONTIGUOUS']: } } diff --git a/src/test/python/zarr_checksum.py b/src/test/python/zarr_checksum.py new file mode 100644 index 0000000..cb2d266 --- /dev/null +++ b/src/test/python/zarr_checksum.py @@ -0,0 +1,124 @@ +import logging +import zarr +from crc32c import crc32c +import numpy as np +import sys +import os +import json + +logger = logging.getLogger(__name__) + +def zarr2_read_and_checksum_array(store_path): + try: + zarr_store = zarr.open(store_path, mode='r') + array = zarr_store[:] + flat_array = array.flatten() + + checksum = crc32c(flat_array) + + print(f"Checksum for the dataset at {store_path}: {checksum}") + return checksum + + except Exception as e: + logger.error(f"Error occurred while reading array and calculating checksum: {e}") + raise + +def zarr3_read_and_checksum_array(store_path): + try: + zarr_store = zarr.open(store_path, mode='r') + array = zarr_store[:] + flat_array = array.flatten() + + checksum = crc32c(flat_array) + + print(f"Checksum for the dataset at {store_path}: {checksum}") + return checksum + + except Exception as e: + logger.error(f"Error occurred while reading array and calculating checksum: {e}") + raise + +def n5_read_and_checksum_array(store_path): + try: + fix_attributes_json(store_path) + + n5_store = zarr.N5FSStore(store_path) + array = zarr.open(store=n5_store, mode='r') + flat_array = array[:].flatten() + + checksum = crc32c(flat_array) + + print(f"Checksum for the N5 dataset at {store_path}: {checksum}") + return checksum + + except Exception as e: + logger.error("Store path: " + store_path) + logger.exception(f"Error occurred while reading N5 array and calculating checksum: {e}") + raise + + +# Function to load and fix the attributes.json metadata +def fix_attributes_json(store_path): + # Define the path to attributes.json + attributes_json_path = os.path.join(store_path, "attributes.json") + + # Check if the file exists + if not os.path.exists(attributes_json_path): + raise FileNotFoundError(f"Could not find attributes.json at {attributes_json_path}") + + # Load the content of attributes.json + with open(attributes_json_path, "r") as file: + attributes_data = json.load(file) + + # Check if the "n5" key is present, if not add it + if "n5" not in attributes_data: + attributes_data["n5"] = "4.0.0" + print(f"Added 'n5': '4.0.0' to {attributes_json_path}") + + # Write the modified data back to attributes.json + with open(attributes_json_path, "w") as file: + json.dump(attributes_data, file, indent=4) + else: + print(f"'n5' version already exists in {attributes_json_path}") + + +def main(store_path, *args): + args = [arg.lower() for arg in args] + valid_options = ['--zarr3', '--zarr2', '--n5'] + + use_zarr3 = '--zarr3' in args + use_n5 = '--n5' in args + + if any(arg.startswith('--') and arg not in valid_options for arg in args): + raise Exception("Invalid option provided. Valid options are '--zarr3', '--zarr2', or '--n5'.") + + #format = 3 if use_zarr3 else 2 + format = 3 if use_zarr3 else (5 if use_n5 else 2) + + try: + if format == 3: + checksum = zarr3_read_and_checksum_array(store_path) + print(f"Final checksum: {checksum}") + elif format == 2: + checksum = zarr2_read_and_checksum_array(store_path) + print(f"Final checksum: {checksum}") + elif format == 5: + checksum = n5_read_and_checksum_array(store_path) + print(f"Final checksum: {checksum}") + + except Exception as e: + logger.error(f"Main processing failed: {e}") + sys.exit(3) + +if __name__ == '__main__': + logging.basicConfig(level=logging.INFO) + + if len(sys.argv) < 2: + logger.error("Usage: python zarr_checksum.py ") + sys.exit(1) + + try: + main(*sys.argv[1:]) + except Exception as e: + logger.error(f"Main processing failed with exception: {e}", exc_info=True) + sys.exit(3)