diff --git a/README.md b/README.md index cb3fd386..456e276c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # jHDF - Pure Java HDF5 library -[![jHDF CI](https://github.com/jamesmudd/jhdf/actions/workflows/ci.yml/badge.svg)](https://github.com/jamesmudd/jhdf/actions/workflows/ci.yml) [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=jamesmudd_jhdf&metric=coverage)](https://sonarcloud.io/dashboard?id=jamesmudd_jhdf) [![Maven Central](https://img.shields.io/maven-central/v/io.jhdf/jhdf.svg?label=Maven%20Central)](https://search.maven.org/artifact/io.jhdf/jhdf) [![Javadocs](http://javadoc.io/badge/io.jhdf/jhdf.svg)](http://javadoc.io/doc/io.jhdf/jhdf) [![JetBrains Supported](https://img.shields.io/badge/supported-project.svg?label=&colorA=grey&colorB=orange&logo=data%3Aimage%2Fsvg%2Bxml%3Bbase64%2CPD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz48c3ZnIHZlcnNpb249IjEuMSIgaWQ9IkxheWVyXzEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgeG1sbnM6eGxpbms9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkveGxpbmsiIHg9IjBweCIgeT0iMHB4IiB3aWR0aD0iMTRweCIgaGVpZ2h0PSIxNHB4IiB2aWV3Qm94PSIwIDAgMTQgMTQiIGVuYWJsZS1iYWNrZ3JvdW5kPSJuZXcgMCAwIDE0IDE0IiB4bWw6c3BhY2U9InByZXNlcnZlIj48cmVjdCB4PSIxIiB5PSIxMiIgZmlsbD0iI0ZGRkZGRiIgd2lkdGg9IjciIGhlaWdodD0iMSIvPjxwYXRoIGZpbGw9IiNGRkZGRkYiIGQ9Ik0wLjMsNy4zbDEtMS4xYzAuNCwwLjUsMC44LDAuNywxLjMsMC43YzAuNiwwLDEtMC40LDEtMS4yVjFoMS42djQuN2MwLDAuOS0wLjIsMS41LTAuNywxLjlDNC4xLDguMSwzLjQsOC40LDIuNiw4LjRDMS41LDguNCwwLjgsNy45LDAuMyw3LjN6Ii8%2BPHBhdGggZmlsbD0iI0ZGRkZGRiIgZD0iTTYuOCwxaDMuNGMwLjgsMCwxLjUsMC4yLDEuOSwwLjZjMC4zLDAuMywwLjUsMC43LDAuNSwxLjJsMCwwYzAsMC44LTAuNCwxLjMtMSwxLjZDMTIuNSw0LjgsMTMsNS4zLDEzLDYuMmwwLDBjMCwxLjMtMS4xLDItMi43LDJINi44VjF6IE0xMSwzLjFjMC0wLjUtMC40LTAuNy0xLTAuN0g4LjR2MS41aDEuNUMxMC42LDMuOSwxMSwzLjcsMTEsMy4xTDExLDMuMXogTTEwLjIsNS4zSDguNHYxLjZoMS45YzAuNywwLDEuMS0wLjIsMS4xLTAuOGwwLDBDMTEuNCw1LjYsMTEuMSw1LjMsMTAuMiw1LjN6Ii8%2BPHJlY3QgeD0iMSIgeT0iMTIiIGZpbGw9IiNGRkZGRkYiIHdpZHRoPSI3IiBoZWlnaHQ9IjEiLz48L3N2Zz4%3D)](https://www.jetbrai
ns.com/?from=jhdf) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3996097.svg)](https://doi.org/10.5281/zenodo.3996097) +[![jHDF CI](https://github.com/jamesmudd/jhdf/actions/workflows/ci.yml/badge.svg)](https://github.com/jamesmudd/jhdf/actions/workflows/ci.yml) [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=jamesmudd_jhdf&metric=coverage)](https://sonarcloud.io/dashboard?id=jamesmudd_jhdf) [![Maven Central](https://img.shields.io/maven-central/v/io.jhdf/jhdf.svg?label=Maven%20Central)](https://central.sonatype.com/artifact/io.jhdf/jhdf) [![Javadocs](http://javadoc.io/badge/io.jhdf/jhdf.svg)](http://javadoc.io/doc/io.jhdf/jhdf) [![JetBrains Supported](https://img.shields.io/badge/supported-project.svg?label=&colorA=grey&colorB=orange&logo=data%3Aimage%2Fsvg%2Bxml%3Bbase64%2CPD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz48c3ZnIHZlcnNpb249IjEuMSIgaWQ9IkxheWVyXzEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgeG1sbnM6eGxpbms9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkveGxpbmsiIHg9IjBweCIgeT0iMHB4IiB3aWR0aD0iMTRweCIgaGVpZ2h0PSIxNHB4IiB2aWV3Qm94PSIwIDAgMTQgMTQiIGVuYWJsZS1iYWNrZ3JvdW5kPSJuZXcgMCAwIDE0IDE0IiB4bWw6c3BhY2U9InByZXNlcnZlIj48cmVjdCB4PSIxIiB5PSIxMiIgZmlsbD0iI0ZGRkZGRiIgd2lkdGg9IjciIGhlaWdodD0iMSIvPjxwYXRoIGZpbGw9IiNGRkZGRkYiIGQ9Ik0wLjMsNy4zbDEtMS4xYzAuNCwwLjUsMC44LDAuNywxLjMsMC43YzAuNiwwLDEtMC40LDEtMS4yVjFoMS42djQuN2MwLDAuOS0wLjIsMS41LTAuNywxLjlDNC4xLDguMSwzLjQsOC40LDIuNiw4LjRDMS41LDguNCwwLjgsNy45LDAuMyw3LjN6Ii8%2BPHBhdGggZmlsbD0iI0ZGRkZGRiIgZD0iTTYuOCwxaDMuNGMwLjgsMCwxLjUsMC4yLDEuOSwwLjZjMC4zLDAuMywwLjUsMC43LDAuNSwxLjJsMCwwYzAsMC44LTAuNCwxLjMtMSwxLjZDMTIuNSw0LjgsMTMsNS4zLDEzLDYuMmwwLDBjMCwxLjMtMS4xLDItMi43LDJINi44VjF6IE0xMSwzLjFjMC0wLjUtMC40LTAuNy0xLTAuN0g4LjR2MS41aDEuNUMxMC42LDMuOSwxMSwzLjcsMTEsMy4xTDExLDMuMXogTTEwLjIsNS4zSDguNHYxLjZoMS45YzAuNywwLDEuMS0wLjIsMS4xLTAuOGwwLDBDMTEuNCw1LjYsMTEuMSw1LjMsMTAuMiw1LjN6Ii8%2BPHJlY3QgeD0iMSIgeT0iMTIiIGZpbGw9IiNGRkZGRkYiIHdpZHRoPSI3IiBoZWlnaHQ9IjEiLz48L3N2Zz4%3D)](https://www.jetbrains.com/?from=jhdf)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3996097.svg)](https://doi.org/10.5281/zenodo.3996097) This project is a pure Java implementation for accessing HDF5 files. It is written from the file format specification and is not using any HDF Group code, it is *not* a wrapper around the C libraries. The file format specification is available from the HDF Group [here](https://docs.hdfgroup.org/hdf5/v1_10/_f_m_t3.html). More information on the format is available on [Wikipedia](https://en.wikipedia.org/wiki/Hierarchical_Data_Format). I presented a webinar about jHDF for the HDF Group which is available on [YouTube](https://www.youtube.com/watch?v=pM8FICF4z_U) the example code used and slides can be found [here](https://github.com/jamesmudd/jhdf-webinar). @@ -31,7 +31,7 @@ See [WriteHdf5.java](jhdf/src/main/java/io/jhdf/examples/WriteHdf5.java) for a For more examples see package [io.jhdf.examples](jhdf/src/main/java/io/jhdf/examples) ## Why should I use jHDF? -- Easy integration with JVM based projects. The library is available on [Maven Central](https://search.maven.org/search?q=g:%22io.jhdf%22%20AND%20a:%22jhdf%22), and [GitHub Packages](https://github.com/jamesmudd/jhdf/packages/), so using it should be as easy as adding any other dependency. To use the libraries supplied by the HDF Group you need to load native code, which means you need to handle this in your build, and it complicates distribution of your software on multiple platforms. +- Easy integration with JVM based projects. The library is available on [Maven Central](https://central.sonatype.com/artifact/io.jhdf/jhdf), and [GitHub Packages](https://github.com/jamesmudd/jhdf/packages/), so using it should be as easy as adding any other dependency. To use the libraries supplied by the HDF Group you need to load native code, which means you need to handle this in your build, and it complicates distribution of your software on multiple platforms. 
- The API design intends to be familiar to Java programmers, so hopefully it works as you might expect. (If this is not the case, open an issue with suggestions for improvement) - No use of JNI, so you avoid all the issues associated with calling native code from the JVM. - Fully debug-able you can step fully through the library with a Java debugger. diff --git a/jhdf/build.gradle b/jhdf/build.gradle index ef608793..0a91a19f 100644 --- a/jhdf/build.gradle +++ b/jhdf/build.gradle @@ -21,7 +21,7 @@ plugins { // Community plugins (need versions) id 'org.sonarqube' version '4.0.0.2929' // Code quality - id "com.github.spotbugs" version "6.0.22" // Static analysis + id "com.github.spotbugs" version "6.0.23" // Static analysis id "me.champeau.jmh" version "0.7.2" // JMH support } @@ -55,7 +55,7 @@ dependencies { implementation group: 'org.lz4', name: 'lz4-java', version: '1.8.0' // Use JUnit 5 test framework - testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter', version: '5.11.0' + testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter', version: '5.11.1' testRuntimeOnly group: 'org.slf4j', name: 'slf4j-simple', version: '1.7.36' // Mocking @@ -67,8 +67,8 @@ dependencies { // Alternative bitshuffle impl to check results against testImplementation 'org.xerial.snappy:snappy-java:1.1.10.7' // For parsing h5dump XML output - testImplementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-xml:2.17.2' - testImplementation 'commons-io:commons-io:2.16.1' + testImplementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-xml:2.18.0' + testImplementation 'commons-io:commons-io:2.17.0' } test { @@ -104,7 +104,8 @@ jar { 'Bundle-Name': project.name, 'Bundle-Vendor': 'James Mudd', 'Bundle-Version': project.version, - 'Export-Package': 'io.jhdf,io.jhdf.*', + 'Export-Package': 'io.jhdf,io.jhdf.*,io.jhdf.api,io.jhdf.api.*', + 'Require-Bundle': 
'slf4j.api;bundle-version="1.7.36",org.apache.commons.lang3;bundle-version="3.17.0",com.ning.compress-lzf;bundle-version="1.1.2",lz4-java;bundle-version="1.8.0"', // Build data 'Build-JDK': System.getProperty('java.vendor') + ' ' + System.getProperty('java.version'), 'Build-OS': System.getProperty('os.name') + ' ' + System.getProperty('os.version'), diff --git a/jhdf/src/main/java/io/jhdf/api/dataset/ChunkedDataset.java b/jhdf/src/main/java/io/jhdf/api/dataset/ChunkedDataset.java index 1887b055..99d6e562 100644 --- a/jhdf/src/main/java/io/jhdf/api/dataset/ChunkedDataset.java +++ b/jhdf/src/main/java/io/jhdf/api/dataset/ChunkedDataset.java @@ -37,4 +37,13 @@ public interface ChunkedDataset extends Dataset { */ ByteBuffer getRawChunkBuffer(int[] chunkOffset); + /** + * Gets the decompressed bytes of the chunk at the specified offset. For a dataset without filters the result is expected to hold the same bytes as {@link #getRawChunkBuffer(int[])}. + * + * @param chunkOffset the offset of the required chunk + * @return a byte array holding the decompressed data of this chunk + * @throws HdfException if no chunk exists at the given offset in this dataset + */ + byte[] getDecompressedChunk(int[] chunkOffset); + } diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetBase.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetBase.java index dbebf256..e67e32fb 100644 --- a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetBase.java +++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetBase.java @@ -291,6 +291,16 @@ public ByteBuffer getRawChunkBuffer(int[] chunkOffset) { return getDataBuffer(chunk); } + @Override + public byte[] getDecompressedChunk(int[] chunkOffset) { + final Chunk chunk = getChunk(new ChunkOffset(chunkOffset)); + if (chunk == null) { + throw new HdfException("No chunk with offset " + Arrays.toString(chunkOffset) + + " in dataset: " + getPath()); + } + return decompressChunk(chunk); + } + private Collection getAllChunks() { return getChunkLookup().values(); } diff --git a/jhdf/src/test/java/io/jhdf/dataset/ChunkedV4DatasetTest.java 
b/jhdf/src/test/java/io/jhdf/dataset/ChunkedV4DatasetTest.java index a9dca899..1bafd6cd 100644 --- a/jhdf/src/test/java/io/jhdf/dataset/ChunkedV4DatasetTest.java +++ b/jhdf/src/test/java/io/jhdf/dataset/ChunkedV4DatasetTest.java @@ -33,8 +33,10 @@ import static io.jhdf.Utils.flatten; import static org.apache.commons.lang3.ArrayUtils.toObject; import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.not; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.arrayContaining; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.isA; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -118,4 +120,41 @@ void testGettingRawChunk() { intBuffer.get(chunkData); assertThat(toObject(chunkData), is(arrayContaining(0, 1, 2, 3, 4, 5))); } + @Test + void testGettingDecompressedChunkWithoutFiltersIsTheSameAsRaw() { + Dataset dataset = hdfFile.getDatasetByPath("/fixed_array/int32"); + assertThat(dataset, isA(ChunkedDataset.class)); + ChunkedDataset chunkedDataset = (ChunkedDataset) dataset; + assertThat(toObject(chunkedDataset.getChunkDimensions()), is(arrayContaining(2, 3))); + + ByteBuffer rawChunkBuffer = chunkedDataset.getRawChunkBuffer(new int[]{0, 0}); + byte[] decompressedChunkBytes = chunkedDataset.getDecompressedChunk(new int[]{0, 0}); + // Size from remaining() so the bulk get cannot underflow if the buffer's limit is below its capacity + byte[] rawChunkBytes = new byte[rawChunkBuffer.remaining()]; + rawChunkBuffer.get(rawChunkBytes); + assertThat(rawChunkBytes, is(decompressedChunkBytes)); + } + + @Test + void testGettingDecompressedChunkWithFilters() { + Dataset dataset = hdfFile.getDatasetByPath("/filtered_fixed_array/int32"); + assertThat(dataset, isA(ChunkedDataset.class)); + ChunkedDataset chunkedDataset = (ChunkedDataset) dataset; + assertThat(toObject(chunkedDataset.getChunkDimensions()), is(arrayContaining(2, 3))); + assertThat(chunkedDataset.getFilters(), hasSize(1)); + + // Check the compressed and decompressed chunks 
are different + ByteBuffer rawChunkBuffer = chunkedDataset.getRawChunkBuffer(new int[]{0, 0}); + byte[] decompressedChunkBytes = chunkedDataset.getDecompressedChunk(new int[]{0, 0}); + // Size from remaining() so the bulk get reads exactly the bytes between position and limit + byte[] rawChunkBytes = new byte[rawChunkBuffer.remaining()]; + rawChunkBuffer.get(rawChunkBytes); + assertThat(rawChunkBytes, is(not(decompressedChunkBytes))); + + IntBuffer intBuffer = ByteBuffer.wrap(decompressedChunkBytes).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer(); + int[] chunkData = new int[intBuffer.capacity()]; + intBuffer.get(chunkData); + + assertThat(toObject(chunkData), is(arrayContaining(0, 1, 2, 3, 4, 5))); + } }