From 5af7d9263b77aeeb1dfd93ace24ac33b6b1455e9 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 26 Nov 2024 16:47:15 -0500 Subject: [PATCH 01/14] feat: toward shallow metadata discovery * not requireing listing --- .../n5/universe/N5DatasetDiscoverer.java | 78 +++++++++++++++++- .../ome/ngff/v04/OmeNgffMetadataParser.java | 79 ++++++++++++++----- 2 files changed, 136 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java index 8873ca1..fbe2512 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java @@ -29,6 +29,7 @@ import java.text.Collator; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; @@ -51,6 +52,7 @@ import org.janelia.saalfeldlab.n5.universe.metadata.N5SingleScaleMetadataParser; import org.janelia.saalfeldlab.n5.universe.metadata.N5ViewerMultiscaleMetadataParser; import org.janelia.saalfeldlab.n5.universe.metadata.canonical.CanonicalMetadataParser; +import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMetadata; import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMetadataParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -95,6 +97,10 @@ public class N5DatasetDiscoverer { new CanonicalMetadataParser(), }; + public static final N5MetadataParser[] DEFAULT_SHALLOW_GROUP_PARSERS = new N5MetadataParser[] { + new OmeNgffMetadataParser(true) + }; + private final List> metadataParsers; private final List> groupParsers; @@ -322,10 +328,47 @@ public static void parseMetadata(final N5Reader n5, final N5TreeNode node, } } } + + /** + * Parses metadata for a node using the given parsers, stopping after the first + * success. + * + * @param n5 the N5Reader + * @param node the tree node + * @param metadataParsers list of metadata parsers + * @param groupParsers list of group parsers + * @throws IOException the exception + */ + public static void parseMetadataShallow(final N5Reader n5, final N5TreeNode node, + final List> metadataParsers, final List> groupParsers) + throws IOException { + + // Go through all parsers to populate metadata + for (final N5MetadataParser parser : metadataParsers) { + try { + Optional parsedMeta; + parsedMeta = parser.apply(n5, node); + + parsedMeta.ifPresent(node::setMetadata); + if (parsedMeta.isPresent()) + break; + } catch (final Exception ignored) { + } + } - public static boolean trim(final N5TreeNode node ) { - return trim( node, x -> {}); - } + // this may be a group (e.g. multiscale pyramid) try to parse groups + for (final N5MetadataParser gp : groupParsers) { + final Optional groupMeta = gp.apply(n5, node); + groupMeta.ifPresent(node::setMetadata); + if (groupMeta.isPresent()) + break; + } + } + + public static boolean trim(final N5TreeNode node) { + return trim(node, x -> { + }); + } /** * Removes branches of the N5 container tree that do not contain any nodes that can be opened @@ -590,4 +633,33 @@ public static N5TreeNode discover(final N5Reader n5 ) { Arrays.asList(DEFAULT_GROUP_PARSERS)); } + /** + * Discovers metadata present at the provided dataset without listing. + * + * @param n5 the reader + * @param dataset the dataset + * @return + */ + public static N5TreeNode discoverShallow(final N5Reader n5, final String dataset) { + + final N5TreeNode node = new N5TreeNode(dataset); + + try { + parseMetadataShallow(n5, node, Arrays.asList(DEFAULT_PARSERS), Arrays.asList(DEFAULT_SHALLOW_GROUP_PARSERS)); + } catch (IOException e) { } + + return node; + } + + /** + * Discovers metadata present at the root without listing + * + * @param n5 the reader + * @return + */ + public static N5TreeNode discoverShallow(final N5Reader n5) { + + return discoverShallow(n5, "/"); + } + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v04/OmeNgffMetadataParser.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v04/OmeNgffMetadataParser.java index 0364973..b05dd64 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v04/OmeNgffMetadataParser.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v04/OmeNgffMetadataParser.java @@ -29,9 +29,12 @@ public class OmeNgffMetadataParser implements N5MetadataParser, private final Gson gson; - public OmeNgffMetadataParser(final boolean reverse) { + protected final boolean assumeChildren; + + public OmeNgffMetadataParser(final boolean assumeChildren) { gson = gsonBuilder().create(); + this.assumeChildren = assumeChildren; } public OmeNgffMetadataParser() { @@ -65,16 +68,65 @@ public Optional parseMetadata(final N5Reader n5, final N5TreeNo int nd = -1; final Map scaleLevelNodes = new HashMap<>(); - for (final N5TreeNode childNode : node.childrenList()) { - if (childNode.isDataset() && childNode.getMetadata() != null) { - scaleLevelNodes.put(childNode.getPath(), childNode); - if (nd < 0) - nd = ((N5DatasetMetadata) childNode.getMetadata()).getAttributes().getNumDimensions(); + + DatasetAttributes[] attrs = null; + if( assumeChildren ) { + + for (int j = 0; j < multiscales.length; j++) { + + final OmeNgffMultiScaleMetadata ms = multiscales[j]; + + nd = ms.getAxes().length; + + final int numScales = ms.datasets.length; + attrs = new DatasetAttributes[numScales]; + for (int i = 0; i < numScales; i++) { + + // TODO check existence here or elsewhere? + final N5TreeNode child = new N5TreeNode( + MetadataUtils.canonicalPath(node, ms.getDatasets()[i].path)); + final DatasetAttributes dsetAttrs = n5.getDatasetAttributes(child.getPath()); + if (dsetAttrs == null) + return Optional.empty(); + + attrs[i] = dsetAttrs; + node.childrenList().add(child); + } + + final NgffSingleScaleAxesMetadata[] msChildrenMeta = OmeNgffMultiScaleMetadata.buildMetadata(nd, + node.getPath(), ms.datasets, attrs, ms.coordinateTransformations, ms.metadata, ms.axes); + + // add to scale level nodes map + node.childrenList().forEach(n -> { + scaleLevelNodes.put(n.getPath(), n); + }); } - } - if (nd < 0) - return Optional.empty(); + } else { + + for (final N5TreeNode childNode : node.childrenList()) { + if (childNode.isDataset() && childNode.getMetadata() != null) { + scaleLevelNodes.put(childNode.getPath(), childNode); + if (nd < 0) + nd = ((N5DatasetMetadata) childNode.getMetadata()).getAttributes().getNumDimensions(); + } + } + + if (nd < 0) + return Optional.empty(); + + for (int j = 0; j < multiscales.length; j++) { + + final OmeNgffMultiScaleMetadata ms = multiscales[j]; + final String[] paths = ms.getPaths(); + attrs = new DatasetAttributes[ms.getPaths().length]; + final N5DatasetMetadata[] dsetMeta = new N5DatasetMetadata[paths.length]; + for (int i = 0; i < paths.length; i++) { + dsetMeta[i] = ((N5DatasetMetadata)scaleLevelNodes.get(MetadataUtils.canonicalPath(node, paths[i])).getMetadata()); + attrs[i] = dsetMeta[i].getAttributes(); + } + } + } /* * Need to replace all children with new children with the metadata from @@ -84,15 +136,6 @@ public Optional parseMetadata(final N5Reader n5, final N5TreeNo final OmeNgffMultiScaleMetadata ms = multiscales[j]; - final String[] paths = ms.getPaths(); - final DatasetAttributes[] attrs = new DatasetAttributes[ms.getPaths().length]; - - final N5DatasetMetadata[] dsetMeta = new N5DatasetMetadata[paths.length]; - for (int i = 0; i < paths.length; i++) { - dsetMeta[i] = ((N5DatasetMetadata)scaleLevelNodes.get(MetadataUtils.canonicalPath(node, paths[i])).getMetadata()); - attrs[i] = dsetMeta[i].getAttributes(); - } - // maybe axes can be flipped first? ArrayUtils.reverse(ms.axes); From a64961754d47ddb19f60374b645810620f0d9a04 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 6 Dec 2024 10:54:42 -0500 Subject: [PATCH 02/14] build: bump dependencies --- pom.xml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pom.xml b/pom.xml index 6e38c1c..c4f7e77 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ org.scijava pom-scijava - 37.0.0 + 39.0.0 @@ -111,18 +111,12 @@ sign,deploy-to-scijava - 3.2.0 - 4.2.0 - 2.2.0 - 7.0.0 - 4.1.0 - 1.3.4 - 1.0.0-preview.20191208 1.4.1 0.2.5 2.2.2 + 1.1.1 @@ -205,7 +199,7 @@ com.googlecode.json-simple json-simple - 1.1.1 + ${json-simple.version} test From 5b2a34b5b41da275f86a2d7a68fab4ac2a78bbe3 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 6 Dec 2024 11:01:08 -0500 Subject: [PATCH 03/14] build: bump version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c4f7e77..2292535 100644 --- a/pom.xml +++ b/pom.xml @@ -11,7 +11,7 @@ org.janelia.saalfeldlab n5-universe - 1.6.1-SNAPSHOT + 1.7.0-SNAPSHOT N5-Universe Utilities spanning all of the N5 repositories From c3dd7b0e7f16291dfe677f0ba4ea4d3cd128d19a Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 6 Dec 2024 10:56:58 -0500 Subject: [PATCH 04/14] refactor: elevate KeyValueAccessBackend and expose factory getter --- .../n5/universe/KeyValueAccessBackend.java | 116 +++++++++++++ .../saalfeldlab/n5/universe/N5Factory.java | 162 ++---------------- .../n5/universe/StorageFormat.java | 94 ++++++++++ .../n5/universe/N5FactoryTests.java | 45 +++-- .../n5/universe/N5StorageTests.java | 5 +- .../universe/StorageSchemeWrappedN5Test.java | 2 +- .../n5/universe/ZarrStorageTests.java | 5 +- 7 files changed, 248 insertions(+), 181 deletions(-) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/universe/KeyValueAccessBackend.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/universe/StorageFormat.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/KeyValueAccessBackend.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/KeyValueAccessBackend.java new file mode 100644 index 0000000..3cb61e8 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/KeyValueAccessBackend.java @@ -0,0 +1,116 @@ +package org.janelia.saalfeldlab.n5.universe; + +import com.amazonaws.services.s3.AmazonS3; +import org.janelia.saalfeldlab.googlecloud.GoogleCloudStorageURI; +import org.janelia.saalfeldlab.googlecloud.GoogleCloudUtils; +import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.N5Reader; +import org.janelia.saalfeldlab.n5.N5Writer; +import org.janelia.saalfeldlab.n5.googlecloud.GoogleCloudStorageKeyValueAccess; +import org.janelia.saalfeldlab.n5.s3.AmazonS3KeyValueAccess; +import org.janelia.saalfeldlab.n5.s3.AmazonS3Utils; + +import javax.annotation.Nullable; +import java.net.URI; +import java.nio.file.FileSystems; +import java.util.function.BiFunction; +import java.util.function.Predicate; +import java.util.regex.Pattern; + +/** + * Enum to discover and provide {@link KeyValueAccess} for {@link N5Reader}s and {@link N5Writer}s. + * IMPORTANT: If ever new {@link KeyValueAccess} backends are adding, they MUST be re-ordered + * such that the earliest predicates are the most restrictive, and the later predicates + * are the least restrictive. This ensures that when iterating through the values of + * {@link KeyValueAccessBackend} you can test them in order, and stop at the first + * {@link KeyValueAccess} that is generated. + */ +public enum KeyValueAccessBackend implements Predicate, BiFunction { + GOOGLE_CLOUD(uri -> { + final String scheme = uri.getScheme(); + final boolean hasScheme = scheme != null; + return hasScheme && GoogleCloudUtils.GS_SCHEME.asPredicate().test(scheme) + || hasScheme && N5Factory.HTTPS_SCHEME.asPredicate().test(scheme) + && uri.getHost() != null && GoogleCloudUtils.GS_HOST.asPredicate().test(uri.getHost()); + }, KeyValueAccessBackend::newGoogleCloudKeyValueAccess), + AWS(uri -> { + final String scheme = uri.getScheme(); + final boolean hasScheme = scheme != null; + return hasScheme && AmazonS3Utils.S3_SCHEME.asPredicate().test(scheme) + || uri.getHost() != null && hasScheme && N5Factory.HTTPS_SCHEME.asPredicate().test(scheme); + }, KeyValueAccessBackend::newAmazonS3KeyValueAccess), + FILE(uri -> { + final String scheme = uri.getScheme(); + final boolean hasScheme = scheme != null; + return !hasScheme || N5Factory.FILE_SCHEME.asPredicate().test(scheme); + }, KeyValueAccessBackend::newFileSystemKeyValueAccess); + + private final Predicate backendTest; + private final BiFunction backendGenerator; + + KeyValueAccessBackend(Predicate test, BiFunction generator) { + + backendTest = test; + backendGenerator = generator; + } + + @Override public KeyValueAccess apply(final URI uri, final N5Factory factory) { + + if (test(uri)) + return backendGenerator.apply(uri, factory); + return null; + } + + @Override public boolean test(URI uri) { + + return backendTest.test(uri); + } + + /** + * Test the provided {@link URI} to and return the appropriate {@link KeyValueAccess}. + * If no appropriate {@link KeyValueAccess} is found, may be null + * + * @param uri to create a {@link KeyValueAccess} from. + * @return the {@link KeyValueAccess} and container path, or null if none are valid + */ + @Nullable + public static KeyValueAccess getKeyValueAccess(final URI uri) { + + return getKeyValueAccess(uri, N5Factory.FACTORY); + } + + @Nullable + static KeyValueAccess getKeyValueAccess(final URI uri, final N5Factory factory) { + + /*NOTE: The order of these tests is very important, as the predicates for each + * backend take into account reasonable defaults when possible. + * Here we test from most to least restrictive. + * See the Javadoc for more details. */ + for (final KeyValueAccessBackend backend : KeyValueAccessBackend.values()) { + final KeyValueAccess kva = backend.apply(uri, factory); + if (kva != null) + return kva; + } + return null; + } + + private static GoogleCloudStorageKeyValueAccess newGoogleCloudKeyValueAccess(final URI uri, final N5Factory factory) { + + final GoogleCloudStorageURI googleCloudUri = new GoogleCloudStorageURI(uri); + return new GoogleCloudStorageKeyValueAccess(factory.createGoogleCloudStorage(), googleCloudUri, true); + } + + private static AmazonS3KeyValueAccess newAmazonS3KeyValueAccess(final URI uri, final N5Factory factory) { + + final String uriString = uri.toString(); + final AmazonS3 s3 = factory.createS3(uriString); + + return new AmazonS3KeyValueAccess(s3, uri, true); + } + + private static FileSystemKeyValueAccess newFileSystemKeyValueAccess(final URI uri, final N5Factory factory) { + + return new FileSystemKeyValueAccess(FileSystems.getDefault()); + } +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java index a808a95..43120b3 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java @@ -26,21 +26,16 @@ */ package org.janelia.saalfeldlab.n5.universe; -import java.io.File; import java.io.Serializable; import java.net.URI; import java.net.URISyntaxException; -import java.nio.file.FileSystems; import java.nio.file.Paths; import java.util.function.BiFunction; -import java.util.function.Predicate; -import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.annotation.Nullable; import org.apache.commons.lang3.function.TriFunction; -import org.janelia.saalfeldlab.googlecloud.GoogleCloudStorageURI; import org.janelia.saalfeldlab.googlecloud.GoogleCloudUtils; import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; import org.janelia.saalfeldlab.n5.KeyValueAccess; @@ -50,11 +45,8 @@ import org.janelia.saalfeldlab.n5.N5Reader; import org.janelia.saalfeldlab.n5.N5URI; import org.janelia.saalfeldlab.n5.N5Writer; -import org.janelia.saalfeldlab.n5.googlecloud.GoogleCloudStorageKeyValueAccess; -import org.janelia.saalfeldlab.n5.hdf5.HDF5Utils; import org.janelia.saalfeldlab.n5.hdf5.N5HDF5Reader; import org.janelia.saalfeldlab.n5.hdf5.N5HDF5Writer; -import org.janelia.saalfeldlab.n5.s3.AmazonS3KeyValueAccess; import org.janelia.saalfeldlab.n5.s3.AmazonS3Utils; import org.janelia.saalfeldlab.n5.zarr.N5ZarrReader; import org.janelia.saalfeldlab.n5.zarr.N5ZarrWriter; @@ -69,7 +61,6 @@ import com.google.gson.GsonBuilder; import net.imglib2.util.Pair; -import net.imglib2.util.ValuePair; /** * Factory for various N5 readers and writers. Implementation specific @@ -83,11 +74,11 @@ */ public class N5Factory implements Serializable { - private static final N5Factory FACTORY = new N5Factory(); + static final N5Factory FACTORY = new N5Factory(); private static final long serialVersionUID = -6823715427289454617L; - private final static Pattern HTTPS_SCHEME = Pattern.compile("http(s)?", Pattern.CASE_INSENSITIVE); - private final static Pattern FILE_SCHEME = Pattern.compile("file", Pattern.CASE_INSENSITIVE); + final static Pattern HTTPS_SCHEME = Pattern.compile("http(s)?", Pattern.CASE_INSENSITIVE); + final static Pattern FILE_SCHEME = Pattern.compile("file", Pattern.CASE_INSENSITIVE); private int[] hdf5DefaultBlockSize = {64, 64, 64, 1, 1}; private boolean hdf5OverrideBlockSize = false; private GsonBuilder gsonBuilder = new GsonBuilder(); @@ -102,25 +93,6 @@ public class N5Factory implements Serializable { private boolean s3Anonymous = true; private String s3Endpoint; - private static GoogleCloudStorageKeyValueAccess newGoogleCloudKeyValueAccess(final URI uri, final N5Factory factory) { - - final GoogleCloudStorageURI googleCloudUri = new GoogleCloudStorageURI(uri); - return new GoogleCloudStorageKeyValueAccess(factory.createGoogleCloudStorage(), googleCloudUri, true); - } - - private static AmazonS3KeyValueAccess newAmazonS3KeyValueAccess(final URI uri, final N5Factory factory) { - - final String uriString = uri.toString(); - final AmazonS3 s3 = factory.createS3(uriString); - - return new AmazonS3KeyValueAccess(s3, uri, true); - } - - private static FileSystemKeyValueAccess newFileSystemKeyValueAccess(final URI uri, final N5Factory factory) { - - return new FileSystemKeyValueAccess(FileSystems.getDefault()); - } - public N5Factory hdf5DefaultBlockSize(final int... blockSize) { hdf5DefaultBlockSize = blockSize; @@ -225,26 +197,21 @@ Storage createGoogleCloudStorage() { return GoogleCloudUtils.createGoogleCloudStorage(googleCloudProjectId); } + + /** * Test the provided {@link URI} to and return the appropriate {@link KeyValueAccess}. * If no appropriate {@link KeyValueAccess} is found, may be null + *

+ * This differs subtly from {@link KeyValueAccessBackend#getKeyValueAccess(URI)} in that + * the resulting {@link KeyValueAccess} may use configured fields from this {@link N5Factory}. * * @param uri to create a {@link KeyValueAccess} from. * @return the {@link KeyValueAccess} and container path, or null if none are valid */ @Nullable - KeyValueAccess getKeyValueAccess(final URI uri) { - - /*NOTE: The order of these tests is very important, as the predicates for each - * backend take into account reasonable defaults when possible. - * Here we test from most to least restrictive. - * See the Javadoc for more details. */ - for (final KeyValueAccessBackend backend : KeyValueAccessBackend.values()) { - final KeyValueAccess kva = backend.apply(uri, this); - if (kva != null) - return kva; - } - return null; + public KeyValueAccess getKeyValueAccess(final URI uri) { + return KeyValueAccessBackend.getKeyValueAccess(uri ,this); } /** @@ -569,113 +536,6 @@ private T openN5Container( return openN5Container(null, uri, openWithKva); } - /** - * Enum to discover and provide {@link KeyValueAccess} for {@link N5Reader}s and {@link N5Writer}s. - * IMPORTANT: If ever new {@link KeyValueAccess} backends are adding, they MUST be re-ordered - * such that the earliest predicates are the most restrictive, and the later predicates - * are the least restrictive. This ensures that when iterating through the values of - * {@link KeyValueAccessBackend} you can test them in order, and stop at the first - * {@link KeyValueAccess} that is generated. - */ - enum KeyValueAccessBackend implements Predicate, BiFunction { - GOOGLE_CLOUD(uri -> { - final String scheme = uri.getScheme(); - final boolean hasScheme = scheme != null; - return hasScheme && GoogleCloudUtils.GS_SCHEME.asPredicate().test(scheme) - || hasScheme && HTTPS_SCHEME.asPredicate().test(scheme) - && uri.getHost() != null && GoogleCloudUtils.GS_HOST.asPredicate().test(uri.getHost()); - }, N5Factory::newGoogleCloudKeyValueAccess), - AWS(uri -> { - final String scheme = uri.getScheme(); - final boolean hasScheme = scheme != null; - return hasScheme && AmazonS3Utils.S3_SCHEME.asPredicate().test(scheme) - || uri.getHost() != null && hasScheme && HTTPS_SCHEME.asPredicate().test(scheme); - }, N5Factory::newAmazonS3KeyValueAccess), - FILE(uri -> { - final String scheme = uri.getScheme(); - final boolean hasScheme = scheme != null; - return !hasScheme || FILE_SCHEME.asPredicate().test(scheme); - }, N5Factory::newFileSystemKeyValueAccess); - - private final Predicate backendTest; - private final BiFunction backendGenerator; - - KeyValueAccessBackend(Predicate test, BiFunction generator) { - - backendTest = test; - backendGenerator = generator; - } - - @Override public KeyValueAccess apply(final URI uri, final N5Factory factory) { - - if (test(uri)) - return backendGenerator.apply(uri, factory); - return null; - } - - @Override public boolean test(URI uri) { - - return backendTest.test(uri); - } - } - - public enum StorageFormat { - ZARR(Pattern.compile("zarr", Pattern.CASE_INSENSITIVE), uri -> Pattern.compile("\\.zarr$", Pattern.CASE_INSENSITIVE).matcher(new File(uri.getPath()).toString()).find()), - N5(Pattern.compile("n5", Pattern.CASE_INSENSITIVE), uri -> Pattern.compile("\\.n5$", Pattern.CASE_INSENSITIVE).matcher(new File(uri.getPath()).toString()).find()), - HDF5(Pattern.compile("h(df)?5", Pattern.CASE_INSENSITIVE), uri -> { - final boolean hasHdf5Extension = Pattern.compile("\\.h(df)?5$", Pattern.CASE_INSENSITIVE).matcher(uri.getPath()).find(); - return hasHdf5Extension || HDF5Utils.isHDF5(uri.getPath()); - }); - - static final Pattern STORAGE_SCHEME_PATTERN = Pattern.compile("^(\\s*(?(n5|h(df)?5|zarr)):(//)?)?(?.*)$", Pattern.CASE_INSENSITIVE); - private final static String STORAGE_SCHEME_GROUP = "storageScheme"; - private final static String URI_GROUP = "uri"; - - final Pattern schemePattern; - private final Predicate uriTest; - - StorageFormat(final Pattern schemePattern, final Predicate test) { - - this.schemePattern = schemePattern; - this.uriTest = test; - } - - public static StorageFormat guessStorageFromUri(URI uri) { - - for (final StorageFormat format : StorageFormat.values()) { - if (format.uriTest.test(uri)) - return format; - } - return null; - } - - public static Pair parseUri(String uri) throws URISyntaxException { - - final Pair storageFromScheme = getStorageFromNestedScheme(uri); - final URI asUri = parseUriFromString(storageFromScheme.getB()); - if (storageFromScheme.getA() != null) - return new ValuePair<>(storageFromScheme.getA(), asUri); - else - return new ValuePair<>(guessStorageFromUri(asUri), asUri); - - } - - public static Pair getStorageFromNestedScheme(String uri) { - - final Matcher storageSchemeMatcher = StorageFormat.STORAGE_SCHEME_PATTERN.matcher(uri); - storageSchemeMatcher.matches(); - final String storageFormatScheme = storageSchemeMatcher.group(STORAGE_SCHEME_GROUP); - final String uriGroup = storageSchemeMatcher.group(URI_GROUP); - if (storageFormatScheme != null) { - for (final StorageFormat format : StorageFormat.values()) { - if (format.schemePattern.asPredicate().test(storageFormatScheme)) - return new ValuePair<>(format, uriGroup); - } - } - return new ValuePair<>(null, uriGroup); - } - } - /** * Creates an N5 writer for the specified container URI with default N5Factory configuration. * @@ -698,7 +558,7 @@ public static N5Reader createReader(String containerUri) { return FACTORY.openReader(containerUri); } - private static URI parseUriFromString(String uri) { + static URI parseUriFromString(String uri) { try { return URI.create(uri); } catch (final Throwable ignore) {} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/StorageFormat.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/StorageFormat.java new file mode 100644 index 0000000..03d5527 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/StorageFormat.java @@ -0,0 +1,94 @@ +package org.janelia.saalfeldlab.n5.universe; + +import net.imglib2.util.Pair; +import net.imglib2.util.ValuePair; +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.hdf5.HDF5Utils; + +import javax.annotation.Nullable; +import java.io.File; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.function.Predicate; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public enum StorageFormat { + ZARR(Pattern.compile("zarr", Pattern.CASE_INSENSITIVE), uri -> Pattern.compile("\\.zarr$", Pattern.CASE_INSENSITIVE).matcher(new File(uri.getPath()).toString()).find()), + N5(Pattern.compile("n5", Pattern.CASE_INSENSITIVE), uri -> Pattern.compile("\\.n5$", Pattern.CASE_INSENSITIVE).matcher(new File(uri.getPath()).toString()).find()), + HDF5(Pattern.compile("h(df)?5", Pattern.CASE_INSENSITIVE), uri -> { + final boolean hasHdf5Extension = Pattern.compile("\\.h(df)?5$", Pattern.CASE_INSENSITIVE).matcher(uri.getPath()).find(); + return hasHdf5Extension || HDF5Utils.isHDF5(uri.getPath()); + }); + + static final Pattern STORAGE_SCHEME_PATTERN = Pattern.compile("^(\\s*(?(n5|h(df)?5|zarr)):(//)?)?(?.*)$", Pattern.CASE_INSENSITIVE); + private final static String STORAGE_SCHEME_GROUP = "storageScheme"; + private final static String URI_GROUP = "uri"; + + final Pattern schemePattern; + private final Predicate uriTest; + + StorageFormat(final Pattern schemePattern, final Predicate test) { + + this.schemePattern = schemePattern; + this.uriTest = test; + } + + public static StorageFormat guessStorageFromUri(URI uri) { + + for (final StorageFormat format : StorageFormat.values()) { + if (format.uriTest.test(uri)) + return format; + } + return null; + } + + public static Pair parseUri(String uri) throws URISyntaxException { + + final Pair storageFromScheme = getStorageFromNestedScheme(uri); + final URI asUri = N5Factory.parseUriFromString(storageFromScheme.getB()); + if (storageFromScheme.getA() != null) + return new ValuePair<>(storageFromScheme.getA(), asUri); + else + return new ValuePair<>(guessStorageFromUri(asUri), asUri); + + } + + public static Pair getStorageFromNestedScheme(String uri) { + + final Matcher storageSchemeMatcher = StorageFormat.STORAGE_SCHEME_PATTERN.matcher(uri); + storageSchemeMatcher.matches(); + final String storageFormatScheme = storageSchemeMatcher.group(STORAGE_SCHEME_GROUP); + final String uriGroup = storageSchemeMatcher.group(URI_GROUP); + if (storageFormatScheme != null) { + for (final StorageFormat format : StorageFormat.values()) { + if (format.schemePattern.asPredicate().test(storageFormatScheme)) + return new ValuePair<>(format, uriGroup); + } + } + return new ValuePair<>(null, uriGroup); + } + + private static final String ZARRAY = ".zarray"; + private static final String ZGROUP = ".zgroup"; + private static final String ZATTRS = ".zattrs"; + private static final String[] ZARR2_KEYS = new String[]{ZARRAY, ZGROUP, ZATTRS}; + private static final String Z3ATTRS = ".zattrs"; + private static final String N5_ATTRIBUTES = "attributes.json"; + + public static @Nullable StorageFormat guessStorageFromKeys(final URI root, final KeyValueAccess kva) { + final URI uri; + if (root.isAbsolute()) + uri = root; + else + uri = URI.create("file://" + root); + if (Arrays.stream(ZARR2_KEYS).anyMatch(it -> kva.exists(kva.compose(uri, it)))) + return StorageFormat.ZARR; + else if (kva.exists(kva.compose(uri, N5_ATTRIBUTES))) + return StorageFormat.N5; + else return null; + } +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java index f84884a..1fe4653 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java @@ -7,7 +7,6 @@ import org.janelia.saalfeldlab.n5.N5Writer; import org.janelia.saalfeldlab.n5.hdf5.N5HDF5Reader; import org.janelia.saalfeldlab.n5.hdf5.N5HDF5Writer; -import org.janelia.saalfeldlab.n5.universe.N5Factory.StorageFormat; import org.janelia.saalfeldlab.n5.zarr.ZarrKeyValueReader; import org.janelia.saalfeldlab.n5.zarr.ZarrKeyValueWriter; import org.junit.Test; @@ -40,39 +39,39 @@ public void testStorageFormatGuesses() throws URISyntaxException { final URI zarrExtSlash = new URI("file:///tmp/a.zarr/"); final URI unknownExt = new URI("file:///tmp/a.abc"); - assertNull("no extension null", N5Factory.StorageFormat.guessStorageFromUri(noExt)); + assertNull("no extension null", StorageFormat.guessStorageFromUri(noExt)); /** * h5 tests fail now because these test whether the file exists. It * should not do that, if, for example, we're making a writer. */ - assertEquals("h5 extension == h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(h5Ext)); - assertNotEquals("h5 extension != n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(h5Ext)); - assertNotEquals("h5 extension != zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(h5Ext)); + assertEquals("h5 extension == h5", StorageFormat.HDF5, StorageFormat.guessStorageFromUri(h5Ext)); + assertNotEquals("h5 extension != n5", StorageFormat.N5, StorageFormat.guessStorageFromUri(h5Ext)); + assertNotEquals("h5 extension != zarr", StorageFormat.ZARR, StorageFormat.guessStorageFromUri(h5Ext)); - assertEquals("hdf5 extension == h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(hdf5Ext)); - assertNotEquals("hdf5 extension != n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(hdf5Ext)); - assertNotEquals("hdf5 extension != zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(hdf5Ext)); + assertEquals("hdf5 extension == h5", StorageFormat.HDF5, StorageFormat.guessStorageFromUri(hdf5Ext)); + assertNotEquals("hdf5 extension != n5", StorageFormat.N5, StorageFormat.guessStorageFromUri(hdf5Ext)); + assertNotEquals("hdf5 extension != zarr", StorageFormat.ZARR, StorageFormat.guessStorageFromUri(hdf5Ext)); - assertNotEquals("n5 extension != h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(n5Ext)); - assertEquals("n5 extension == n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(n5Ext)); - assertNotEquals("n5 extension != zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(n5Ext)); + assertNotEquals("n5 extension != h5", StorageFormat.HDF5, StorageFormat.guessStorageFromUri(n5Ext)); + assertEquals("n5 extension == n5", StorageFormat.N5, StorageFormat.guessStorageFromUri(n5Ext)); + assertNotEquals("n5 extension != zarr", StorageFormat.ZARR, StorageFormat.guessStorageFromUri(n5Ext)); - assertNotEquals("n5 extension slash != h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(n5ExtSlash)); - assertEquals("n5 extension slash == n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(n5ExtSlash)); - assertNotEquals("n5 extension slash != zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(n5ExtSlash)); + assertNotEquals("n5 extension slash != h5", StorageFormat.HDF5, StorageFormat.guessStorageFromUri(n5ExtSlash)); + assertEquals("n5 extension slash == n5", StorageFormat.N5, StorageFormat.guessStorageFromUri(n5ExtSlash)); + assertNotEquals("n5 extension slash != zarr", StorageFormat.ZARR, StorageFormat.guessStorageFromUri(n5ExtSlash)); - assertNotEquals("zarr extension != h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(zarrExt)); - assertNotEquals("zarr extension != n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(zarrExt)); - assertEquals("zarr extension == zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(zarrExt)); + assertNotEquals("zarr extension != h5", StorageFormat.HDF5, StorageFormat.guessStorageFromUri(zarrExt)); + assertNotEquals("zarr extension != n5", StorageFormat.N5, StorageFormat.guessStorageFromUri(zarrExt)); + assertEquals("zarr extension == zarr", StorageFormat.ZARR, StorageFormat.guessStorageFromUri(zarrExt)); - assertNotEquals("zarr extension slash != h5", StorageFormat.HDF5, N5Factory.StorageFormat.guessStorageFromUri(zarrExtSlash)); - assertNotEquals("zarr extension slash != n5", StorageFormat.N5, N5Factory.StorageFormat.guessStorageFromUri(zarrExtSlash)); - assertEquals("zarr extension slash == zarr", StorageFormat.ZARR, N5Factory.StorageFormat.guessStorageFromUri(zarrExtSlash)); + assertNotEquals("zarr extension slash != h5", StorageFormat.HDF5, StorageFormat.guessStorageFromUri(zarrExtSlash)); + assertNotEquals("zarr extension slash != n5", StorageFormat.N5, StorageFormat.guessStorageFromUri(zarrExtSlash)); + assertEquals("zarr extension slash == zarr", StorageFormat.ZARR, StorageFormat.guessStorageFromUri(zarrExtSlash)); - assertNull("unknown extension != h5", N5Factory.StorageFormat.guessStorageFromUri(unknownExt)); - assertNull("unknown extension != n5", N5Factory.StorageFormat.guessStorageFromUri(unknownExt)); - assertNull("unknown extension != zarr", N5Factory.StorageFormat.guessStorageFromUri(unknownExt)); + assertNull("unknown extension != h5", StorageFormat.guessStorageFromUri(unknownExt)); + assertNull("unknown extension != n5", StorageFormat.guessStorageFromUri(unknownExt)); + assertNull("unknown extension != zarr", StorageFormat.guessStorageFromUri(unknownExt)); } @Test diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/N5StorageTests.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/N5StorageTests.java index 33cb6e7..a143b56 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/universe/N5StorageTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/N5StorageTests.java @@ -1,7 +1,6 @@ package org.janelia.saalfeldlab.n5.universe; import com.amazonaws.services.s3.AmazonS3; -import com.google.cloud.storage.Bucket; import com.google.cloud.storage.Storage; import com.google.gson.GsonBuilder; import org.janelia.saalfeldlab.n5.AbstractN5Test; @@ -58,9 +57,9 @@ public N5FactoryTest() { return factory; } - @Override public N5Factory.StorageFormat getStorageFormat() { + @Override public StorageFormat getStorageFormat() { - return N5Factory.StorageFormat.N5; + return StorageFormat.N5; } @Override protected N5Reader createN5Reader(String location, GsonBuilder gson) { diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/StorageSchemeWrappedN5Test.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/StorageSchemeWrappedN5Test.java index 0336647..9e8a38a 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/universe/StorageSchemeWrappedN5Test.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/StorageSchemeWrappedN5Test.java @@ -17,7 +17,7 @@ public interface StorageSchemeWrappedN5Test { N5Factory getFactory(); - N5Factory.StorageFormat getStorageFormat(); + StorageFormat getStorageFormat(); Class getBackendTargetClass(); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/ZarrStorageTests.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/ZarrStorageTests.java index 3f7f9a1..e35d105 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/universe/ZarrStorageTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/ZarrStorageTests.java @@ -1,7 +1,6 @@ package org.janelia.saalfeldlab.n5.universe; import com.amazonaws.services.s3.AmazonS3; -import com.google.cloud.storage.Bucket; import com.google.cloud.storage.Storage; import com.google.gson.GsonBuilder; import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; @@ -55,9 +54,9 @@ public ZarrFactoryTest() { return factory; } - @Override public N5Factory.StorageFormat getStorageFormat() { + @Override public StorageFormat getStorageFormat() { - return N5Factory.StorageFormat.ZARR; + return StorageFormat.ZARR; } @Override protected N5Writer createN5Writer() { From 3e7d0907d23ee9eb69b872264517fb21239d80ad Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 6 Dec 2024 10:57:30 -0500 Subject: [PATCH 05/14] feat: if ambiguous, try to guess StorageFormat from keys --- .../org/janelia/saalfeldlab/n5/universe/N5Factory.java | 9 ++++++--- .../janelia/saalfeldlab/n5/universe/N5FactoryTests.java | 8 ++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java index 43120b3..f539b68 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java @@ -503,7 +503,9 @@ private T openN5ContainerWithBackend( final Pair formatAndUri = StorageFormat.parseUri(containerUri); final URI uri = formatAndUri.getB(); final KeyValueAccess kva = backend.apply(uri, this); - return openWithBackend.apply(formatAndUri.getA(), kva, uri.toString()); + StorageFormat format = formatAndUri.getA(); + format = format != null? format : StorageFormat.guessStorageFromKeys(uri, kva); + return openWithBackend.apply(format, kva, uri.toString()); } private T openN5Container( @@ -511,10 +513,11 @@ private T openN5Container( final URI uri, final TriFunction openWithKva) { - final KeyValueAccess kva = getKeyValueAccess(uri); + final KeyValueAccess kva = KeyValueAccessBackend.getKeyValueAccess(uri); if (kva == null) throw new N5Exception("Cannot get KeyValueAccess at " + uri); - return openWithKva.apply(storageFormat, kva, uri.toString()); + final StorageFormat format = storageFormat != null ? storageFormat : StorageFormat.guessStorageFromKeys(uri, kva); + return openWithKva.apply(format, kva, uri.toString()); } private T openN5Container( diff --git a/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java b/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java index 1fe4653..de8ba69 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/universe/N5FactoryTests.java @@ -191,10 +191,10 @@ public void testDefaultForAmbiguousWriters() throws IOException { final Class[] writerTypes = new Class[]{ null, N5HDF5Writer.class, - ZarrKeyValueWriter.class, - ZarrKeyValueWriter.class, - ZarrKeyValueWriter.class, - ZarrKeyValueWriter.class + ZarrKeyValueWriter.class, //valid zarr, correct by key match + N5KeyValueWriter.class, //valid n5, correct by key match + ZarrKeyValueWriter.class, // empty directory, create new zarr + ZarrKeyValueWriter.class //directory doesn't exist, create new zarr }; for (int i = 0; i < paths.length; i++) { From aa7d7b505b09a178d432fefe1f77b2c89feedbca Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 6 Dec 2024 13:04:32 -0500 Subject: [PATCH 06/14] fix: use instance method, not static for getting KVA --- .../java/org/janelia/saalfeldlab/n5/universe/N5Factory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java index f539b68..1250688 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java @@ -513,7 +513,7 @@ private T openN5Container( final URI uri, final TriFunction openWithKva) { - final KeyValueAccess kva = KeyValueAccessBackend.getKeyValueAccess(uri); + final KeyValueAccess kva = getKeyValueAccess(uri); if (kva == null) throw new N5Exception("Cannot get KeyValueAccess at " + uri); final StorageFormat format = storageFormat != null ? storageFormat : StorageFormat.guessStorageFromKeys(uri, kva); From 6fb95ee22de920fb4f9ed61fb6d92463e6757eb8 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 10 Dec 2024 15:46:07 -0500 Subject: [PATCH 07/14] fix: ngff correct (reversed) axis order --- .../v04/OmeNgffMetadataSingleScaleParser.java | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v04/OmeNgffMetadataSingleScaleParser.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v04/OmeNgffMetadataSingleScaleParser.java index 1b1a295..f2e4a01 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v04/OmeNgffMetadataSingleScaleParser.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/metadata/ome/ngff/v04/OmeNgffMetadataSingleScaleParser.java @@ -1,5 +1,6 @@ package org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04; +import java.util.Collections; import java.util.Optional; import java.util.Map.Entry; @@ -10,6 +11,7 @@ import org.janelia.saalfeldlab.n5.universe.metadata.N5MetadataWriter; import com.google.gson.Gson; +import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonObject; @@ -20,7 +22,7 @@ public class OmeNgffMetadataSingleScaleParser implements N5MetadataParser e : jsonElem.entrySet() ) n5.setAttribute(path, e.getKey(), e.getValue()); } @@ -40,12 +50,13 @@ public void writeMetadata(NgffSingleScaleAxesMetadata t, N5Writer n5, String pat @Override public Optional parseMetadata(N5Reader n5, N5TreeNode node) { - // TODO implement me -// try { -// return Optional.of(n5.getAttribute(node.getNodeName(), null, NgffSingleScaleAxesMetadata.class)); -// } catch( final N5Exception e ) { - return Optional.empty(); -// } + /* + * not clear what this method should do, if anything. + * because according to spec, metadata are defined only min "multiscales". + * + * So, return empty. + */ + return Optional.empty(); } } From 0decb012e7110445b30c10e99e830493ca542ed1 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 10 Dec 2024 15:55:43 -0500 Subject: [PATCH 08/14] style: N5DatasetDiscoverer code formatting --- .../n5/universe/N5DatasetDiscoverer.java | 935 +++++++++--------- 1 file changed, 471 insertions(+), 464 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java index fbe2512..7b2b694 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java @@ -26,6 +26,7 @@ package org.janelia.saalfeldlab.n5.universe; import java.io.IOException; +import java.io.InputStream; import java.text.Collator; import java.util.ArrayList; import java.util.Arrays; @@ -42,7 +43,11 @@ import java.util.function.Consumer; import java.util.function.Predicate; +import org.janelia.saalfeldlab.googlecloud.GoogleCloudStorageClient; +import org.janelia.saalfeldlab.googlecloud.GoogleCloudUtils; +import org.janelia.saalfeldlab.n5.LockedChannel; import org.janelia.saalfeldlab.n5.N5Reader; +import org.janelia.saalfeldlab.n5.googlecloud.GoogleCloudStorageKeyValueAccess; import org.janelia.saalfeldlab.n5.universe.metadata.N5CosemMetadataParser; import org.janelia.saalfeldlab.n5.universe.metadata.N5CosemMultiScaleMetadata; import org.janelia.saalfeldlab.n5.universe.metadata.N5GenericSingleScaleMetadataParser; @@ -54,25 +59,37 @@ import org.janelia.saalfeldlab.n5.universe.metadata.canonical.CanonicalMetadataParser; import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMetadata; import org.janelia.saalfeldlab.n5.universe.metadata.ome.ngff.v04.OmeNgffMetadataParser; +import org.janelia.saalfeldlab.n5.zarr.ZarrKeyValueReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.auth.oauth2.GoogleCredentials; +import com.google.auth.oauth2.ServiceAccountCredentials; +import com.google.cloud.storage.Acl.User; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.BlobId; +import com.google.cloud.storage.BlobInfo; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageOptions; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonElement; + import se.sawano.java.text.AlphanumericComparator; /** * This class aids in detecting and parsing datsets in an N5 container. *

- * An N5DatasetDiscoverer specifies the types of {@link N5MetadataParser}s - * to attempt, and an {@link ExecutorService} that enables parsing in parallel. - * The parsers are passed to the constructor in a list. - * Group parsers are called after all others are called, and should - * be used when a parsers result depends on its children. + * An N5DatasetDiscoverer specifies the types of {@link N5MetadataParser}s to + * attempt, and an {@link ExecutorService} that enables parsing in parallel. The + * parsers are passed to the constructor in a list. Group parsers are called + * after all others are called, and should be used when a parsers result depends + * on its children. *

* The {@link discoverAndParseRecursive} method returns a {@link N5TreeNode} - * containing all child nodes, each of which contains pased metadata. - * For each group/dataset, the parsers will be called in order, - * and will return the first non-empty result. As such - * parsers should be ordered from most- to least-strict. + * containing all child nodes, each of which contains pased metadata. For each + * group/dataset, the parsers will be called in order, and will return the first + * non-empty result. As such parsers should be ordered from most- to + * least-strict. * * @author Caleb Hulbert * @author John Bogovic @@ -80,255 +97,202 @@ */ public class N5DatasetDiscoverer { - private static final Logger LOG = LoggerFactory.getLogger(N5DatasetDiscoverer.class); - - - public static final N5MetadataParser[] DEFAULT_PARSERS = new N5MetadataParser[] { - new N5CosemMetadataParser(), - new N5SingleScaleMetadataParser(), - new CanonicalMetadataParser(), - new N5GenericSingleScaleMetadataParser() - }; - - public static final N5MetadataParser[] DEFAULT_GROUP_PARSERS = new N5MetadataParser[] { - new OmeNgffMetadataParser(), - new N5CosemMultiScaleMetadata.CosemMultiScaleParser(), - new N5ViewerMultiscaleMetadataParser(), - new CanonicalMetadataParser(), - }; - - public static final N5MetadataParser[] DEFAULT_SHALLOW_GROUP_PARSERS = new N5MetadataParser[] { - new OmeNgffMetadataParser(true) - }; - - private final List> metadataParsers; - private final List> groupParsers; - - private final Comparator comparator; - - private final Predicate filter; - - private final ExecutorService executor; - - private N5TreeNode root; - - private String groupSeparator; - - private N5Reader n5; - - /** - * Creates an N5 discoverer with alphanumeric sorting order of groups/datasets (such as, s9 goes before s10). - * - * @param executor the executor - * @param metadataParsers metadata parsers - * @param groupParsers group parsers - */ - public N5DatasetDiscoverer(final ExecutorService executor, - final List> metadataParsers, - final List> groupParsers) { - - this(executor, - Optional.of(new AlphanumericComparator(Collator.getInstance())), - null, - metadataParsers, - groupParsers); - } - - public N5DatasetDiscoverer( - final N5Reader n5, - final ExecutorService executor, - final List> metadataParsers, - final List> groupParsers) { - - this(n5, - executor, - Optional.of(new AlphanumericComparator(Collator.getInstance())), - null, - metadataParsers, - groupParsers); - } - - /** - * Creates an N5 discoverer. - * - * @param metadataParsers metadata parsers - * @param groupParsers group parsers - */ - public N5DatasetDiscoverer( - final List> metadataParsers, - final List> groupParsers) { - - this(Executors.newSingleThreadExecutor(), - Optional.of(new AlphanumericComparator(Collator.getInstance())), - null, - metadataParsers, - groupParsers); - } - - /** - * Creates an N5 discoverer. - * - * @param n5 n5 reader - * @param metadataParsers metadata parsers - * @param groupParsers group parsers - */ - public N5DatasetDiscoverer(final N5Reader n5, - final List> metadataParsers, - final List> groupParsers) { - - this(n5, - Executors.newSingleThreadExecutor(), - Optional.of(new AlphanumericComparator(Collator.getInstance())), - null, - metadataParsers, - groupParsers); - } - - public N5DatasetDiscoverer( - final ExecutorService executor, - final Predicate filter, - final List> metadataParsers, - final List> groupParsers) { - - this(executor, - Optional.of(new AlphanumericComparator(Collator.getInstance())), - filter, - metadataParsers, - groupParsers); - } - - public N5DatasetDiscoverer( - final N5Reader n5, - final ExecutorService executor, - final Predicate filter, - final List> metadataParsers, - final List> groupParsers) { - - this(n5, - executor, - Optional.of(new AlphanumericComparator(Collator.getInstance())), - filter, - metadataParsers, - groupParsers); - } - - public N5DatasetDiscoverer( - final ExecutorService executor, - final Optional> comparator, - final List> metadataParsers, - final List> groupParsers) { - - this(executor, comparator, null, metadataParsers, groupParsers); - } - - public N5DatasetDiscoverer( - final N5Reader n5, - final ExecutorService executor, - final Optional> comparator, - final List> metadataParsers, - final List> groupParsers) { - - this(n5, executor, comparator, null, metadataParsers, groupParsers); - } - - /** - * Creates an N5 discoverer. - *

- * If the optional parameter {@code comparator} is specified, the groups and datasets - * will be listed in the order determined by this comparator. - * - * @param executor the executor - * @param comparator optional string comparator - * @param filter the dataset filter - * @param metadataParsers metadata parsers - * @param groupParsers group parsers - */ - public N5DatasetDiscoverer( - final ExecutorService executor, - final Optional> comparator, - final Predicate filter, - final List> metadataParsers, - final List> groupParsers) { - - this.executor = executor; - this.comparator = comparator.orElseGet(null); - this.filter = filter; - this.metadataParsers = metadataParsers; - this.groupParsers = groupParsers; - } - - /** - * Creates an N5 discoverer. - *

- * If the optional parameter {@code comparator} is specified, the groups and datasets - * will be listed in the order determined by this comparator. - * - * @param n5 the n5 reader - * @param executor the executor - * @param comparator optional string comparator - * @param filter the dataset filter - * @param metadataParsers metadata parsers - * @param groupParsers group parsers - */ - public N5DatasetDiscoverer( - final N5Reader n5, - final ExecutorService executor, - final Optional> comparator, - final Predicate filter, - final List> metadataParsers, - final List> groupParsers) { - - this.n5 = n5; - this.executor = executor; - this.comparator = comparator.orElseGet(null); - this.filter = filter; - this.metadataParsers = metadataParsers; - this.groupParsers = groupParsers; - } - - public static void parseMetadata(final N5Reader n5, final N5TreeNode node, - final List> metadataParsers) throws IOException { - - parseMetadata(n5, node, metadataParsers, new ArrayList<>()); - } - - /** - * Parses metadata for a node using the given parsers, stopping after the first success. - * - * @param n5 the N5Reader - * @param node the tree node - * @param metadataParsers list of metadata parsers - * @param groupParsers list of group parsers - * @throws IOException the exception - */ - public static void parseMetadata(final N5Reader n5, final N5TreeNode node, - final List> metadataParsers, - final List> groupParsers) throws IOException { - - // Go through all parsers to populate metadata - for (final N5MetadataParser parser : metadataParsers) { - try { - Optional parsedMeta; - parsedMeta = parser.apply(n5, node); - - parsedMeta.ifPresent(node::setMetadata); - if (parsedMeta.isPresent()) - break; - } catch (final Exception ignored) { - } - } - - // this may be a group (e.g. multiscale pyramid) try to parse groups - if ((node.getMetadata() == null) && !node.childrenList().isEmpty() && groupParsers != null) { - for (final N5MetadataParser gp : groupParsers) { - final Optional groupMeta = gp.apply(n5, node); - groupMeta.ifPresent(node::setMetadata); - if (groupMeta.isPresent()) - break; - } - } - } - + private static final Logger LOG = LoggerFactory.getLogger(N5DatasetDiscoverer.class); + + public static final N5MetadataParser[] DEFAULT_PARSERS = new N5MetadataParser[] { new N5CosemMetadataParser(), + new N5SingleScaleMetadataParser(), new CanonicalMetadataParser(), + new N5GenericSingleScaleMetadataParser() }; + + public static final N5MetadataParser[] DEFAULT_GROUP_PARSERS = new N5MetadataParser[] { + new OmeNgffMetadataParser(), new N5CosemMultiScaleMetadata.CosemMultiScaleParser(), + new N5ViewerMultiscaleMetadataParser(), new CanonicalMetadataParser(), }; + + public static final N5MetadataParser[] DEFAULT_SHALLOW_GROUP_PARSERS = new N5MetadataParser[] { + new OmeNgffMetadataParser(true) }; + + private final List> metadataParsers; + private final List> groupParsers; + + private final Comparator comparator; + + private final Predicate filter; + + private final ExecutorService executor; + + private N5TreeNode root; + + private String groupSeparator; + + private N5Reader n5; + + /** + * Creates an N5 discoverer with alphanumeric sorting order of groups/datasets + * (such as, s9 goes before s10). + * + * @param executor the executor + * @param metadataParsers metadata parsers + * @param groupParsers group parsers + */ + public N5DatasetDiscoverer(final ExecutorService executor, final List> metadataParsers, + final List> groupParsers) { + + this(executor, Optional.of(new AlphanumericComparator(Collator.getInstance())), null, metadataParsers, + groupParsers); + } + + public N5DatasetDiscoverer(final N5Reader n5, final ExecutorService executor, + final List> metadataParsers, final List> groupParsers) { + + this(n5, executor, Optional.of(new AlphanumericComparator(Collator.getInstance())), null, metadataParsers, + groupParsers); + } + + /** + * Creates an N5 discoverer. + * + * @param metadataParsers metadata parsers + * @param groupParsers group parsers + */ + public N5DatasetDiscoverer(final List> metadataParsers, + final List> groupParsers) { + + this(Executors.newSingleThreadExecutor(), Optional.of(new AlphanumericComparator(Collator.getInstance())), null, + metadataParsers, groupParsers); + } + + /** + * Creates an N5 discoverer. + * + * @param n5 n5 reader + * @param metadataParsers metadata parsers + * @param groupParsers group parsers + */ + public N5DatasetDiscoverer(final N5Reader n5, final List> metadataParsers, + final List> groupParsers) { + + this(n5, Executors.newSingleThreadExecutor(), Optional.of(new AlphanumericComparator(Collator.getInstance())), + null, metadataParsers, groupParsers); + } + + public N5DatasetDiscoverer(final ExecutorService executor, final Predicate filter, + final List> metadataParsers, final List> groupParsers) { + + this(executor, Optional.of(new AlphanumericComparator(Collator.getInstance())), filter, metadataParsers, + groupParsers); + } + + public N5DatasetDiscoverer(final N5Reader n5, final ExecutorService executor, final Predicate filter, + final List> metadataParsers, final List> groupParsers) { + + this(n5, executor, Optional.of(new AlphanumericComparator(Collator.getInstance())), filter, metadataParsers, + groupParsers); + } + + public N5DatasetDiscoverer(final ExecutorService executor, final Optional> comparator, + final List> metadataParsers, final List> groupParsers) { + + this(executor, comparator, null, metadataParsers, groupParsers); + } + + public N5DatasetDiscoverer(final N5Reader n5, final ExecutorService executor, + final Optional> comparator, final List> metadataParsers, + final List> groupParsers) { + + this(n5, executor, comparator, null, metadataParsers, groupParsers); + } + + /** + * Creates an N5 discoverer. + *

+ * If the optional parameter {@code comparator} is specified, the groups and + * datasets will be listed in the order determined by this comparator. + * + * @param executor the executor + * @param comparator optional string comparator + * @param filter the dataset filter + * @param metadataParsers metadata parsers + * @param groupParsers group parsers + */ + public N5DatasetDiscoverer(final ExecutorService executor, final Optional> comparator, + final Predicate filter, final List> metadataParsers, + final List> groupParsers) { + + this.executor = executor; + this.comparator = comparator.orElseGet(null); + this.filter = filter; + this.metadataParsers = metadataParsers; + this.groupParsers = groupParsers; + } + + /** + * Creates an N5 discoverer. + *

+ * If the optional parameter {@code comparator} is specified, the groups and + * datasets will be listed in the order determined by this comparator. + * + * @param n5 the n5 reader + * @param executor the executor + * @param comparator optional string comparator + * @param filter the dataset filter + * @param metadataParsers metadata parsers + * @param groupParsers group parsers + */ + public N5DatasetDiscoverer(final N5Reader n5, final ExecutorService executor, + final Optional> comparator, final Predicate filter, + final List> metadataParsers, final List> groupParsers) { + + this.n5 = n5; + this.executor = executor; + this.comparator = comparator.orElseGet(null); + this.filter = filter; + this.metadataParsers = metadataParsers; + this.groupParsers = groupParsers; + } + + public static void parseMetadata(final N5Reader n5, final N5TreeNode node, + final List> metadataParsers) throws IOException { + + parseMetadata(n5, node, metadataParsers, new ArrayList<>()); + } + + /** + * Parses metadata for a node using the given parsers, stopping after the first + * success. + * + * @param n5 the N5Reader + * @param node the tree node + * @param metadataParsers list of metadata parsers + * @param groupParsers list of group parsers + * @throws IOException the exception + */ + public static void parseMetadata(final N5Reader n5, final N5TreeNode node, + final List> metadataParsers, final List> groupParsers) + throws IOException { + + // Go through all parsers to populate metadata + for (final N5MetadataParser parser : metadataParsers) { + try { + Optional parsedMeta; + parsedMeta = parser.apply(n5, node); + + parsedMeta.ifPresent(node::setMetadata); + if (parsedMeta.isPresent()) + break; + } catch (final Exception ignored) { + } + } + + // this may be a group (e.g. multiscale pyramid) try to parse groups + if ((node.getMetadata() == null) && !node.childrenList().isEmpty() && groupParsers != null) { + for (final N5MetadataParser gp : groupParsers) { + final Optional groupMeta = gp.apply(n5, node); + groupMeta.ifPresent(node::setMetadata); + if (groupMeta.isPresent()) + break; + } + } + } + /** * Parses metadata for a node using the given parsers, stopping after the first * success. @@ -370,254 +334,267 @@ public static boolean trim(final N5TreeNode node) { }); } - /** - * Removes branches of the N5 container tree that do not contain any nodes that can be opened - * (nodes with metadata). - * - * @param node the node - * @param callback the callback function - * @return {@code true} if the branch contains a node that can be opened, {@code false} otherwise - */ - public static boolean trim(final N5TreeNode node, final Consumer callback ) { + /** + * Removes branches of the N5 container tree that do not contain any nodes that + * can be opened (nodes with metadata). + * + * @param node the node + * @param callback the callback function + * @return {@code true} if the branch contains a node that can be opened, + * {@code false} otherwise + */ + public static boolean trim(final N5TreeNode node, final Consumer callback) { - final List children = node.childrenList(); - if (children.isEmpty()) { - return node.getMetadata() != null; - } + final List children = node.childrenList(); + if (children.isEmpty()) { + return node.getMetadata() != null; + } - boolean ret = false; - for (final Iterator it = children.iterator(); it.hasNext(); ) { - final N5TreeNode childNode = it.next(); - if (!trim(childNode, callback)) { - it.remove(); - callback.accept(childNode); - } else { - ret = true; - } + boolean ret = false; + for (final Iterator it = children.iterator(); it.hasNext();) { + final N5TreeNode childNode = it.next(); + if (!trim(childNode, callback)) { + it.remove(); + callback.accept(childNode); + } else { + ret = true; + } + } + + return ret || node.getMetadata() != null; } - return ret || node.getMetadata() != null; - } + public static void sort(final N5TreeNode node, final Comparator comparator, + final Consumer callback) { - public static void sort(final N5TreeNode node, final Comparator comparator, - final Consumer callback) { + final List children = node.childrenList(); + children.sort(Comparator.comparing(N5TreeNode::toString, comparator)); - final List children = node.childrenList(); - children.sort(Comparator.comparing(N5TreeNode::toString, comparator)); + if (callback != null) { + callback.accept(node); + } - if( callback != null ) { - callback.accept( node ); + for (final N5TreeNode childNode : node.childrenList()) { + sort(childNode, comparator, callback); + } } - for (final N5TreeNode childNode : node.childrenList()) { - sort(childNode, comparator, callback ); + public void sort(final N5TreeNode node, final Consumer callback) { + if (comparator != null) { + sort(node, comparator, callback); + } } - } - public void sort(final N5TreeNode node, final Consumer callback) { - if (comparator != null) { - sort(node, comparator, callback); + public void sort(final N5TreeNode node) { + if (comparator != null) + sort(node, comparator, null); } - } - - public void sort(final N5TreeNode node) { - if (comparator != null) - sort(node, comparator, null); - } - /** - * Recursively discovers and parses metadata for datasets that are children - * of the given base path using {@link N5Reader#deepList}. Returns an {@link N5TreeNode} - * that can be displayed as a JTree. - * - * @param base the base path - * @return the n5 tree node - * @throws IOException the io exception - */ - public N5TreeNode discoverAndParseRecursive(final String base ) throws IOException { + /** + * Recursively discovers and parses metadata for datasets that are children of + * the given base path using {@link N5Reader#deepList}. Returns an + * {@link N5TreeNode} that can be displayed as a JTree. + * + * @param base the base path + * @return the n5 tree node + * @throws IOException the io exception + */ + public N5TreeNode discoverAndParseRecursive(final String base) throws IOException { - return discoverAndParseRecursive(base, x -> {}); - } + return discoverAndParseRecursive(base, x -> { + }); + } - public N5TreeNode discoverAndParseRecursive(final String base, final Consumer callback ) throws IOException { + public N5TreeNode discoverAndParseRecursive(final String base, final Consumer callback) + throws IOException { groupSeparator = n5.getGroupSeparator(); root = new N5TreeNode(base); - discoverAndParseRecursive(root, callback ); + discoverAndParseRecursive(root, callback); return root; } - public N5TreeNode discoverAndParseRecursive(final N5TreeNode root ) throws IOException { - return discoverAndParseRecursive( root, x -> {}) ; - } + public N5TreeNode discoverAndParseRecursive(final N5TreeNode root) throws IOException { + return discoverAndParseRecursive(root, x -> { + }); + } - public N5TreeNode discoverAndParseRecursive(final N5TreeNode root, final Consumer callback ) throws IOException { + public N5TreeNode discoverAndParseRecursive(final N5TreeNode root, final Consumer callback) + throws IOException { - groupSeparator = n5.getGroupSeparator(); + groupSeparator = n5.getGroupSeparator(); - String[] datasetPaths; - try { - datasetPaths = n5.deepList(root.getPath(), executor); - N5TreeNode.fromFlatList(root, datasetPaths, groupSeparator); - } catch (final Exception e) { - return null; - } - callback.accept(root); + String[] datasetPaths; + try { + datasetPaths = n5.deepList(root.getPath(), executor); + N5TreeNode.fromFlatList(root, datasetPaths, groupSeparator); + } catch (final Exception e) { + return null; + } + callback.accept(root); - parseMetadataRecursive(root,callback); - sortAndTrimRecursive(root,callback); + parseMetadataRecursive(root, callback); + sortAndTrimRecursive(root, callback); - return root; - } + return root; + } - /** - * Returns the name of the dataset, removing the full path - * and leading groupSeparator. - * - * @param fullPath - * @return dataset name - */ - private String normalDatasetName(final String fullPath) { + /** + * Returns the name of the dataset, removing the full path and leading + * groupSeparator. + * + * @param fullPath + * @return dataset name + */ + private String normalDatasetName(final String fullPath) { - return fullPath.replaceAll("(^" + groupSeparator + "*)|(" + groupSeparator + "*$)", ""); - } + return fullPath.replaceAll("(^" + groupSeparator + "*)|(" + groupSeparator + "*$)", ""); + } - public N5TreeNode parse(final String dataset) { + public N5TreeNode parse(final String dataset) { - final N5TreeNode node = new N5TreeNode(dataset); - return parse(node); - } + final N5TreeNode node = new N5TreeNode(dataset); + return parse(node); + } - public N5TreeNode parse(final N5TreeNode node) { - // Go through all parsers to populate metadata - for (final N5MetadataParser parser : metadataParsers) { - try { - final Optional metadata = parser.apply(n5, node); - if (metadata.isPresent()) { - node.setMetadata(metadata.get()); - break; - } - } catch (final Exception e) { - } - } - return node; - } - - public void sortAndTrimRecursive(final N5TreeNode node) { - sortAndTrimRecursive( node, x -> { }); - } - - public void sortAndTrimRecursive(final N5TreeNode node, final Consumer callback ) { - trim(node, callback); - - if (comparator != null) - sort(node, callback); - - for (final N5TreeNode c : node.childrenList()) - sortAndTrimRecursive(c, callback); - } - - public void filterRecursive(final N5TreeNode node) { - - if (filter == null) - return; - - if (!filter.test(node)) - node.setMetadata(null); - - for (final N5TreeNode c : node.childrenList()) - filterRecursive(c); - } - - /** - * Parses metadata for the given node and all children in parallel using this object's executor. - * - * @param rootNode the root node - */ - public void parseMetadataRecursive(final N5TreeNode rootNode) { - parseMetadataRecursive( rootNode, x -> {}); - } - - /** - * Parses metadata for the given node and all children in parallel using this object's executor. - * The given function is called for every node after parsing is completed, successful or not. - * - * @param rootNode the root node - * @param callback the callback function - */ - public void parseMetadataRecursive(final N5TreeNode rootNode, final Consumer callback) { - /* depth first, check if we have children */ - final List children = rootNode.childrenList(); - final ArrayList> childrenFutures = new ArrayList>(); - if (!children.isEmpty()) { - /* If possible, parallelize the metadata parsing. */ - if (executor instanceof ThreadPoolExecutor) { - final ThreadPoolExecutor threadPoolExec = (ThreadPoolExecutor)this.executor; - for (final N5TreeNode child : children) { - final boolean useExec; - synchronized (executor) { - /* Since the parents wait for the children to finish, if there aren't enough threads to parse all the children (DFS), - * this could lock up. So we check if there are any extra threads; if not, execute if current thread. */ - useExec = (threadPoolExec.getActiveCount() < threadPoolExec.getMaximumPoolSize() - 1); - } - if (useExec) { - childrenFutures.add(this.executor.submit(() -> parseMetadataRecursive(child, callback))); - } else { - parseMetadataRecursive(child,callback); - } - } - } else { - for (final N5TreeNode child : children) { - parseMetadataRecursive(child,callback); + public N5TreeNode parse(final N5TreeNode node) { + // Go through all parsers to populate metadata + for (final N5MetadataParser parser : metadataParsers) { + try { + final Optional metadata = parser.apply(n5, node); + if (metadata.isPresent()) { + node.setMetadata(metadata.get()); + break; + } + } catch (final Exception e) { + } } - } + return node; } - for (final Future childrenFuture : childrenFutures) { - try { - childrenFuture.get(); - } catch (InterruptedException | ExecutionException e) { - LOG.error("Error encountered during metadata parsing", e); - throw new RuntimeException(e); - } + public void sortAndTrimRecursive(final N5TreeNode node) { + sortAndTrimRecursive(node, x -> { + }); } - try { - N5DatasetDiscoverer.parseMetadata(n5, rootNode, metadataParsers, groupParsers); - } catch (final Exception e) { + public void sortAndTrimRecursive(final N5TreeNode node, final Consumer callback) { + trim(node, callback); + + if (comparator != null) + sort(node, callback); + + for (final N5TreeNode c : node.childrenList()) + sortAndTrimRecursive(c, callback); } - LOG.debug("parsed metadata for: {}:\t found: {}", rootNode.getPath(), rootNode.getMetadata() == null ? "NONE" : rootNode.getMetadata().getClass().getSimpleName()); - callback.accept(rootNode); + public void filterRecursive(final N5TreeNode node) { + + if (filter == null) + return; + + if (!filter.test(node)) + node.setMetadata(null); + + for (final N5TreeNode c : node.childrenList()) + filterRecursive(c); + } + + /** + * Parses metadata for the given node and all children in parallel using this + * object's executor. + * + * @param rootNode the root node + */ + public void parseMetadataRecursive(final N5TreeNode rootNode) { + parseMetadataRecursive(rootNode, x -> { + }); + } + + /** + * Parses metadata for the given node and all children in parallel using this + * object's executor. The given function is called for every node after parsing + * is completed, successful or not. + * + * @param rootNode the root node + * @param callback the callback function + */ + public void parseMetadataRecursive(final N5TreeNode rootNode, final Consumer callback) { + /* depth first, check if we have children */ + final List children = rootNode.childrenList(); + final ArrayList> childrenFutures = new ArrayList>(); + if (!children.isEmpty()) { + /* If possible, parallelize the metadata parsing. */ + if (executor instanceof ThreadPoolExecutor) { + final ThreadPoolExecutor threadPoolExec = (ThreadPoolExecutor) this.executor; + for (final N5TreeNode child : children) { + final boolean useExec; + synchronized (executor) { + /* + * Since the parents wait for the children to finish, if there aren't enough + * threads to parse all the children (DFS), this could lock up. So we check if + * there are any extra threads; if not, execute if current thread. + */ + useExec = (threadPoolExec.getActiveCount() < threadPoolExec.getMaximumPoolSize() - 1); + } + if (useExec) { + childrenFutures.add(this.executor.submit(() -> parseMetadataRecursive(child, callback))); + } else { + parseMetadataRecursive(child, callback); + } + } + } else { + for (final N5TreeNode child : children) { + parseMetadataRecursive(child, callback); + } + } + } + + for (final Future childrenFuture : childrenFutures) { + try { + childrenFuture.get(); + } catch (InterruptedException | ExecutionException e) { + LOG.error("Error encountered during metadata parsing", e); + throw new RuntimeException(e); + } + } + + try { + N5DatasetDiscoverer.parseMetadata(n5, rootNode, metadataParsers, groupParsers); + } catch (final Exception e) { + } + LOG.debug("parsed metadata for: {}:\t found: {}", rootNode.getPath(), + rootNode.getMetadata() == null ? "NONE" : rootNode.getMetadata().getClass().getSimpleName()); + + callback.accept(rootNode); - if( rootNode.getMetadata() instanceof N5MetadataGroup ) { + if (rootNode.getMetadata() instanceof N5MetadataGroup) { - // spatial metadata groups may update their children metadata, and to be safe, - // run the callback on its children - @SuppressWarnings("unchecked") - final N5MetadataGroup grpMeta = (N5MetadataGroup)rootNode.getMetadata(); - for( final N5Metadata child : grpMeta.getChildrenMetadata() ) - { - rootNode.getDescendant(child.getPath()).ifPresent( x -> { - callback.accept( x ); - }); + // spatial metadata groups may update their children metadata, and to be safe, + // run the callback on its children + @SuppressWarnings("unchecked") + final N5MetadataGroup grpMeta = (N5MetadataGroup) rootNode.getMetadata(); + for (final N5Metadata child : grpMeta.getChildrenMetadata()) { + rootNode.getDescendant(child.getPath()).ifPresent(x -> { + callback.accept(x); + }); + } } } - } public static final List> fromParsers(final N5MetadataParser[] parsers) { return Arrays.asList(parsers); } - public static N5TreeNode discover(final N5Reader n5, final List> parsers, final List> groupParsers) { + public static N5TreeNode discover(final N5Reader n5, final List> parsers, + final List> groupParsers) { - final N5DatasetDiscoverer discoverer = new N5DatasetDiscoverer(n5, - Executors.newCachedThreadPool(), - parsers, groupParsers); + final N5DatasetDiscoverer discoverer = new N5DatasetDiscoverer(n5, Executors.newCachedThreadPool(), parsers, + groupParsers); try { return discoverer.discoverAndParseRecursive(""); - } catch (final IOException e) {} + } catch (final IOException e) { + } return null; } @@ -626,27 +603,27 @@ public static N5TreeNode discover(final N5Reader n5, final List meta = parser.parseMetadata(zarr, ""); +// System.out.println(meta.isPresent()); +// if(meta.isPresent()) { +// System.out.println(meta.get()); +// } + + } + } + From 425d36fddb996a8753537e85c46d5302129365be Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 12 Dec 2024 12:17:29 -0500 Subject: [PATCH 09/14] feat: add googleCloudCreateBucket option --- .../n5/universe/N5DatasetDiscoverer.java | 111 +++-- .../saalfeldlab/n5/universe/N5Factory.java | 7 + .../saalfeldlab/n5/universe/N5TreeNode.java | 422 +++++++++--------- 3 files changed, 299 insertions(+), 241 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java index 7b2b694..dda762f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java @@ -105,7 +105,7 @@ public class N5DatasetDiscoverer { public static final N5MetadataParser[] DEFAULT_GROUP_PARSERS = new N5MetadataParser[] { new OmeNgffMetadataParser(), new N5CosemMultiScaleMetadata.CosemMultiScaleParser(), - new N5ViewerMultiscaleMetadataParser(), new CanonicalMetadataParser(), }; + new N5ViewerMultiscaleMetadataParser(), new CanonicalMetadataParser() }; public static final N5MetadataParser[] DEFAULT_SHALLOW_GROUP_PARSERS = new N5MetadataParser[] { new OmeNgffMetadataParser(true) }; @@ -304,13 +304,14 @@ public static void parseMetadata(final N5Reader n5, final N5TreeNode node, * @throws IOException the exception */ public static void parseMetadataShallow(final N5Reader n5, final N5TreeNode node, - final List> metadataParsers, final List> groupParsers) - throws IOException { + final List> metadataParsers, final List> groupParsers) { + + System.out.println("parse shallow for: " + node.getPath()); // Go through all parsers to populate metadata for (final N5MetadataParser parser : metadataParsers) { try { - Optional parsedMeta; + final Optional parsedMeta; parsedMeta = parser.apply(n5, node); parsedMeta.ifPresent(node::setMetadata); @@ -321,7 +322,7 @@ public static void parseMetadataShallow(final N5Reader n5, final N5TreeNode node } // this may be a group (e.g. multiscale pyramid) try to parse groups - for (final N5MetadataParser gp : groupParsers) { + for (final N5MetadataParser gp : DEFAULT_SHALLOW_GROUP_PARSERS) { final Optional groupMeta = gp.apply(n5, node); groupMeta.ifPresent(node::setMetadata); if (groupMeta.isPresent()) @@ -389,6 +390,28 @@ public void sort(final N5TreeNode node) { if (comparator != null) sort(node, comparator, null); } + + public N5TreeNode discoverShallow(final String base) { + + return discoverShallow(base, x -> {}); + } + + public N5TreeNode discoverShallow(final String base, final Consumer callback) { + + root = new N5TreeNode(base); + return discoverShallow(root, callback); + } + + public N5TreeNode discoverShallow(final N5TreeNode base) { + + return discoverShallow(base, x -> {}); + } + + public N5TreeNode discoverShallow(final N5TreeNode base, final Consumer callback) { + + parseMetadataShallow(n5, root, metadataParsers, groupParsers); + return root; + } /** * Recursively discovers and parses metadata for datasets that are children of @@ -401,14 +424,12 @@ public void sort(final N5TreeNode node) { */ public N5TreeNode discoverAndParseRecursive(final String base) throws IOException { - return discoverAndParseRecursive(base, x -> { - }); + return discoverAndParseRecursive(base, x -> {}); } public N5TreeNode discoverAndParseRecursive(final String base, final Consumer callback) throws IOException { - groupSeparator = n5.getGroupSeparator(); root = new N5TreeNode(base); discoverAndParseRecursive(root, callback); return root; @@ -422,8 +443,9 @@ public N5TreeNode discoverAndParseRecursive(final N5TreeNode root) throws IOExce public N5TreeNode discoverAndParseRecursive(final N5TreeNode root, final Consumer callback) throws IOException { - groupSeparator = n5.getGroupSeparator(); + discoverShallow(root, callback); + groupSeparator = n5.getGroupSeparator(); String[] datasetPaths; try { datasetPaths = n5.deepList(root.getPath(), executor); @@ -433,7 +455,9 @@ public N5TreeNode discoverAndParseRecursive(final N5TreeNode root, final Consume } callback.accept(root); - parseMetadataRecursive(root, callback); + // because we did a shallow metadata parsing already, skip parsing for this node + // when parsing recursively + parseMetadataRecursive(root, callback, true); sortAndTrimRecursive(root, callback); return root; @@ -509,7 +533,7 @@ public void parseMetadataRecursive(final N5TreeNode rootNode) { parseMetadataRecursive(rootNode, x -> { }); } - + /** * Parses metadata for the given node and all children in parallel using this * object's executor. The given function is called for every node after parsing @@ -519,6 +543,19 @@ public void parseMetadataRecursive(final N5TreeNode rootNode) { * @param callback the callback function */ public void parseMetadataRecursive(final N5TreeNode rootNode, final Consumer callback) { + parseMetadataRecursive(rootNode, callback, false); + } + + /** + * Parses metadata for the given node and all children in parallel using this + * object's executor. The given function is called for every node after parsing + * is completed, successful or not. + * + * @param rootNode the root node + * @param callback the callback function + * @param skipRoot skip parsing for this node + */ + public void parseMetadataRecursive(final N5TreeNode rootNode, final Consumer callback, final boolean skipParsing) { /* depth first, check if we have children */ final List children = rootNode.childrenList(); final ArrayList> childrenFutures = new ArrayList>(); @@ -558,12 +595,15 @@ public void parseMetadataRecursive(final N5TreeNode rootNode, final Consumer> fromParsers(final N5MetadataParser return Arrays.asList(parsers); } - public static N5TreeNode discover(final N5Reader n5, final List> parsers, + + public static N5TreeNode discover(final N5Reader n5, final String basePath, final List> parsers, final List> groupParsers) { final N5DatasetDiscoverer discoverer = new N5DatasetDiscoverer(n5, Executors.newCachedThreadPool(), parsers, groupParsers); try { - return discoverer.discoverAndParseRecursive(""); + return discoverer.discoverAndParseRecursive(basePath); } catch (final IOException e) { } return null; } + public static N5TreeNode discover(final N5Reader n5, final List> parsers, + final List> groupParsers) { + + return discover(n5, "", parsers, groupParsers); + } + public static N5TreeNode discover(final N5Reader n5, final List> parsers) { return discover(n5, parsers, null); } + + public static N5TreeNode discover(final N5Reader n5, final String basePath) { + + return discover(n5, basePath, Arrays.asList(DEFAULT_PARSERS), Arrays.asList(DEFAULT_GROUP_PARSERS)); + } public static N5TreeNode discover(final N5Reader n5) { @@ -618,12 +670,8 @@ public static N5TreeNode discover(final N5Reader n5) { public static N5TreeNode discoverShallow(final N5Reader n5, final String dataset) { final N5TreeNode node = new N5TreeNode(dataset); - - try { - parseMetadataShallow(n5, node, Arrays.asList(DEFAULT_PARSERS), - Arrays.asList(DEFAULT_SHALLOW_GROUP_PARSERS)); - } catch (IOException e) { - } + parseMetadataShallow(n5, node, Arrays.asList(DEFAULT_PARSERS), + Arrays.asList(DEFAULT_SHALLOW_GROUP_PARSERS)); return node; } @@ -641,23 +689,22 @@ public static N5TreeNode discoverShallow(final N5Reader n5) { public static void main(String[] args) throws IOException { -// final N5Reader zarr = new N5Factory().openReader("/home/john/tmp/mr.avg.ome.zarr"); + final N5Reader zarr = new N5Factory().openReader("/home/john/tmp/mr.avg.ome.zarr"); // final N5Reader zarr = new N5Factory() // .openReader("https://storage.googleapis.com/jax-public-ngff/KOMP/adult_lacZ/ndp/Moxd1/23420_K35061_FGut.zarr"); -// + // System.out.println(zarr); // System.out.println(zarr.exists("")); -// Storage gcs = GoogleCloudUtils.createGoogleCloudStorage(null); -// StorageOptions opts = gcs.getOptions(); -// System.out.println(opts); - - // N5TreeNode root = N5DatasetDiscoverer.discoverShallow(zarr, "/"); // System.out.println(root); -// System.out.println(root.getMetadata()); +// System.out.println("metadata: " + root.getMetadata()); // System.out.println(root.printRecursive()); + N5TreeNode root = N5DatasetDiscoverer.discover(zarr); + System.out.println(root); + System.out.println("metadata: " + root.getMetadata()); + System.out.println(root.printRecursive()); // OmeNgffMetadataParser parser = new OmeNgffMetadataParser( true ); // Optional meta = parser.parseMetadata(zarr, ""); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java index 1250688..e7e321e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5Factory.java @@ -87,6 +87,7 @@ public class N5Factory implements Serializable { private boolean zarrMapN5DatasetAttributes = true; private boolean zarrMergeAttributes = true; private String googleCloudProjectId = null; + private boolean googleCloudCreateBucket = false; private String s3Region = null; private AWSCredentials s3Credentials = null; private ClientConfiguration s3ClientConfiguration = null; @@ -135,6 +136,12 @@ public N5Factory zarrMergeAttributes(final boolean mergeAttributes) { return this; } + public N5Factory googleCloudCreateBucket(final boolean createBucket) { + + googleCloudCreateBucket = createBucket; + return this; + } + public N5Factory googleCloudProjectId(final String projectId) { googleCloudProjectId = projectId; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5TreeNode.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5TreeNode.java index 7df577b..cbc1ad1 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5TreeNode.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5TreeNode.java @@ -41,9 +41,8 @@ import org.janelia.saalfeldlab.n5.universe.metadata.N5Metadata; /** - * A node representing a dataset or group in an N5 container, - * and stores its corresponding {@link N5Metadata}, and - * child nodes if any exist. + * A node representing a dataset or group in an N5 container, and stores its + * corresponding {@link N5Metadata}, and child nodes if any exist. * * @author Caleb Hulbert * @author John Bogovic @@ -51,255 +50,260 @@ */ public class N5TreeNode { - private final String path; + private final String path; - private N5Metadata metadata; + private N5Metadata metadata; - private final ArrayList children; + private final ArrayList children; - public N5TreeNode(final String path) { + public N5TreeNode(final String path) { - this.path = path.trim(); - children = new ArrayList<>(); - } + this.path = path.trim(); + children = new ArrayList<>(); + } + + public static Stream flattenN5Tree(N5TreeNode root) { - public static Stream flattenN5Tree(N5TreeNode root) { + return Stream.concat(Stream.of(root), root.childrenList().stream().flatMap(N5TreeNode::flattenN5Tree)); + } - return Stream.concat( - Stream.of(root), - root.childrenList().stream().flatMap(N5TreeNode::flattenN5Tree)); - } + public String getNodeName() { + + return Paths.get(removeLeadingSlash(path)).getFileName().toString(); + } - public String getNodeName() { + public String getParentPath() { - return Paths.get(removeLeadingSlash(path)).getFileName().toString(); - } + return Paths.get(removeLeadingSlash(path)).getParent().toString(); + } - public String getParentPath() { + /** + * Adds a node as a child of this node. + * + * @param child the child node + */ + public void add(final N5TreeNode child) { - return Paths.get(removeLeadingSlash(path)).getParent().toString(); - } + children.add(child); + } - /** - * Adds a node as a child of this node. - * - * @param child the child node - */ - public void add(final N5TreeNode child) { + public void remove(final N5TreeNode child) { - children.add(child); - } + children.remove(child); + } - public void remove(final N5TreeNode child) { + public void removeAllChildren() { - children.remove(child); - } + children.clear(); + } - public void removeAllChildren() { + public List childrenList() { - children.clear(); - } + return children; + } - public List childrenList() { + public Optional child(final String name) { + + final String childPath = N5URI.normalizeGroupPath(path + "/" + name); + return children.stream().filter(x -> N5URI.normalizeGroupPath(x.getPath()).equals(childPath)).findFirst(); + } + + public Optional getDescendant(String path) { + + return getDescendants(x -> N5URI.normalizeGroupPath(x.getPath()).equals(N5URI.normalizeGroupPath(path))) + .findFirst(); + } + + /** + * Adds a node at the specified full path and any parent nodes along the path, + * if they do not already exist. Returns the node at the specified path. + * + * @param path the full path to node + * @return the node + */ + public N5TreeNode addPath(final String path) { + return addPath(path, x -> new N5TreeNode(x)); + } + + /** + * Adds a node at the specified full path and any parent nodes along the path, + * if they do not already exist. Returns the node at the specified path. + * + * @param path the full path to node + * @param constructor function creating a node from a path + * @return the node + */ + public N5TreeNode addPath(final String path, Function constructor) { + final String normPath = removeLeadingSlash(path); + + if (!getPath().isEmpty() && !normPath.startsWith(getPath())) + return null; + + if (this.path.equals(normPath)) + return this; + + final String relativePath = removeLeadingSlash(normPath.replaceAll(this.path, "")); + final int sepIdx = relativePath.indexOf("/"); + final String childName; + if (sepIdx < 0) + childName = relativePath; + else + childName = relativePath.substring(0, sepIdx); + + // get the appropriate child along the path if it exists, otherwise add it + N5TreeNode child = null; + final Stream cs = children.stream().filter(n -> n.getNodeName().equals(childName)); + final Optional copt = cs.findFirst(); + if (copt.isPresent()) + child = copt.get(); + else { + child = constructor.apply(this.path.isEmpty() ? childName : this.path + "/" + childName); + add(child); + } + return child.addPath(normPath); + } - return children; - } + public Stream getDescendants(Predicate filter) { - public Optional getDescendant( String path ) { + return N5TreeNode.flattenN5Tree(this).filter(filter); + } - return getDescendants(x -> N5URI.normalizeGroupPath(x.getPath()).equals( - N5URI.normalizeGroupPath(path))).findFirst(); - } + public boolean isDataset() { - /** - * Adds a node at the specified full path and any parent nodes along the path, - * if they do not already exist. Returns the node at the specified path. - * - * @param path the full path to node - * @return the node - */ - public N5TreeNode addPath( final String path ) { - return addPath( path, x -> new N5TreeNode( x )); - } + return Optional.ofNullable(getMetadata()).map(N5DatasetMetadata.class::isInstance).orElse(false); + } - /** - * Adds a node at the specified full path and any parent nodes along the path, - * if they do not already exist. Returns the node at the specified path. - * - * @param path the full path to node - * @param constructor function creating a node from a path - * @return the node - */ - public N5TreeNode addPath( final String path, Function constructor ) { - final String normPath = removeLeadingSlash(path); + public void setMetadata(final N5Metadata metadata) { - if( !getPath().isEmpty() && !normPath.startsWith(getPath())) - return null; + this.metadata = metadata; + } - if( this.path.equals(normPath)) - return this; + public N5Metadata getMetadata() { - final String relativePath = removeLeadingSlash( normPath.replaceAll(this.path, "")); - final int sepIdx = relativePath.indexOf("/"); - final String childName; - if( sepIdx < 0 ) - childName = relativePath; - else - childName = relativePath.substring(0, sepIdx); + return metadata; + } - // get the appropriate child along the path if it exists, otherwise add it - N5TreeNode child = null; - final Stream cs = children.stream().filter( n -> n.getNodeName().equals(childName)); - final Optional copt = cs.findFirst(); - if( copt.isPresent() ) - child = copt.get(); - else { - child = constructor.apply( this.path.isEmpty() ? childName : this.path + "/" + childName ); - add( child ); - } - return child.addPath(normPath); - } + public String getPath() { - public Stream getDescendants( Predicate filter ) { + return path; + } - return N5TreeNode.flattenN5Tree(this).filter( filter ); - } + @Override + public String toString() { - public boolean isDataset() { + final String nodeName = getNodeName(); + return nodeName.isEmpty() ? "/" : nodeName; + } - return Optional.ofNullable(getMetadata()).map(N5DatasetMetadata.class::isInstance).orElse(false); - } + public boolean structureEquals(N5TreeNode other) { + final boolean samePath = getPath().equals(other.getPath()); + if (!samePath) + return false; - public void setMetadata(final N5Metadata metadata) { + boolean childrenEqual = true; + for (final N5TreeNode c : childrenList()) { + final Optional otherChildOpt = other.childrenList().stream() + .filter(x -> x.getNodeName().equals(c.getNodeName())).findFirst(); - this.metadata = metadata; - } + childrenEqual = childrenEqual && otherChildOpt.map(x -> x.structureEquals(c)).orElse(false); - public N5Metadata getMetadata() { + if (!childrenEqual) + break; + } + return childrenEqual; + } - return metadata; - } + public String printRecursive() { - public String getPath() { - - return path; - } + return printRecursiveHelper(this, ""); + } - @Override - public String toString() { + private static String printRecursiveHelper(N5TreeNode node, String prefix) { - final String nodeName = getNodeName(); - return nodeName.isEmpty() ? "/" : nodeName; - } + final StringBuffer out = new StringBuffer(); + out.append(prefix + node.path + "\n"); + for (final N5TreeNode c : node.childrenList()) { + System.out.println(c.path); + out.append(printRecursiveHelper(c, prefix + " ")); + } - public boolean structureEquals( N5TreeNode other ) - { - final boolean samePath = getPath().equals(other.getPath()); - if( !samePath ) - return false; - - boolean childrenEqual = true; - for( final N5TreeNode c : childrenList()) { - final Optional otherChildOpt = other.childrenList().stream() - .filter( x -> x.getNodeName().equals( c.getNodeName())) - .findFirst(); - - childrenEqual = childrenEqual && - otherChildOpt.map( x -> x.structureEquals(c)) - .orElse(false); - - if( !childrenEqual ) - break; - } - return childrenEqual; - } - - public String printRecursive() { - - return printRecursiveHelper(this, ""); - } - - private static String printRecursiveHelper(N5TreeNode node, String prefix) { - - final StringBuffer out = new StringBuffer(); - out.append(prefix + node.path + "\n"); - for (final N5TreeNode c : node.childrenList()) { - System.out.println(c.path); - out.append(printRecursiveHelper(c, prefix + " ")); + return out.toString(); } - return out.toString(); - } - - /** - * Generates a tree based on the output of {@link N5Reader#deepList}, returning the root node. - * - * @param base the path used to call deepList - * @param pathList the output of deepList - * @param groupSeparator the n5 group separator - * @return the root node - */ - public static N5TreeNode fromFlatList(final String base, final String[] pathList, final String groupSeparator) { - - final N5TreeNode root = new N5TreeNode(base); - fromFlatList( root, pathList, groupSeparator ); - return root; - } - - /** - * Generates a tree based on the output of {@link N5Reader#deepList}, returning the root node. - * - * @param root the root node corresponding to the base - * @param pathList the output of deepList - * @param groupSeparator the n5 group separator - */ - public static void fromFlatList(final N5TreeNode root, final String[] pathList, final String groupSeparator) { - - final HashMap pathToNode = new HashMap<>(); - - final String normalizedBase = normalDatasetName(root.getPath(), groupSeparator); - pathToNode.put(normalizedBase, root); - - // sort the paths by length such that parent nodes always have smaller - // indexes than their children - Arrays.sort(pathList); - - final String prefix = normalizedBase == groupSeparator ? "" : normalizedBase; - for (final String datasetPath : pathList) { - - final String fullPath = prefix + groupSeparator + datasetPath; - final N5TreeNode node = new N5TreeNode(fullPath); - pathToNode.put(fullPath, node); - - final String parentPath = fullPath.substring(0, fullPath.lastIndexOf(groupSeparator)); - - N5TreeNode parent = pathToNode.get(parentPath); - if (parent == null) { - // possible for the parent to not appear in the list - // if deepList is called with a filter - parent = new N5TreeNode(parentPath); - pathToNode.put(parentPath, parent); - } - parent.add(node); + /** + * Generates a tree based on the output of {@link N5Reader#deepList}, returning + * the root node. + * + * @param base the path used to call deepList + * @param pathList the output of deepList + * @param groupSeparator the n5 group separator + * @return the root node + */ + public static N5TreeNode fromFlatList(final String base, final String[] pathList, final String groupSeparator) { + + final N5TreeNode root = new N5TreeNode(base); + fromFlatList(root, pathList, groupSeparator); + return root; } - } - private static String normalDatasetName(final String fullPath, final String groupSeparator) { + /** + * Generates a tree based on the output of {@link N5Reader#deepList}, returning + * the root node. + * + * @param root the root node corresponding to the base + * @param pathList the output of deepList + * @param groupSeparator the n5 group separator + */ + public static void fromFlatList(final N5TreeNode root, final String[] pathList, final String groupSeparator) { + + final HashMap pathToNode = new HashMap<>(); + + final String normalizedBase = normalDatasetName(root.getPath(), groupSeparator); + pathToNode.put(normalizedBase, root); - return fullPath.replaceAll("(^" + groupSeparator + "*)|(" + groupSeparator + "*$)", ""); - } + // sort the paths by length such that parent nodes always have smaller + // indexes than their children + Arrays.sort(pathList); - /** - * Removes the leading slash from a given path and returns the corrected path. - * It ensures correctness on both Unix and Windows, otherwise {@code pathName} is treated - * as UNC path on Windows, and {@code Paths.get(pathName, ...)} fails with {@code InvalidPathException}. - * - * @param pathName the path - * @return the corrected path - */ - protected static String removeLeadingSlash(final String pathName) { + final String prefix = normalizedBase == groupSeparator ? "" : normalizedBase; + for (final String datasetPath : pathList) { - return pathName.startsWith("/") || pathName.startsWith("\\") ? pathName.substring(1) : pathName; - } + final String fullPath = prefix + groupSeparator + datasetPath; + final N5TreeNode node = new N5TreeNode(fullPath); + pathToNode.put(fullPath, node); -} + final String parentPath = fullPath.substring(0, fullPath.lastIndexOf(groupSeparator)); + + N5TreeNode parent = pathToNode.get(parentPath); + if (parent == null) { + // possible for the parent to not appear in the list + // if deepList is called with a filter + parent = new N5TreeNode(parentPath); + pathToNode.put(parentPath, parent); + } + + if (!parent.child(datasetPath).isPresent()) + parent.add(node); + + } + } + + private static String normalDatasetName(final String fullPath, final String groupSeparator) { + + return fullPath.replaceAll("(^" + groupSeparator + "*)|(" + groupSeparator + "*$)", ""); + } + + /** + * Removes the leading slash from a given path and returns the corrected path. + * It ensures correctness on both Unix and Windows, otherwise {@code pathName} + * is treated as UNC path on Windows, and {@code Paths.get(pathName, ...)} fails + * with {@code InvalidPathException}. + * + * @param pathName the path + * @return the corrected path + */ + protected static String removeLeadingSlash(final String pathName) { + + return pathName.startsWith("/") || pathName.startsWith("\\") ? pathName.substring(1) : pathName; + } +} \ No newline at end of file From cce03231cac31e6ce08f51a9b801fb2240bd68f7 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 12 Dec 2024 14:04:07 -0500 Subject: [PATCH 10/14] fix: recursive group discovery after shallow discovery * need to run group parsers for metadata that depend on listed children --- .../saalfeldlab/n5/universe/N5DatasetDiscoverer.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java index dda762f..9a884b3 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java @@ -306,8 +306,6 @@ public static void parseMetadata(final N5Reader n5, final N5TreeNode node, public static void parseMetadataShallow(final N5Reader n5, final N5TreeNode node, final List> metadataParsers, final List> groupParsers) { - System.out.println("parse shallow for: " + node.getPath()); - // Go through all parsers to populate metadata for (final N5MetadataParser parser : metadataParsers) { try { @@ -553,9 +551,9 @@ public void parseMetadataRecursive(final N5TreeNode rootNode, final Consumer callback, final boolean skipParsing) { + public void parseMetadataRecursive(final N5TreeNode rootNode, final Consumer callback, final boolean skipParsingIfPresent) { /* depth first, check if we have children */ final List children = rootNode.childrenList(); final ArrayList> childrenFutures = new ArrayList>(); @@ -595,16 +593,16 @@ public void parseMetadataRecursive(final N5TreeNode rootNode, final Consumer Date: Fri, 13 Dec 2024 15:03:26 -0500 Subject: [PATCH 11/14] chore: bump pom-scijava to 40.0.0 * bump n5-aws-s3, n5-google-cloud, and n5-zarr to latest releases --- pom.xml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index e39d49b..8776bc2 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ org.scijava pom-scijava - 39.0.0 + 40.0.0 @@ -110,7 +110,11 @@ sign,deploy-to-scijava - + + 4.2.2 + 4.1.2 + 1.4.0 + 1.0.0-preview.20191208 1.4.1 From 35b01362130bb45baa288d3d6aa3876ede9d16b7 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 13 Dec 2024 15:03:49 -0500 Subject: [PATCH 12/14] fix: DatasetDiscovery discoverShallow --- .../saalfeldlab/n5/universe/N5DatasetDiscoverer.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java index 9a884b3..7127030 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java @@ -407,8 +407,8 @@ public N5TreeNode discoverShallow(final N5TreeNode base) { public N5TreeNode discoverShallow(final N5TreeNode base, final Consumer callback) { - parseMetadataShallow(n5, root, metadataParsers, groupParsers); - return root; + parseMetadataShallow(n5, base, metadataParsers, groupParsers); + return base; } /** @@ -442,14 +442,16 @@ public N5TreeNode discoverAndParseRecursive(final N5TreeNode root, final Consume throws IOException { discoverShallow(root, callback); + callback.accept(root); + sortAndTrimRecursive(root, callback); groupSeparator = n5.getGroupSeparator(); String[] datasetPaths; try { datasetPaths = n5.deepList(root.getPath(), executor); N5TreeNode.fromFlatList(root, datasetPaths, groupSeparator); - } catch (final Exception e) { - return null; + } catch (final Exception ignore) { + return root; } callback.accept(root); From 14a0de4c6e5463bc4907f3f51168da66c66e45e6 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 13 Dec 2024 15:10:50 -0500 Subject: [PATCH 13/14] doc: fix shallow discovery methods doc --- .../janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java index 7127030..9ab3ef7 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java @@ -301,7 +301,6 @@ public static void parseMetadata(final N5Reader n5, final N5TreeNode node, * @param node the tree node * @param metadataParsers list of metadata parsers * @param groupParsers list of group parsers - * @throws IOException the exception */ public static void parseMetadataShallow(final N5Reader n5, final N5TreeNode node, final List> metadataParsers, final List> groupParsers) { @@ -665,7 +664,7 @@ public static N5TreeNode discover(final N5Reader n5) { * * @param n5 the reader * @param dataset the dataset - * @return a shallow node + * @return a node with shallow-parsed metadata */ public static N5TreeNode discoverShallow(final N5Reader n5, final String dataset) { @@ -680,7 +679,7 @@ public static N5TreeNode discoverShallow(final N5Reader n5, final String dataset * Discovers metadata present at the root without listing * * @param n5 the reader - * @return + * @return the root tree node */ public static N5TreeNode discoverShallow(final N5Reader n5) { From b003042b847e6134c6b9b4ecd5a1c154b6c2ec48 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 13 Dec 2024 15:10:58 -0500 Subject: [PATCH 14/14] fix: rm main --- .../n5/universe/N5DatasetDiscoverer.java | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java index 9ab3ef7..06c4d26 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/universe/N5DatasetDiscoverer.java @@ -686,33 +686,5 @@ public static N5TreeNode discoverShallow(final N5Reader n5) { return discoverShallow(n5, "/"); } - public static void main(String[] args) throws IOException { - - final N5Reader zarr = new N5Factory().openReader("/home/john/tmp/mr.avg.ome.zarr"); -// final N5Reader zarr = new N5Factory() -// .openReader("https://storage.googleapis.com/jax-public-ngff/KOMP/adult_lacZ/ndp/Moxd1/23420_K35061_FGut.zarr"); - -// System.out.println(zarr); -// System.out.println(zarr.exists("")); - -// N5TreeNode root = N5DatasetDiscoverer.discoverShallow(zarr, "/"); -// System.out.println(root); -// System.out.println("metadata: " + root.getMetadata()); -// System.out.println(root.printRecursive()); - - N5TreeNode root = N5DatasetDiscoverer.discover(zarr); - System.out.println(root); - System.out.println("metadata: " + root.getMetadata()); - System.out.println(root.printRecursive()); - -// OmeNgffMetadataParser parser = new OmeNgffMetadataParser( true ); -// Optional meta = parser.parseMetadata(zarr, ""); -// System.out.println(meta.isPresent()); -// if(meta.isPresent()) { -// System.out.println(meta.get()); -// } - - } - }