Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disallow hdmf 3.14.4, make organize not parallelize for a single file, and log information about all exceptions raised while reading metadata for organize #1496

Merged
merged 4 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 28 additions & 14 deletions dandi/organize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import os.path as op
from pathlib import Path, PurePosixPath
import re
import traceback
import uuid

import ruamel.yaml
Expand Down Expand Up @@ -841,44 +842,57 @@ def act(func, *args, **kwargs):
# react to those
# Doesn't play nice with Parallel
# with tqdm.tqdm(desc="Files", total=len(paths), unit="file", unit_scale=False) as pbar:
failed = []

def _get_metadata(path):
# Avoid heavy import by importing within function:
from .metadata.nwb import get_metadata

meta, exc = {}, None
try:
meta = get_metadata(path)
except Exception as exc:
meta = {}
failed.append(path)
# pbar.desc = "Files (%d failed)" % len(failed)
lgr.debug("Failed to get metadata for %s: %s", path, exc)
except Exception as e:
exc = (
e.__class__,
str(e),
traceback.TracebackException.from_exception(e),
)
# pbar.update(1)
meta["path"] = path
return meta
return meta, exc

if (
not devel_debug and jobs != 1
not devel_debug and jobs != 1 and not len(paths) == 1
): # Do not use joblib at all if number_of_jobs=1 or there is only a single path
# Note: It is Python (pynwb) intensive, not IO, so ATM there is little
# to no benefit from Parallel without using multiproc! But that would
# complicate progress bar indication... TODO
metadata = list(
metadata_excs = list(
Parallel(n_jobs=jobs, verbose=10)(
delayed(_get_metadata)(path) for path in paths
)
)
else:
metadata = list(map(_get_metadata, paths))
if failed:
metadata_excs = list(map(_get_metadata, paths))
exceptions = [e for _, e in metadata_excs if e]
if exceptions:
lgr.warning(
"Failed to load metadata for %d out of %d files",
len(failed),
"Failed to load metadata for %d out of %d files "
"due to following types of exceptions: %s. "
"Details of the exceptions will be shown at DEBUG level",
len(exceptions),
len(paths),
", ".join(e[0].__name__ for e in exceptions),
)
for m, e in metadata_excs:
if not e:
continue
lgr.debug(
"Loading metadata for path %s resulted in following exception:\n%s",
m["path"],
"\n".join(e[-1].format()),
)

metadata, skip_invalid = filter_invalid_metadata_rows(metadata)
metadata, skip_invalid = filter_invalid_metadata_rows([m for m, _ in metadata_excs])
if skip_invalid:
msg = (
"%d out of %d files were found not containing all necessary "
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ install_requires =
etelemetry >= 0.2.2
fasteners
fscacher >= 0.3.0
hdmf != 3.5.0
# 3.14.4: https://github.com/hdmf-dev/hdmf/issues/1186
hdmf != 3.5.0,!=3.14.4
humanize
interleave ~= 0.1
joblib
Expand Down
Loading