From 1565d751af19918edc7bf1c94625232839cc791e Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Fri, 16 Aug 2024 10:08:44 -0300 Subject: [PATCH] Add `validate_metadata` to interface.run_conversion (#949) Co-authored-by: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> --- CHANGELOG.md | 1 + src/neuroconv/basedatainterface.py | 15 +++++++++++++-- src/neuroconv/nwbconverter.py | 18 ++++++++++++++---- .../tools/testing/data_interface_mixins.py | 3 +++ src/neuroconv/utils/json_schema.py | 12 +----------- 5 files changed, 32 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7a6ff48d..3dae5881b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * Add Plexon2 support [PR #918](https://github.com/catalystneuro/neuroconv/pull/918) * Converter working with multiple VideoInterface instances [PR #914](https://github.com/catalystneuro/neuroconv/pull/914) * Added helper function `neuroconv.tools.data_transfers.submit_aws_batch_job` for basic automated submission of AWS batch jobs. [PR #384](https://github.com/catalystneuro/neuroconv/pull/384) +* Data interfaces `run_conversion` method now performs metadata validation before running the conversion. [PR #949](https://github.com/catalystneuro/neuroconv/pull/949) * Introduced `null_values_for_properties` to `add_units_table` to give user control over null values behavior [PR #989](https://github.com/catalystneuro/neuroconv/pull/989) diff --git a/src/neuroconv/basedatainterface.py b/src/neuroconv/basedatainterface.py index c279d8900..cd42a62dc 100644 --- a/src/neuroconv/basedatainterface.py +++ b/src/neuroconv/basedatainterface.py @@ -64,14 +64,20 @@ def get_metadata(self) -> DeepDict: return metadata - def validate_metadata(self, metadata: dict) -> None: + def validate_metadata(self, metadata: dict, append_mode: bool = False) -> None: """Validate the metadata against the schema.""" encoder = NWBMetaDataEncoder() # The encoder produces a serialized object, so we deserialized it for comparison serialized_metadata = encoder.encode(metadata) decoded_metadata = json.loads(serialized_metadata) - validate(instance=decoded_metadata, schema=self.get_metadata_schema()) + metdata_schema = self.get_metadata_schema() + if append_mode: + # Eliminate required from NWBFile + nwbfile_schema = metdata_schema["properties"]["NWBFile"] + nwbfile_schema.pop("required", None) + + validate(instance=decoded_metadata, schema=metdata_schema) def create_nwbfile(self, metadata: Optional[dict] = None, **conversion_options) -> NWBFile: """ @@ -157,6 +163,11 @@ def run_conversion( if metadata is None: metadata = self.get_metadata() + file_initially_exists = Path(nwbfile_path).exists() if nwbfile_path is not None else False + append_mode = file_initially_exists and not overwrite + + self.validate_metadata(metadata=metadata, append_mode=append_mode) + with make_or_load_nwbfile( nwbfile_path=nwbfile_path, nwbfile=nwbfile, diff --git a/src/neuroconv/nwbconverter.py b/src/neuroconv/nwbconverter.py index b654c1a68..21ec4475a 100644 --- a/src/neuroconv/nwbconverter.py +++ b/src/neuroconv/nwbconverter.py @@ -101,13 +101,20 @@ def get_metadata(self) -> DeepDict: metadata = dict_deep_update(metadata, interface_metadata) return metadata - def validate_metadata(self, metadata: Dict[str, dict]): + def validate_metadata(self, metadata: Dict[str, dict], append_mode: bool = False): """Validate metadata against Converter metadata_schema.""" encoder = NWBMetaDataEncoder() # The encoder produces a serialized object, so we deserialized it for comparison serialized_metadata = encoder.encode(metadata) decoded_metadata = json.loads(serialized_metadata) - validate(instance=decoded_metadata, schema=self.get_metadata_schema()) + + metadata_schema = self.get_metadata_schema() + if append_mode: + # Eliminate required from NWBFile + nwbfile_schema = metadata_schema["properties"]["NWBFile"] + nwbfile_schema.pop("required", None) + + validate(instance=decoded_metadata, schema=metadata_schema) if self.verbose: print("Metadata is valid!") @@ -206,7 +213,7 @@ def run_conversion( """ if nwbfile_path is None: - warnings.warn( # TODO: remove on or after 12/26/2024 + warnings.warn( # TODO: remove on or after 2024/12/26 "Using Converter.run_conversion without specifying nwbfile_path is deprecated. To create an " "NWBFile object in memory, use Converter.create_nwbfile. To append to an existing NWBFile object," " use Converter.add_to_nwbfile." @@ -215,10 +222,13 @@ def run_conversion( backend = _resolve_backend(backend, backend_configuration) no_nwbfile_provided = nwbfile is None # Otherwise, variable reference may mutate later on inside the context + file_initially_exists = Path(nwbfile_path).exists() if nwbfile_path is not None else False + append_mode = file_initially_exists and not overwrite + if metadata is None: metadata = self.get_metadata() - self.validate_metadata(metadata=metadata) + self.validate_metadata(metadata=metadata, append_mode=append_mode) self.validate_conversion_options(conversion_options=conversion_options) self.temporally_align_data_interfaces() diff --git a/src/neuroconv/tools/testing/data_interface_mixins.py b/src/neuroconv/tools/testing/data_interface_mixins.py index 77d42b55f..637b8e43a 100644 --- a/src/neuroconv/tools/testing/data_interface_mixins.py +++ b/src/neuroconv/tools/testing/data_interface_mixins.py @@ -139,6 +139,7 @@ def check_run_conversion_with_backend_configuration( nwbfile_path=nwbfile_path, nwbfile=nwbfile, overwrite=True, + metadata=metadata, backend_configuration=backend_configuration, **self.conversion_options, ) @@ -158,6 +159,7 @@ class TestNWBConverter(NWBConverter): metadata["NWBFile"].update(session_start_time=datetime.now().astimezone()) conversion_options = dict(Test=self.conversion_options) + converter.run_conversion( nwbfile_path=nwbfile_path, overwrite=True, @@ -947,6 +949,7 @@ def check_run_conversion_with_backend_configuration( nwbfile_path=nwbfile_path, nwbfile=nwbfile, overwrite=True, + metadata=metadata, backend_configuration=backend_configuration, **self.conversion_options, ) diff --git a/src/neuroconv/utils/json_schema.py b/src/neuroconv/utils/json_schema.py index b113e2c72..921e3c316 100644 --- a/src/neuroconv/utils/json_schema.py +++ b/src/neuroconv/utils/json_schema.py @@ -34,7 +34,7 @@ def default(self, obj): return super().default(obj) -class NWBSourceDataEncoder(json.JSONEncoder): +class NWBSourceDataEncoder(NWBMetaDataEncoder): def default(self, obj): @@ -42,16 +42,6 @@ def default(self, obj): if isinstance(obj, Path): return str(obj) - if isinstance(obj, datetime): - return obj.isoformat() - - # Transform numpy generic integers and floats to python ints floats - if isinstance(obj, np.generic): - return obj.item() - - if isinstance(obj, np.ndarray): - return obj.tolist() - return super().default(obj)