From b2cbca5b0970b95d816245eecafaad0069433822 Mon Sep 17 00:00:00 2001 From: Per Olav Eide Svendsen Date: Sat, 18 Nov 2023 18:59:18 +0100 Subject: [PATCH 1/6] test utils.uuid_from_string --- tests/test_units/test_utils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_units/test_utils.py b/tests/test_units/test_utils.py index bb77702e0..2eeb59521 100644 --- a/tests/test_units/test_utils.py +++ b/tests/test_units/test_utils.py @@ -25,3 +25,16 @@ ) def test_check_if_number(value, result): assert utils.check_if_number(value) == result + + +def test_uuid_from_string(): + """Test the uuid_from_string method.""" + result = utils.uuid_from_string("mystring") + assert len(result) == 36 + assert isinstance(result, str) + + # test repeatability + first = utils.uuid_from_string("mystring") + second = utils.uuid_from_string("mystring") + + assert first == second From e2d1ebe07d8025a6f450df6fe9809e396c61fc2f Mon Sep 17 00:00:00 2001 From: Per Olav Eide Svendsen Date: Sat, 18 Nov 2023 20:09:20 +0100 Subject: [PATCH 2/6] describe relations block in schema --- .../0.8.0/examples/table_inplace.yml | 5 ++++ .../definitions/0.8.0/schema/fmu_results.json | 15 ++++++++++ tests/test_schema/test_schema_logic.py | 29 +++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/schema/definitions/0.8.0/examples/table_inplace.yml b/schema/definitions/0.8.0/examples/table_inplace.yml index bab1784ee..ca41baf3a 100644 --- a/schema/definitions/0.8.0/examples/table_inplace.yml +++ b/schema/definitions/0.8.0/examples/table_inplace.yml @@ -67,6 +67,11 @@ file: checksum_md5: kjhsdfvsdlfk23knerknvk23 # checksum of the file, not the data. size_bytes: 5010144 +relations: + collections: + - 1b4fb471-159d-4b46-9b25-229419cd349b + - 3d338cd7-926a-49c0-8d6b-8ae2063f681f + data: # The data block describes the actual data (e.g. surface). 
Only present in data objects content: inplace_volumes # white-listed and standardized diff --git a/schema/definitions/0.8.0/schema/fmu_results.json b/schema/definitions/0.8.0/schema/fmu_results.json index e7e969db0..5cdd68cc3 100644 --- a/schema/definitions/0.8.0/schema/fmu_results.json +++ b/schema/definitions/0.8.0/schema/fmu_results.json @@ -662,6 +662,18 @@ } } }, + "relations": { + "type": "object", + "properties": { + "collections": { + "type": "array", + "items": { + "$ref": "#/definitions/generic/uuid" + } + } + }, + "additionalProperties": false + }, "display": { "type": "object", "properties": { @@ -1229,6 +1241,9 @@ }, "class": { "$ref": "#/definitions/class" + }, + "relations": { + "$ref": "#/definitions/relations" } }, "oneOf": [ diff --git a/tests/test_schema/test_schema_logic.py b/tests/test_schema/test_schema_logic.py index 4958297aa..cb353c2cd 100644 --- a/tests/test_schema/test_schema_logic.py +++ b/tests/test_schema/test_schema_logic.py @@ -414,3 +414,32 @@ def test_schema_content_synch_with_code(schema_080): raise ValueError( f"content '{allowed_content}' allowed in code, but not schema." 
) + + +def test_schema_relations(schema_080, metadata_examples): + """Test the relations.collections.""" + + # fetch the inplace table example + metadata = deepcopy(metadata_examples["table_inplace.yml"]) + + # test assumption + assert "relations" in metadata + assert isinstance(metadata["relations"]["collections"], list) + + # validate as-is + jsonschema.validate(instance=metadata, schema=schema_080) + + # non-uuid, shall fail + metadata["relations"]["collections"].append("non-uuid") + with pytest.raises(jsonschema.exceptions.ValidationError): + jsonschema.validate(instance=metadata, schema=schema_080) + metadata["relations"]["collections"].pop() # cleanup + + # insert non-valid property, shall fail + metadata["relations"]["tst"] = "hei" + with pytest.raises(jsonschema.exceptions.ValidationError): + jsonschema.validate(instance=metadata, schema=schema_080) + + # remove relations block, shall still validate (not required) + del metadata["relations"] + jsonschema.validate(instance=metadata, schema=schema_080) From 7749a5d39b6aa847a2b012511cd4af8f02424e88 Mon Sep 17 00:00:00 2001 From: Per Olav Eide Svendsen Date: Sat, 18 Nov 2023 20:16:05 +0100 Subject: [PATCH 3/6] add support for collection memberships --- src/fmu/dataio/_metadata.py | 60 +++++++++++++ src/fmu/dataio/dataio.py | 4 + tests/test_units/test_dataio.py | 17 ++++ tests/test_units/test_metadata_class.py | 109 ++++++++++++++++++++++++ 4 files changed, 190 insertions(+) diff --git a/src/fmu/dataio/_metadata.py b/src/fmu/dataio/_metadata.py index c02ff068b..bbef6f5df 100644 --- a/src/fmu/dataio/_metadata.py +++ b/src/fmu/dataio/_metadata.py @@ -21,6 +21,7 @@ export_file_compute_checksum_md5, glue_metadata_preprocessed, read_metadata, + uuid_from_string, ) logger = logging.getLogger(__name__) @@ -163,6 +164,48 @@ def generate_meta_access(config: dict) -> Optional[dict]: return a_meta +def generate_meta_relations(dataio, meta_fmu: dict) -> Optional[dict]: + """Generate the relations block. 
+ + The collection_name argument can be a str or a list, and shall be combined with the + current fmu.case.uuid in a valid uuid4. This shall be put into the + relations.collections attribute, which shall be a list. + """ + + logger.debug("generate_meta_relations") + r_meta = {"collections": []} + + if dataio.collection_name is None: + logger.debug("dataio.collection_name is None, returning empty") + return + + if isinstance(dataio.collection_name, str): + logger.debug("collection_name is given as a string") + cnames = [dataio.collection_name] + elif isinstance(dataio.collection_name, list): + logger.debug("collection_name is given as a list") + cnames = dataio.collection_name + else: + raise ValueError("'collection_name' must be str or list.") + + # case.uuid + if not meta_fmu: + # for non-FMU runs, RMS-gui, etc. + logger.debug("No case uuid") + case_uuid = "" + else: + case_uuid = meta_fmu["case"]["uuid"] + logger.debug("case uuid is %s", case_uuid) + + for cname in cnames: + logger.debug("Making collection uuid for %s", cname) + collection_uuid = uuid_from_string(f"{case_uuid}{cname}") + logger.debug("uuid returned was %s", collection_uuid) + r_meta["collections"].append(collection_uuid) + + return r_meta + + @dataclass class _MetaData: """Class for sampling, process and holding all metadata in an ExportData instance. @@ -208,6 +251,8 @@ class _MetaData: meta_file: dict = field(default_factory=dict, init=False) meta_tracklog: list = field(default_factory=list, init=False) meta_fmu: dict = field(default_factory=dict, init=False) + meta_relations: dict = field(default_factory=dict, init=False) + # temporary storage for preprocessed data: meta_xpreprocessed: dict = field(default_factory=dict, init=False) @@ -340,6 +385,18 @@ def _populate_meta_access(self): if self.dataio: self.meta_access = generate_meta_access(self.dataio.config) + def _populate_meta_relations(self): + """Populate the relations block. 
+ + The relations block shall contain information about relationships between this + data object and others. + + TODO: Migrate existing relational attributes here, e.g. "parent". + + """ + if self.dataio: + self.meta_relations = generate_meta_relations(self.dataio, self.meta_fmu) + def _populate_meta_xpreprocessed(self): """Populate a few necessary 'tmp' metadata needed for preprocessed data.""" if self.dataio.fmu_context == "preprocessed": @@ -375,6 +432,7 @@ def generate_export_metadata(self, skip_null=True) -> dict: # TODO! -> skip_nul self._populate_meta_fmu() self._populate_meta_file() self._populate_meta_xpreprocessed() + self._populate_meta_relations() # glue together metadata, order is as legacy code (but will be screwed if reuse # of existing metadata...) @@ -391,6 +449,8 @@ def generate_export_metadata(self, skip_null=True) -> dict: # TODO! -> skip_nul meta["access"] = self.meta_access meta["masterdata"] = self.meta_masterdata + meta["relations"] = self.meta_relations + if self.dataio.fmu_context == "preprocessed": meta["_preprocessed"] = self.meta_xpreprocessed diff --git a/src/fmu/dataio/dataio.py b/src/fmu/dataio/dataio.py index 6e740bb22..389bee72d 100644 --- a/src/fmu/dataio/dataio.py +++ b/src/fmu/dataio/dataio.py @@ -385,6 +385,9 @@ class ExportData: the file structure or by other means. See also fmu_context, where "case" may need an explicit casepath! + collection_name: To include a data object in a collection. Shall be string or + list of strings. Will be combined with case uuid. + config: Required in order to produce valid metadata, either as key (here) or through an environment variable. A dictionary with static settings. 
In the standard case this is read from FMU global variables @@ -561,6 +564,7 @@ class ExportData: access_ssdl: dict = field(default_factory=dict) aggregation: bool = False casepath: Union[str, Path, None] = None + collection_name: Union[str, List[str], None] = None config: dict = field(default_factory=dict) content: Union[dict, str, None] = None depth_reference: str = "msl" diff --git a/tests/test_units/test_dataio.py b/tests/test_units/test_dataio.py index e4bb40026..c38df85c1 100644 --- a/tests/test_units/test_dataio.py +++ b/tests/test_units/test_dataio.py @@ -497,3 +497,20 @@ def test_forcefolder_absolute_shall_raise_or_warn(tmp_path, globalconfig2, regsu ) ExportData.allow_forcefolder_absolute = False # reset ExportData._inside_rms = False + + +def test_exportdata_with_collection_name(globalconfig2, regsurf, arrowtable, polygons): + """Test export of multiple objects with common collection_name.""" + + edata = ExportData( + config=globalconfig2, content="volumes", collection_name="mycollection" + ) + metadatas = [] + for obj in [regsurf, arrowtable, polygons]: + metadatas.append(edata.generate_metadata(obj, name="myname")) + + collection_uuids = [ + metadata["relations"]["collections"][0] for metadata in metadatas + ] + assert len(set(collection_uuids)) == 1 + assert len(collection_uuids[0]) == 36 diff --git a/tests/test_units/test_metadata_class.py b/tests/test_units/test_metadata_class.py index c4c8395ab..d37fdb692 100644 --- a/tests/test_units/test_metadata_class.py +++ b/tests/test_units/test_metadata_class.py @@ -1,6 +1,7 @@ """Test the _MetaData class from the _metadata.py module""" import logging from copy import deepcopy +import os import pytest @@ -294,6 +295,114 @@ def test_metadata_access_rep_include(globalconfig1): """Test the input of the rep_include field.""" +# -------------------------------------------------------------------------------------- +# RELATIONS block +# 
-------------------------------------------------------------------------------------- + + +def test_metadata_relations_no_collection_name(globalconfig1): + "Test the relations generation when collection_name is not provided." + edata = dio.ExportData(config=globalconfig1, content="depth") + mymeta = _MetaData("dummy", edata) + mymeta._populate_meta_relations() + + # no collection_name given, relations shall be None + assert mymeta.dataio.collection_name is None + assert mymeta.meta_relations is None + + +def test_metadata_relations_with_case_uuid(globalconfig1, fmurun_w_casemetadata): + """Confirm collection changes with different case_uuids.""" + + cname = "mycollection" + + # produce with original fmu.case.uuid + edata = dio.ExportData(config=globalconfig1, content="depth", collection_name=cname) + edata._rootpath = fmurun_w_casemetadata + mymeta = _MetaData("dummy", edata, verbosity="DEBUG") + mymeta._populate_meta_fmu() + mymeta._populate_meta_relations() + first = deepcopy(mymeta.meta_relations["collections"][0]) + assert len(mymeta.meta_relations["collections"]) == 1 + + # produce again, verify identical + mymeta._populate_meta_relations() + same_as_first = deepcopy(mymeta.meta_relations["collections"][0]) + assert first == same_as_first + + # modify fmu.case.uuid and produce again + newuuid = "b31b05e8-e47f-47b1-8fee-e94b2234aa21" + mymeta.meta_fmu["case"]["uuid"] = newuuid + mymeta._populate_meta_relations() + second = deepcopy(mymeta.meta_relations["collections"][0]) + assert len(mymeta.meta_relations["collections"]) == 1 + + # verify different + assert first != second + assert len(first) == len(second) == 36 + + +def test_metadata_relations_one_collection_name(globalconfig1): + """Test the relations generation when collection name is provided as list with one + member. Also test that similar behaviour if list or not. + + collection_name = ["tst"] and collection_name = "tst" shall give same result. 
+ + """ + + # === Input as list[str] + edata_list = dio.ExportData( + config=globalconfig1, content="depth", collection_name=["tst"] + ) + mymeta_list = _MetaData("dummy", edata_list, verbosity="DEBUG") + mymeta_list._populate_meta_relations() + + assert "collections" in mymeta_list.meta_relations + assert isinstance(mymeta_list.meta_relations["collections"], list) + assert len(mymeta_list.meta_relations["collections"]) == 1 + + collections_ref_list = mymeta_list.meta_relations["collections"][0] + + assert isinstance(collections_ref_list, str) + assert len(collections_ref_list) == 36 # poor mans verification of uuid4 + + # === Input as str + edata_str = dio.ExportData( + config=globalconfig1, content="depth", collection_name="tst" + ) + mymeta_str = _MetaData("dummy", edata_str, verbosity="DEBUG") + mymeta_str._populate_meta_relations() + + assert "collections" in mymeta_str.meta_relations + assert isinstance(mymeta_str.meta_relations["collections"], list) + assert len(mymeta_str.meta_relations["collections"]) == 1 + + collections_ref_str = mymeta_str.meta_relations["collections"][0] + + assert isinstance(collections_ref_str, str) + assert len(collections_ref_str) == 36 # poor mans verification of uuid4 + + # === Confirm identical + assert collections_ref_str == collections_ref_list + + +def test_metadata_relations_multiple_collection_name(globalconfig1): + """Test the relations generation when multiple collection name is provided.""" + edata = dio.ExportData( + config=globalconfig1, content="depth", collection_name=["tst", "tst2", "tst3"] + ) + mymeta = _MetaData("dummy", edata) + mymeta._populate_meta_relations() + + assert "collections" in mymeta.meta_relations + assert isinstance(mymeta.meta_relations["collections"], list) + assert len(mymeta.meta_relations["collections"]) == 3 + + for collections_ref in mymeta.meta_relations["collections"]: + assert isinstance(collections_ref, str) + assert len(collections_ref) == 36 # poor mans verification of uuid4 + + # 
-------------------------------------------------------------------------------------- # The GENERATE method # -------------------------------------------------------------------------------------- From 51dbade97a9574e179ce0c6ef98eefffd283224b Mon Sep 17 00:00:00 2001 From: Per Olav Eide Svendsen Date: Sat, 18 Nov 2023 20:25:25 +0100 Subject: [PATCH 4/6] lint --- tests/test_units/test_metadata_class.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_units/test_metadata_class.py b/tests/test_units/test_metadata_class.py index d37fdb692..7430b3133 100644 --- a/tests/test_units/test_metadata_class.py +++ b/tests/test_units/test_metadata_class.py @@ -1,7 +1,6 @@ """Test the _MetaData class from the _metadata.py module""" import logging from copy import deepcopy -import os import pytest From e1c43703c5c6f737058e7171c27bdb22a5c81f60 Mon Sep 17 00:00:00 2001 From: Per Olav Eide Svendsen Date: Tue, 21 Nov 2023 14:59:48 +0100 Subject: [PATCH 5/6] relations defined as list of objects --- schema/definitions/0.8.0/examples/table_inplace.yml | 6 ++++-- schema/definitions/0.8.0/schema/fmu_results.json | 13 ++++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/schema/definitions/0.8.0/examples/table_inplace.yml b/schema/definitions/0.8.0/examples/table_inplace.yml index ca41baf3a..7924407ac 100644 --- a/schema/definitions/0.8.0/examples/table_inplace.yml +++ b/schema/definitions/0.8.0/examples/table_inplace.yml @@ -69,8 +69,10 @@ file: relations: collections: - - 1b4fb471-159d-4b46-9b25-229419cd349b - - 3d338cd7-926a-49c0-8d6b-8ae2063f681f + - name: MyCollection + uuid: 1b4fb471-159d-4b46-9b25-229419cd349b + - name: MySecondCollection + uuid: 3d338cd7-926a-49c0-8d6b-8ae2063f681f data: # The data block describes the actual data (e.g. surface). 
Only present in data objects diff --git a/schema/definitions/0.8.0/schema/fmu_results.json b/schema/definitions/0.8.0/schema/fmu_results.json index 5cdd68cc3..3ecf49100 100644 --- a/schema/definitions/0.8.0/schema/fmu_results.json +++ b/schema/definitions/0.8.0/schema/fmu_results.json @@ -668,7 +668,18 @@ "collections": { "type": "array", "items": { - "$ref": "#/definitions/generic/uuid" + "type": "object", + "properties": { + "name": { + "type": "string", + "examples": [ + "MyCollectionName" + ] + }, + "uuid": { + "$ref": "#/definitions/generic/uuid" + } + } } } }, From b6ce24a6df557b1787c0719323ebcb86f801b3b6 Mon Sep 17 00:00:00 2001 From: Per Olav Eide Svendsen Date: Tue, 21 Nov 2023 15:00:24 +0100 Subject: [PATCH 6/6] relations.collections produced as object with name and uuid --- src/fmu/dataio/_metadata.py | 2 +- tests/test_units/test_dataio.py | 19 ++++++++------ tests/test_units/test_metadata_class.py | 33 ++++++++++++++----------- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/src/fmu/dataio/_metadata.py b/src/fmu/dataio/_metadata.py index bbef6f5df..20c371708 100644 --- a/src/fmu/dataio/_metadata.py +++ b/src/fmu/dataio/_metadata.py @@ -201,7 +201,7 @@ def generate_meta_relations(dataio, meta_fmu: dict) -> Optional[dict]: logger.debug("Making collection uuid for %s", cname) collection_uuid = uuid_from_string(f"{case_uuid}{cname}") logger.debug("uuid returned was %s", collection_uuid) - r_meta["collections"].append(collection_uuid) + r_meta["collections"].append({"name": cname, "uuid": collection_uuid}) return r_meta diff --git a/tests/test_units/test_dataio.py b/tests/test_units/test_dataio.py index c38df85c1..6df0edc5f 100644 --- a/tests/test_units/test_dataio.py +++ b/tests/test_units/test_dataio.py @@ -505,12 +505,17 @@ def test_exportdata_with_collection_name(globalconfig2, regsurf, arrowtable, pol edata = ExportData( config=globalconfig2, content="volumes", collection_name="mycollection" ) - metadatas = [] - for obj in [regsurf, 
arrowtable, polygons]: - metadatas.append(edata.generate_metadata(obj, name="myname")) - collection_uuids = [ - metadata["relations"]["collections"][0] for metadata in metadatas + # Use .generate_metadata, not .export, as file export is not needed here. + metadatas = [ + edata.generate_metadata(obj, name="myname") + for obj in [regsurf, arrowtable, polygons] ] - assert len(set(collection_uuids)) == 1 - assert len(collection_uuids[0]) == 36 + + # Have now produced metadata for 3 objects, all of which shall belong to the same + # collection. Hence they will all have exactly 1 entry in relations.collections, and + # they will all be identical. + + collections = [metadata["relations"]["collections"] for metadata in metadatas] + for collection in collections: + assert collection == collections[0] diff --git a/tests/test_units/test_metadata_class.py b/tests/test_units/test_metadata_class.py index 7430b3133..d4dfd1bd9 100644 --- a/tests/test_units/test_metadata_class.py +++ b/tests/test_units/test_metadata_class.py @@ -336,9 +336,10 @@ def test_metadata_relations_with_case_uuid(globalconfig1, fmurun_w_casemetadata) second = deepcopy(mymeta.meta_relations["collections"][0]) assert len(mymeta.meta_relations["collections"]) == 1 - # verify different - assert first != second - assert len(first) == len(second) == 36 + # verify differences + assert first["uuid"] != second["uuid"] # different case, different uuid + assert first["name"] == second["name"] # different case, but SAME name + assert len(first["uuid"]) == len(second["uuid"]) == 36 def test_metadata_relations_one_collection_name(globalconfig1): @@ -360,10 +361,15 @@ def test_metadata_relations_one_collection_name(globalconfig1): assert isinstance(mymeta_list.meta_relations["collections"], list) assert len(mymeta_list.meta_relations["collections"]) == 1 - collections_ref_list = mymeta_list.meta_relations["collections"][0] + collections_1 = mymeta_list.meta_relations["collections"] + assert isinstance(collections_1, list) 
- assert isinstance(collections_ref_list, str) - assert len(collections_ref_list) == 36 # poor mans verification of uuid4 + mycollection = mymeta_list.meta_relations["collections"][0] + assert isinstance(mycollection, dict) + assert "name" in mycollection + assert "uuid" in mycollection + + assert len(mycollection["uuid"]) == 36 # poor mans verification of uuid4 # === Input as str edata_str = dio.ExportData( @@ -376,13 +382,8 @@ def test_metadata_relations_one_collection_name(globalconfig1): assert isinstance(mymeta_str.meta_relations["collections"], list) assert len(mymeta_str.meta_relations["collections"]) == 1 - collections_ref_str = mymeta_str.meta_relations["collections"][0] - - assert isinstance(collections_ref_str, str) - assert len(collections_ref_str) == 36 # poor mans verification of uuid4 - # === Confirm identical - assert collections_ref_str == collections_ref_list + assert mymeta_str.meta_relations["collections"][0] == collections_1[0] def test_metadata_relations_multiple_collection_name(globalconfig1): @@ -397,9 +398,11 @@ def test_metadata_relations_multiple_collection_name(globalconfig1): assert isinstance(mymeta.meta_relations["collections"], list) assert len(mymeta.meta_relations["collections"]) == 3 - for collections_ref in mymeta.meta_relations["collections"]: - assert isinstance(collections_ref, str) - assert len(collections_ref) == 36 # poor mans verification of uuid4 + for collection in mymeta.meta_relations["collections"]: + assert isinstance(collection, dict) + assert "uuid" in collection + assert "name" in collection + assert len(collection["uuid"]) == 36 # poor mans verification of uuid4 # --------------------------------------------------------------------------------------