Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support data objects belong to a collection #396

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions schema/definitions/0.8.0/examples/table_inplace.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ file:
checksum_md5: kjhsdfvsdlfk23knerknvk23 # checksum of the file, not the data.
size_bytes: 5010144

relations:
collections:
- name: MyCollection
uuid: 1b4fb471-159d-4b46-9b25-229419cd349b
- name: MySecondCollection
uuid: 3d338cd7-926a-49c0-8d6b-8ae2063f681f

data: # The data block describes the actual data (e.g. surface). Only present in data objects

content: inplace_volumes # white-listed and standardized
Expand Down
26 changes: 26 additions & 0 deletions schema/definitions/0.8.0/schema/fmu_results.json
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,29 @@
}
}
},
"relations": {
"type": "object",
"properties": {
"collections": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"examples": [
"MyCollectionName"
]
},
"uuid": {
"$ref": "#/definitions/generic/uuid"
}
}
}
}
},
"additionalProperties": false
},
"display": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -1229,6 +1252,9 @@
},
"class": {
"$ref": "#/definitions/class"
},
"relations": {
"$ref": "#/definitions/relations"
}
},
"oneOf": [
Expand Down
60 changes: 60 additions & 0 deletions src/fmu/dataio/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
export_file_compute_checksum_md5,
glue_metadata_preprocessed,
read_metadata,
uuid_from_string,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -163,6 +164,48 @@ def generate_meta_access(config: dict) -> Optional[dict]:
return a_meta


def generate_meta_relations(dataio, meta_fmu: dict) -> Optional[dict]:
    """Generate the relations block.

    Each name in ``dataio.collection_name`` (a single str or a list of str) is
    concatenated with the current fmu.case.uuid and passed to ``uuid_from_string``
    to obtain the uuid of that collection. The resulting name/uuid pairs are put
    into the ``collections`` attribute, which is always a list.

    Args:
        dataio: The export settings instance; only ``collection_name`` is read.
        meta_fmu: The fmu metadata block. When empty (non-FMU runs, RMS-gui, etc.)
            an empty string is used in place of the case uuid.

    Returns:
        A dict of the form ``{"collections": [{"name": ..., "uuid": ...}, ...]}``,
        or None when no collection_name is given.

    Raises:
        ValueError: If collection_name is neither a str nor a list of str.
    """

    logger.debug("generate_meta_relations")

    collection_name = dataio.collection_name
    if collection_name is None:
        logger.debug("dataio.collection_name is None, returning empty")
        return None

    if isinstance(collection_name, str):
        logger.debug("collection_name is given as a string")
        cnames = [collection_name]
    elif isinstance(collection_name, list):
        logger.debug("collection_name is given as a list")
        cnames = collection_name
    else:
        raise ValueError("'collection_name' must be str or list.")

    # Non-string members would be silently stringified by the f-string below,
    # giving a meaningless uuid and a non-string "name", so reject them up front.
    invalid = [cname for cname in cnames if not isinstance(cname, str)]
    if invalid:
        raise ValueError(
            f"'collection_name' must be str or list of str, got members: {invalid!r}"
        )

    # case.uuid
    if not meta_fmu:
        # for non-FMU runs, RMS-gui, etc.
        logger.debug("No case uuid")
        case_uuid = ""
    else:
        case_uuid = meta_fmu["case"]["uuid"]
        logger.debug("case uuid is %s", case_uuid)

    collections = []
    for cname in cnames:
        logger.debug("Making collection uuid for %s", cname)
        collection_uuid = uuid_from_string(f"{case_uuid}{cname}")
        logger.debug("uuid returned was %s", collection_uuid)
        collections.append({"name": cname, "uuid": collection_uuid})

    return {"collections": collections}


@dataclass
class _MetaData:
"""Class for sampling, process and holding all metadata in an ExportData instance.
Expand Down Expand Up @@ -208,6 +251,8 @@ class _MetaData:
meta_file: dict = field(default_factory=dict, init=False)
meta_tracklog: list = field(default_factory=list, init=False)
meta_fmu: dict = field(default_factory=dict, init=False)
meta_relations: dict = field(default_factory=dict, init=False)

# temporary storage for preprocessed data:
meta_xpreprocessed: dict = field(default_factory=dict, init=False)

Expand Down Expand Up @@ -340,6 +385,18 @@ def _populate_meta_access(self):
if self.dataio:
self.meta_access = generate_meta_access(self.dataio.config)

def _populate_meta_relations(self):
    """Fill ``meta_relations`` with the relations block of this data object.

    The relations block holds information about how this data object relates to
    others. It is produced by ``generate_meta_relations`` from the dataio instance
    and the fmu metadata block. Nothing is done when no dataio is attached.

    TODO: Migrate existing relational attributes here, e.g. "parent".

    """
    if not self.dataio:
        return

    self.meta_relations = generate_meta_relations(self.dataio, self.meta_fmu)

def _populate_meta_xpreprocessed(self):
"""Populate a few necessary 'tmp' metadata needed for preprocessed data."""
if self.dataio.fmu_context == "preprocessed":
Expand Down Expand Up @@ -375,6 +432,7 @@ def generate_export_metadata(self, skip_null=True) -> dict: # TODO! -> skip_nul
self._populate_meta_fmu()
self._populate_meta_file()
self._populate_meta_xpreprocessed()
self._populate_meta_relations()

# glue together metadata, order is as legacy code (but will be screwed if reuse
# of existing metadata...)
Expand All @@ -391,6 +449,8 @@ def generate_export_metadata(self, skip_null=True) -> dict: # TODO! -> skip_nul
meta["access"] = self.meta_access
meta["masterdata"] = self.meta_masterdata

meta["relations"] = self.meta_relations

if self.dataio.fmu_context == "preprocessed":
meta["_preprocessed"] = self.meta_xpreprocessed

Expand Down
4 changes: 4 additions & 0 deletions src/fmu/dataio/dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,9 @@ class ExportData:
the file structure or by other means. See also fmu_context, where "case"
may need an explicit casepath!

collection_name: To include a data object in a collection. Shall be string or
list of strings. Will be combined with case uuid.

config: Required in order to produce valid metadata, either as key (here) or
through an environment variable. A dictionary with static settings.
In the standard case this is read from FMU global variables
Expand Down Expand Up @@ -561,6 +564,7 @@ class ExportData:
access_ssdl: dict = field(default_factory=dict)
aggregation: bool = False
casepath: Union[str, Path, None] = None
collection_name: Union[str, List[str], None] = None
config: dict = field(default_factory=dict)
content: Union[dict, str, None] = None
depth_reference: str = "msl"
Expand Down
29 changes: 29 additions & 0 deletions tests/test_schema/test_schema_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,3 +414,32 @@ def test_schema_content_synch_with_code(schema_080):
raise ValueError(
f"content '{allowed_content}' allowed in code, but not schema."
)


def test_schema_relations(schema_080, metadata_examples):
    """Validate the relations.collections block against the schema."""

    # work on a copy of the inplace table example
    example = deepcopy(metadata_examples["table_inplace.yml"])

    # precondition: the example carries relations with a list of collections
    assert "relations" in example
    collections = example["relations"]["collections"]
    assert isinstance(collections, list)

    # the untouched example shall validate
    jsonschema.validate(instance=example, schema=schema_080)

    # a bare string is not a valid collection entry, shall fail
    collections.append("non-uuid")
    with pytest.raises(jsonschema.exceptions.ValidationError):
        jsonschema.validate(instance=example, schema=schema_080)
    collections.pop()  # restore the example

    # an unknown property directly under relations, shall fail
    example["relations"]["tst"] = "hei"
    with pytest.raises(jsonschema.exceptions.ValidationError):
        jsonschema.validate(instance=example, schema=schema_080)

    # relations is not required: without the block it shall still validate
    del example["relations"]
    jsonschema.validate(instance=example, schema=schema_080)
22 changes: 22 additions & 0 deletions tests/test_units/test_dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,3 +497,25 @@ def test_forcefolder_absolute_shall_raise_or_warn(tmp_path, globalconfig2, regsu
)
ExportData.allow_forcefolder_absolute = False # reset
ExportData._inside_rms = False


def test_exportdata_with_collection_name(globalconfig2, regsurf, arrowtable, polygons):
    """Test export of multiple objects with common collection_name."""

    cname = "mycollection"
    edata = ExportData(config=globalconfig2, content="volumes", collection_name=cname)

    # Use .generate_metadata, not .export, as file export is not needed here.
    metadatas = [
        edata.generate_metadata(obj, name="myname")
        for obj in [regsurf, arrowtable, polygons]
    ]

    # Have now produced metadata for 3 objects, all of which shall belong to the same
    # collection. Hence they shall all have exactly 1 entry in relations.collections,
    # and those entries shall be identical.
    collections = [metadata["relations"]["collections"] for metadata in metadatas]
    expected = collections[0]

    # Pin the "exactly 1 entry" claim; comparing the lists to each other alone
    # would also pass if every list was empty or had several entries.
    assert len(expected) == 1
    assert expected[0]["name"] == cname

    for collection in collections:
        assert collection == expected
111 changes: 111 additions & 0 deletions tests/test_units/test_metadata_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,117 @@ def test_metadata_access_rep_include(globalconfig1):
"""Test the input of the rep_include field."""


# --------------------------------------------------------------------------------------
# RELATIONS block
# --------------------------------------------------------------------------------------


def test_metadata_relations_no_collection_name(globalconfig1):
    """Relations shall be None when collection_name is not provided."""
    exporter = dio.ExportData(config=globalconfig1, content="depth")
    meta = _MetaData("dummy", exporter)
    meta._populate_meta_relations()

    # collection_name defaults to None, hence no relations block is made
    assert meta.dataio.collection_name is None
    assert meta.meta_relations is None


def test_metadata_relations_with_case_uuid(globalconfig1, fmurun_w_casemetadata):
    """Confirm that the collection uuid follows the fmu.case.uuid."""

    exporter = dio.ExportData(
        config=globalconfig1, content="depth", collection_name="mycollection"
    )
    exporter._rootpath = fmurun_w_casemetadata
    meta = _MetaData("dummy", exporter, verbosity="DEBUG")
    meta._populate_meta_fmu()

    # first pass, using the original fmu.case.uuid
    meta._populate_meta_relations()
    first = deepcopy(meta.meta_relations["collections"][0])
    assert len(meta.meta_relations["collections"]) == 1

    # second pass with unchanged input shall reproduce the very same entry
    meta._populate_meta_relations()
    repeated = deepcopy(meta.meta_relations["collections"][0])
    assert first == repeated

    # third pass, after swapping in another fmu.case.uuid
    meta.meta_fmu["case"]["uuid"] = "b31b05e8-e47f-47b1-8fee-e94b2234aa21"
    meta._populate_meta_relations()
    second = deepcopy(meta.meta_relations["collections"][0])
    assert len(meta.meta_relations["collections"]) == 1

    # the uuid depends on the case, the name does not
    assert first["uuid"] != second["uuid"]
    assert first["name"] == second["name"]
    assert len(first["uuid"]) == len(second["uuid"]) == 36


def test_metadata_relations_one_collection_name(globalconfig1):
    """Test the relations generation for one single collection name.

    A list with one member and a plain string shall behave the same, i.e.
    collection_name = ["tst"] and collection_name = "tst" give the same result.

    """

    # --- collection_name given as list[str]
    exporter_list = dio.ExportData(
        config=globalconfig1, content="depth", collection_name=["tst"]
    )
    meta_list = _MetaData("dummy", exporter_list, verbosity="DEBUG")
    meta_list._populate_meta_relations()

    relations_list = meta_list.meta_relations
    assert "collections" in relations_list
    assert isinstance(relations_list["collections"], list)
    assert len(relations_list["collections"]) == 1

    from_list = relations_list["collections"]
    assert isinstance(from_list, list)

    entry = from_list[0]
    assert isinstance(entry, dict)
    assert "name" in entry
    assert "uuid" in entry

    # length check only, a poor man's verification of the uuid
    assert len(entry["uuid"]) == 36

    # --- collection_name given as str
    exporter_str = dio.ExportData(
        config=globalconfig1, content="depth", collection_name="tst"
    )
    meta_str = _MetaData("dummy", exporter_str, verbosity="DEBUG")
    meta_str._populate_meta_relations()

    relations_str = meta_str.meta_relations
    assert "collections" in relations_str
    assert isinstance(relations_str["collections"], list)
    assert len(relations_str["collections"]) == 1

    # --- both input forms shall give the same collection entry
    assert relations_str["collections"][0] == from_list[0]


def test_metadata_relations_multiple_collection_name(globalconfig1):
    """Test the relations generation when several collection names are given."""
    names = ["tst", "tst2", "tst3"]
    exporter = dio.ExportData(
        config=globalconfig1, content="depth", collection_name=names
    )
    meta = _MetaData("dummy", exporter)
    meta._populate_meta_relations()

    relations = meta.meta_relations
    assert "collections" in relations
    assert isinstance(relations["collections"], list)
    assert len(relations["collections"]) == 3

    # every entry shall be a dict carrying both uuid and name
    for entry in relations["collections"]:
        assert isinstance(entry, dict)
        assert "uuid" in entry
        assert "name" in entry
        # length check only, a poor man's verification of the uuid
        assert len(entry["uuid"]) == 36


# --------------------------------------------------------------------------------------
# The GENERATE method
# --------------------------------------------------------------------------------------
Expand Down
13 changes: 13 additions & 0 deletions tests/test_units/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,16 @@
)
def test_check_if_number(value, result):
assert utils.check_if_number(value) == result


def test_uuid_from_string():
    """Test that uuid_from_string gives a repeatable 36 character string."""
    source = "mystring"

    generated = utils.uuid_from_string(source)
    assert len(generated) == 36
    assert isinstance(generated, str)

    # identical input shall give identical output
    assert utils.uuid_from_string(source) == utils.uuid_from_string(source)
Loading