Skip to content

Commit

Permalink
ENH: Stop calling generate_metadata() on export when config is not valid
Browse files Browse the repository at this point in the history
  • Loading branch information
tnatt committed Sep 24, 2024
1 parent e357ea4 commit d12c2f8
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 27 deletions.
8 changes: 8 additions & 0 deletions docs/src/dataio_3_migration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,14 @@ Additionally
- The ``return_symlink`` argument to ``export()`` is deprecated. It is redundant and can be removed.


Getting partial metadata from generate_metadata() when config is invalid
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
It was previously possible to get partial metadata from ``generate_metadata()``
when the global config file was invalid. This partial metadata was not valid according
to the datamodel and could not be uploaded to Sumo. Creating invalid metadata is no
longer supported; if the config is invalid, an empty dictionary is returned instead.


Providing settings through environment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
It was previously possible to have a yml-file specifying global input arguments to
Expand Down
3 changes: 3 additions & 0 deletions src/fmu/dataio/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ def export_file(
) -> str:
"""Export a valid object to file"""

# create output folder if not existing
filename.parent.mkdir(parents=True, exist_ok=True)

if filename.suffix == ".gri" and isinstance(obj, xtgeo.RegularSurface):
obj.to_file(filename, fformat="irap_binary")
elif filename.suffix == ".csv" and isinstance(obj, (xtgeo.Polygons, xtgeo.Points)):
Expand Down
59 changes: 45 additions & 14 deletions src/fmu/dataio/dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@
from .aggregation import AggregatedData
from .case import CreateCaseMetadata
from .preprocessed import ExportPreprocessedData
from .providers._filedata import FileDataProvider
from .providers._fmu import FmuProvider, get_fmu_context_from_environment
from .providers.objectdata._provider import objectdata_provider_factory

# DATAIO_EXAMPLES: Final = dataio_examples()
INSIDE_RMS: Final = detect_inside_rms()
Expand Down Expand Up @@ -728,6 +730,16 @@ def _update_fmt_flag(self) -> None:

def _update_check_settings(self, newsettings: dict) -> None:
"""Update instance settings (properties) from other routines."""
# if no newsettings (kwargs) this routine is not needed
if not newsettings:
return

warnings.warn(
"In the future it will not be possible to enter following arguments "
f"inside the export() / generate_metadata() methods: {list(newsettings)}. "
"Please move them up to initialization of the ExportData instance.",
FutureWarning,
)
logger.info("Try new settings %s", newsettings)

# derive legal input from dataclass signature
Expand Down Expand Up @@ -801,6 +813,25 @@ def _get_fmu_provider(self) -> FmuProvider:
workflow=self.workflow,
)

def _export_without_metadata(self, obj: types.Inferrable) -> str:
    """
    Write the object to disk without producing a metadata file.

    The destination is resolved by querying the FileDataProvider directly
    and using the absolute path it reports.

    Returns the value from ``export_file``, i.e. a string with the full
    path to the exported item.
    """
    self._update_fmt_flag()

    # an fmu provider is only set up when running in an fmu context
    fmu_provider = None
    if self._fmurun:
        fmu_provider = self._get_fmu_provider()

    objdata = objectdata_provider_factory(obj, self)
    runpath = fmu_provider.get_runpath() if fmu_provider else None

    file_provider = FileDataProvider(dataio=self, objdata=objdata, runpath=runpath)
    filemeta = file_provider.get_metadata()

    assert filemeta.absolute_path is not None  # for mypy
    return export_file(obj, filename=filemeta.absolute_path, flag=self._usefmtflag)

# ==================================================================================
# Public methods:
# ==================================================================================
Expand Down Expand Up @@ -837,14 +868,14 @@ def generate_metadata(
logger.info("Generate metadata...")
logger.info("KW args %s", kwargs)

if kwargs:
if not isinstance(self.config, GlobalConfiguration):
warnings.warn(
"In the future it will not be possible to enter following arguments "
f"inside the export() / generate_metadata() methods: {list(kwargs)}. "
"Please move them up to initialization of the ExportData instance.",
"From fmu.dataio version 3.0 it will not be possible to produce "
"metadata when the global config is invalid.",
FutureWarning,
)
self._update_check_settings(kwargs)

self._update_check_settings(kwargs)

if isinstance(obj, (str, Path)):
if self.casepath is None:
Expand Down Expand Up @@ -899,21 +930,21 @@ def export(
is_observation=self.is_observation,
).export(obj)

metadata = self.generate_metadata(obj, compute_md5=True, **kwargs)
logger.info("Object type is: %s", type(obj))

# if the config is not valid, export only the object (no metadata)
if not isinstance(self.config, GlobalConfiguration):
warnings.warn("Data will be exported, but without metadata.", UserWarning)
self._update_check_settings(kwargs)
return self._export_without_metadata(obj)

metadata = self.generate_metadata(obj, compute_md5=True, **kwargs)
outfile = Path(metadata["file"]["absolute_path"])
# create output folders if they don't exist
outfile.parent.mkdir(parents=True, exist_ok=True)
metafile = outfile.parent / f".{outfile.name}.yml"

export_file(obj, outfile, flag=self._usefmtflag)
logger.info("Actual file is: %s", outfile)

if isinstance(self.config, GlobalConfiguration):
export_metadata_file(metafile, metadata, savefmt=self.meta_format)
logger.info("Metadata file is: %s", metafile)
else:
warnings.warn("Data will be exported, but without metadata.", UserWarning)

export_metadata_file(metafile, metadata, savefmt=self.meta_format)
logger.info("Metadata file is: %s", metafile)
return str(outfile)
110 changes: 101 additions & 9 deletions tests/test_units/test_dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ def test_missing_or_wrong_config_exports_with_warning(monkeypatch, tmp_path, reg
with pytest.warns(UserWarning, match="The global config"):
edata = ExportData(config={}, content="depth", name="mysurface")

meta = edata.generate_metadata(regsurf)
with pytest.warns(FutureWarning):
meta = edata.generate_metadata(regsurf)
assert "masterdata" not in meta

# check that obj is created but no metadata is found
Expand All @@ -60,6 +61,93 @@ def test_missing_or_wrong_config_exports_with_warning(monkeypatch, tmp_path, reg
read_metadata(out)


def test_wrong_config_exports_correctly_ouside_fmu(
    monkeypatch, tmp_path, globalconfig1, regsurf
):
    """
    With an invalid config the object is exported without metadata.
    Outside an fmu run, check that the export path is the expected one and
    that it is identical to the path obtained with a valid config.
    """
    monkeypatch.chdir(tmp_path)
    name = "mysurface"
    expected_path = tmp_path / f"share/results/maps/{name}.gri"

    # both the invalid-config warning and the no-metadata warning are expected
    with pytest.warns(UserWarning, match="The global config"):
        with pytest.warns(UserWarning, match="without metadata"):
            edata_invalid = ExportData(config={}, content="depth", name=name)
            path_invalid_cfg = edata_invalid.export(regsurf)

    edata_valid = ExportData(config=globalconfig1, content="depth", name=name)
    path_valid_cfg = edata_valid.export(regsurf)

    assert Path(path_invalid_cfg) == expected_path
    assert Path(path_invalid_cfg).exists()
    assert Path(path_valid_cfg).exists()
    assert path_invalid_cfg == path_valid_cfg

    # the deprecated pattern (arguments given to export()) shall give the same path
    deprecated_kwargs = {"content": "depth", "name": name}
    with pytest.warns(FutureWarning):
        path_deprecated = ExportData(config=globalconfig1).export(
            regsurf, **deprecated_kwargs
        )
    assert path_invalid_cfg == path_deprecated


def test_wrong_config_exports_correctly_in_fmu(
    monkeypatch, fmurun_w_casemetadata, globalconfig1, regsurf
):
    """
    With an invalid config the object is exported without metadata.
    Inside an fmu run, check that the export path is the expected one and
    that it is identical to the path obtained with a valid config.
    """
    monkeypatch.chdir(fmurun_w_casemetadata)
    name = "mysurface"
    expected_path = fmurun_w_casemetadata / f"share/results/maps/{name}.gri"

    # both the invalid-config warning and the no-metadata warning are expected
    with pytest.warns(UserWarning, match="The global config"):
        with pytest.warns(UserWarning, match="without metadata"):
            edata_invalid = ExportData(config={}, content="depth", name=name)
            path_invalid_cfg = edata_invalid.export(regsurf)

    edata_valid = ExportData(config=globalconfig1, content="depth", name=name)
    path_valid_cfg = edata_valid.export(regsurf)

    assert Path(path_invalid_cfg) == expected_path
    assert Path(path_invalid_cfg).exists()
    assert Path(path_valid_cfg).exists()
    assert path_invalid_cfg == path_valid_cfg

    # the deprecated pattern (arguments given to export()) shall give the same path
    deprecated_kwargs = {"content": "depth", "name": name}
    with pytest.warns(FutureWarning):
        path_deprecated = ExportData(config=globalconfig1).export(
            regsurf, **deprecated_kwargs
        )
    assert path_invalid_cfg == path_deprecated


def test_config_miss_required_fields(monkeypatch, tmp_path, globalconfig1, regsurf):
"""Global config exists but missing critical data; export file but skip metadata."""

Expand Down Expand Up @@ -156,7 +244,7 @@ def test_update_check_settings_shall_fail(globalconfig1):

newsettings = {"invalidkey": "some"}
some = ExportData(config=globalconfig1, content="depth")
with pytest.raises(KeyError):
with pytest.warns(FutureWarning), pytest.raises(KeyError):
some._update_check_settings(newsettings)


Expand Down Expand Up @@ -285,7 +373,8 @@ def test_classification(globalconfig1, regsurf):
# verify that classification is defaulted to internal
with pytest.warns(UserWarning):
exp = ExportData(config={}, content="depth")
mymeta = exp.generate_metadata(regsurf)
with pytest.warns(FutureWarning):
mymeta = exp.generate_metadata(regsurf)
assert mymeta["access"]["classification"] == "internal"


Expand All @@ -308,16 +397,19 @@ def test_rep_include(globalconfig1, regsurf):
mymeta = exp.generate_metadata(regsurf)
assert mymeta["access"]["ssdl"]["rep_include"] is True

# test that rep_include is taken from config if not provided
# test that rep_include is defaulted to false if not provided
exp = ExportData(config=globalconfig1, content="depth")
mymeta = exp.generate_metadata(regsurf)
assert mymeta["access"]["ssdl"]["rep_include"] is False

# test that rep_include is defaulted False
with pytest.warns(UserWarning):
exp = ExportData(config={}, content="depth")
mymeta = exp.generate_metadata(regsurf)
assert mymeta["access"]["ssdl"]["rep_include"] is False
# add ssdl.rep_include to the config
config = deepcopy(globalconfig1)
config["access"]["ssdl"] = {"rep_include": True}

# test that rep_include can be read from config
with pytest.warns(FutureWarning, match="is deprecated"):
mymeta = ExportData(config=config, content="depth").generate_metadata(regsurf)
assert mymeta["access"]["ssdl"]["rep_include"] is True


def test_unit_is_none(globalconfig1, regsurf):
Expand Down
5 changes: 1 addition & 4 deletions tests/test_units/test_preprocessed.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,10 +317,7 @@ def test_export_preprocessed_file_exportdata_casepath_on_export(
edata = dataio.ExportData(config=rmsglobalconfig, is_observation=True)

# test that error is thrown when missing casepath
# (UserWarning initially in ExportData)
with pytest.warns(UserWarning, match="case metadata"), pytest.raises(
TypeError, match="No 'casepath' argument provided"
):
with pytest.raises(TypeError, match="No 'casepath' argument provided"):
edata.export(surfacepath)

# test that export() works if casepath is provided
Expand Down

0 comments on commit d12c2f8

Please sign in to comment.