From 264ae9b8f8353f7714082e28a404d9d2752ab30d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20C=2E=20Riven=C3=A6s?= Date: Tue, 28 Jun 2022 12:22:16 +0200 Subject: [PATCH] Forcefolder further improve (#219) --- src/fmu/dataio/_filedata_provider.py | 30 +++--- src/fmu/dataio/_objectdata_provider.py | 14 ++- src/fmu/dataio/dataio.py | 24 ++++- tests/test_units/test_dataio.py | 27 ++++- tests/test_units/test_ert2_context.py | 97 ++++++++++++++++++ tests/test_units/test_rms_context.py | 135 +++++++++++++++++++++++++ 6 files changed, 303 insertions(+), 24 deletions(-) diff --git a/src/fmu/dataio/_filedata_provider.py b/src/fmu/dataio/_filedata_provider.py index 875772841..cc8692ef1 100644 --- a/src/fmu/dataio/_filedata_provider.py +++ b/src/fmu/dataio/_filedata_provider.py @@ -54,8 +54,6 @@ def __post_init__(self): self.extension = self.objdata.extension self.efolder = self.objdata.efolder - self.create_folder = self.dataio.createfolder - self.verify_folder = self.dataio.verifyfolder self.forcefolder = self.dataio.forcefolder self.forcefolder_is_absolute = False self.subfolder = self.dataio.subfolder @@ -150,23 +148,29 @@ def _get_path(self): dest = outroot / self.efolder # e.g. "maps" - if self.forcefolder: - # absolute if starts with "/", otherwise relative to outroot - if str(self.forcefolder).startswith("/"): - dest = Path(self.forcefolder) - dest = dest.absolute() - self.forcefolder_is_absolute = True + if self.dataio.forcefolder and self.dataio.forcefolder.startswith("/"): + if not self.dataio.allow_forcefolder_absolute: + raise ValueError( + "The forcefolder includes an absolute path, i.e. " + "starting with '/'. 
This is strongly discouraged and is only " + "allowed if classvariable allow_forcefolder_absolute is set to True" + ) else: - dest = self.rootpath / self.forcefolder + warn("Using absolute paths in forcefolder is not recommended!") + + # absolute if starts with "/", otherwise relative to outroot + dest = Path(self.dataio.forcefolder) + dest = dest.absolute() + self.forcefolder_is_absolute = True - if self.subfolder: - dest = dest / self.subfolder + if self.dataio.subfolder: + dest = dest / self.dataio.subfolder - if self.create_folder: + if self.dataio.createfolder: dest.mkdir(parents=True, exist_ok=True) # check that destination actually exists if verify_folder is True - if self.verify_folder and not dest.exists(): + if self.dataio.verifyfolder and not dest.exists(): raise IOError(f"Folder {str(dest)} is not present.") return dest diff --git a/src/fmu/dataio/_objectdata_provider.py b/src/fmu/dataio/_objectdata_provider.py index 15d6df523..5217bdce1 100644 --- a/src/fmu/dataio/_objectdata_provider.py +++ b/src/fmu/dataio/_objectdata_provider.py @@ -87,6 +87,7 @@ from dataclasses import dataclass, field from datetime import datetime as dt from typing import Any +from warnings import warn import numpy as np import pandas as pd # type: ignore @@ -95,8 +96,6 @@ from ._definitions import _ValidFormats from ._utils import generate_description -# from warnings import warn - try: import pyarrow as pa # type: ignore except ImportError: @@ -290,6 +289,17 @@ def _derive_objectdata(self): "This data type is not (yet) supported: ", type(self.obj) ) + # override efolder with forcefolder as exception! 
+ if self.dataio.forcefolder and not self.dataio.forcefolder.startswith("/"): + ewas = result["efolder"] + result["efolder"] = self.dataio.forcefolder + msg = ( + f"The standard folder name is overrided from {ewas} to " + f"{self.dataio.forcefolder}" + ) + logger.info(msg) + warn(msg, UserWarning) + return result def _derive_spec_bbox_regularsurface(self): diff --git a/src/fmu/dataio/dataio.py b/src/fmu/dataio/dataio.py index e6d3be8f0..db8502a38 100644 --- a/src/fmu/dataio/dataio.py +++ b/src/fmu/dataio/dataio.py @@ -271,7 +271,6 @@ class ExportData: Args: - access_ssdl: Optional. A dictionary that will overwrite or append to the default ssdl settings read from the config. Example: ``{"access_level": "restricted", "rep_include": False}`` @@ -295,7 +294,7 @@ class ExportData: fmu_context: In normal forward models, the fmu_context is ``realization`` which is default and will put data per realization. Other contexts may be ``case`` - which willput data relative to the case root. If a non-FMU run is detected + which will put data relative to the case root. If a non-FMU run is detected (e.g. you run from project), fmu-dataio will detect that and set actual context to None as fall-back. @@ -306,8 +305,12 @@ class ExportData: forcefolder: This setting shall only be used as exception, and will make it possible to output to a non-standard folder. A ``/`` in front will indicate - an absolute path; otherwise it will be relative to casepath/rootpath. - Use with care. + an absolute path*; otherwise it will be relative to casepath or rootpath, + depending on both the fmu_context and the is_observation + boolean value. A typical use-case is forcefolder="seismic" which will + replace the "cubes" standard folder for Cube output with "seismic". + Use with care and avoid if possible! (*) For absolute paths, the class + variable allow_forcefolder_absolute must be set to True. 
include_index: This applies to Pandas (table) data only, and if True then the index column will be exported. Deprecated, use class variable @@ -407,6 +410,7 @@ class ExportData: # ---------------------------------------------------------------------------------- # class variables + allow_forcefolder_absolute: ClassVar[bool] = False arrow_fformat: ClassVar[str] = "arrow" case_folder: ClassVar[str] = "share/metadata" createfolder: ClassVar[bool] = True @@ -1191,7 +1195,17 @@ def generate_metadata( return deepcopy(self._metadata) # alias method - generate_aggregation_metadata = generate_metadata + def generate_aggregation_metadata( + self, + obj: Any, + compute_md5: bool = True, + skip_null: bool = True, + **kwargs, + ) -> dict: + """Alias method name, see ``generate_metadata``""" + return self.generate_metadata( + obj, compute_md5=compute_md5, skip_null=skip_null, **kwargs + ) def export(self, obj, **kwargs) -> str: """Export aggregated file with metadata to file. diff --git a/tests/test_units/test_dataio.py b/tests/test_units/test_dataio.py index d937cda49..4b8aaf12c 100644 --- a/tests/test_units/test_dataio.py +++ b/tests/test_units/test_dataio.py @@ -1,6 +1,7 @@ """Test the dataio ExportData etc from the dataio.py module""" import logging import os +import sys import pytest import yaml @@ -167,6 +168,7 @@ def test_establish_pwd_runpath(tmp_path, globalconfig2): ExportData._inside_rms = False # reset +@pytest.mark.skipif("win" in sys.platform, reason="Windows tests have no /tmp") def test_forcefolder(tmp_path, globalconfig2, regsurf): """Testing the forcefolder mechanism.""" rmspath = tmp_path / "rms" / "model" @@ -174,15 +176,32 @@ def test_forcefolder(tmp_path, globalconfig2, regsurf): os.chdir(rmspath) ExportData._inside_rms = True - edata = ExportData(config=globalconfig2, forcefolder="share/observations/whatever") + edata = ExportData(config=globalconfig2, forcefolder="whatever") meta = edata.generate_metadata(regsurf) - assert 
meta["file"]["relative_path"].startswith("share/observations/whatever/") + assert meta["file"]["relative_path"].startswith("share/results/whatever/") ExportData._inside_rms = False # reset + +@pytest.mark.skipif("win" in sys.platform, reason="Windows tests have no /tmp") +def test_forcefolder_absolute_shall_raise(tmp_path, globalconfig2, regsurf): + """Testing the forcefolder mechanism, absolute path shall raise ValueError.""" + rmspath = tmp_path / "rms" / "model" + rmspath.mkdir(parents=True, exist_ok=True) + os.chdir(rmspath) + + ExportData._inside_rms = True + ExportData.allow_forcefolder_absolute = False + edata = ExportData(config=globalconfig2, forcefolder="/tmp/what") + with pytest.raises(ValueError): + meta = edata.generate_metadata(regsurf, name="x") + + ExportData.allow_forcefolder_absolute = True edata = ExportData(config=globalconfig2, forcefolder="/tmp/what") - meta = edata.generate_metadata(regsurf, name="x") + meta = edata.generate_metadata(regsurf, name="y") assert ( meta["file"]["relative_path"] == meta["file"]["absolute_path"] - == "/tmp/what/x.gri" + == "/tmp/what/y.gri" ) + ExportData.allow_forcefolder_absolute = False # reset + ExportData._inside_rms = False diff --git a/tests/test_units/test_ert2_context.py b/tests/test_units/test_ert2_context.py index 699599561..34f5be462 100644 --- a/tests/test_units/test_ert2_context.py +++ b/tests/test_units/test_ert2_context.py @@ -4,8 +4,10 @@ """ import logging import os +import sys import pandas as pd +import pytest import fmu.dataio.dataio as dataio from fmu.dataio._utils import prettyprint_dict @@ -210,6 +212,7 @@ def test_points_export_file_set_name_xtgeoheaders( # ====================================================================================== # Cube +# Also use this part to test various fmu_contexts and forcefolder # ====================================================================================== @@ -231,6 +234,100 @@ def test_cube_export_file_set_name(fmurun_w_casemetadata, 
rmsglobalconfig, cube) ) +def test_cube_export_file_is_observation(fmurun_w_casemetadata, rmsglobalconfig, cube): + """Export the cube to file with correct metadata..., with is_observation flag.""" + + logger.info("Active folder is %s", fmurun_w_casemetadata) + os.chdir(fmurun_w_casemetadata) + + edata = dataio.ExportData(config=rmsglobalconfig) # read from global config + + output = edata.export( + cube, name="MyCube", is_observation=True, fmu_context="realization" + ) + logger.info("Output is %s", output) + + assert str(output) == str( + ( + edata._rootpath + / "realization-0/iter-0/share/observations/cubes/mycube.segy" + ).resolve() + ) + + +def test_cube_export_file_is_case_observation( + fmurun_w_casemetadata, rmsglobalconfig, cube +): + """Export the cube..., with is_observation flag and fmu_context is case.""" + + logger.info("Active folder is %s", fmurun_w_casemetadata) + os.chdir(fmurun_w_casemetadata) + + edata = dataio.ExportData(config=rmsglobalconfig) # read from global config + + output = edata.export(cube, name="MyCube", is_observation=True, fmu_context="case") + logger.info("Output is %s", output) + + assert str(output) == str( + (edata._rootpath / "share/observations/cubes/mycube.segy").resolve() + ) + + +def test_cube_export_file_is_observation_forcefolder( + fmurun_w_casemetadata, rmsglobalconfig, cube +): + """Export the cube to file..., with is_observation flag and forcefolder.""" + + logger.info("Active folder is %s", fmurun_w_casemetadata) + os.chdir(fmurun_w_casemetadata) + + edata = dataio.ExportData(config=rmsglobalconfig) # read from global config + + output = edata.export( + cube, + name="MyCube", + is_observation=True, + fmu_context="realization", + forcefolder="seismic", + ) + logger.info("Output is %s", output) + + assert str(output) == str( + ( + edata._rootpath + / "realization-0/iter-0/share/observations/seismic/mycube.segy" + ).resolve() + ) + + +@pytest.mark.skipif("win" in sys.platform, reason="Windows tests have no /tmp") +def 
test_cube_export_file_is_observation_forcefolder_abs( + fmurun_w_casemetadata, rmsglobalconfig, cube +): + """Export the cube to file..., with is_observation flag and absolute forcefolder. + + Using an absolute path requires class property allow_forcefolder_absolute = True + """ + + logger.info("Active folder is %s", fmurun_w_casemetadata) + os.chdir(fmurun_w_casemetadata) + + dataio.ExportData.allow_forcefolder_absolute = True + edata = dataio.ExportData(config=rmsglobalconfig) # read from global config + + output = edata.export( + cube, + name="MyCube", + is_observation=True, + fmu_context="realization", + forcefolder="/tmp/seismic", + ) + logger.info("Output is %s", output) + + assert str(output) == "/tmp/seismic/mycube.segy" + dataio.ExportData.allow_forcefolder_absolute = False + + # ====================================================================================== # Grid and GridProperty # ====================================================================================== diff --git a/tests/test_units/test_rms_context.py b/tests/test_units/test_rms_context.py index ee5a88fba..862d27692 100644 --- a/tests/test_units/test_rms_context.py +++ b/tests/test_units/test_rms_context.py @@ -276,6 +276,7 @@ def test_points_export_file_set_name_xtgeoheaders(rmssetup, rmsglobalconfig, poi # ====================================================================================== # Cube +# This is also used to test various configurations of "forcefolder" and "fmu_context" # ====================================================================================== @@ -295,6 +296,140 @@ def test_cube_export_file_set_name(rmssetup, rmsglobalconfig, cube): ) +@inside_rms +def test_cube_export_file_set_name_as_observation(rmssetup, rmsglobalconfig, cube): + """Export the cube to file with correct metadata and name, is_observation.""" + logger.info("Active folder is %s", rmssetup) + os.chdir(rmssetup) + + edata = dataio.ExportData(config=rmsglobalconfig) # read from global 
config + + output = edata.export(cube, name="MyCube", is_observation=True) + logger.info("Output is %s", output) + + assert str(output) == str( + (edata._rootpath / "share/observations/cubes/mycube.segy").resolve() + ) + + +@inside_rms +def test_cube_export_file_set_name_as_observation_forcefolder( + rmssetup, rmsglobalconfig, cube +): + """Export the cube to file with correct metadata and name, is_observation. + + In addition, use forcefolder to apply "seismic" instead of cube + """ + logger.info("Active folder is %s", rmssetup) + os.chdir(rmssetup) + + edata = dataio.ExportData(config=rmsglobalconfig) # read from global config + + # use forcefolder to apply share/observations/seismic + output = edata.export( + cube, + name="MyCube", + fmu_context="realization", + is_observation=True, + forcefolder="seismic", + ) + logger.info("Output after force is %s", output) + + assert str(output) == str( + (edata._rootpath / "share/observations/seismic/mycube.segy").resolve() + ) + + +@inside_rms +def test_cube_export_as_observation_forcefolder_w_added_folder( + rmssetup, rmsglobalconfig, cube +): + """Export the cube to file with correct metadata and name, is_observation. + + In addition, use forcefolder with extra folder "xxx" (alternative to 'subfolder' + key). + """ + logger.info("Active folder is %s", rmssetup) + os.chdir(rmssetup) + + edata = dataio.ExportData(config=rmsglobalconfig) # read from global config + + # use forcefolder to apply share/observations/seismic + output = edata.export( + cube, + name="MyCube", + is_observation=True, + forcefolder="seismic/xxx", + ) + logger.info("Output after force is %s", output) + + assert str(output) == str( + (edata._rootpath / "share/observations/seismic/xxx/mycube.segy").resolve() + ) + + +@inside_rms +def test_cube_export_as_observation_forcefolder_w_true_subfolder( + rmssetup, rmsglobalconfig, cube +): + """Export the cube to file with correct metadata and name, is_observation. 
+ + In addition, use forcefolder and subfolders in combination. + """ + logger.info("Active folder is %s", rmssetup) + os.chdir(rmssetup) + + edata = dataio.ExportData(config=rmsglobalconfig) # read from global config + + # use forcefolder to apply share/observations/seismic + output = edata.export( + cube, + name="MyCube", + is_observation=True, + forcefolder="seismic/xxx", + subfolder="mysubfolder", + ) + logger.info("Output after force is %s", output) + + assert str(output) == str( + ( + edata._rootpath / "share/observations/seismic/xxx/mysubfolder/mycube.segy" + ).resolve() + ) + + +@inside_rms +def test_cube_export_as_observation_forcefolder_w_subfolder_case( + rmssetup, rmsglobalconfig, cube +): + """Export the cube to file with correct metadata and name, is_observation. + + In addition, use forcefolder with subfolders to apply "seismic" instead of cube + and the fmu_context here is case, not realization. + + When inside RMS interactive, the case may be unresolved and hence the folder + shall be as is. + """ + logger.info("Active folder is %s", rmssetup) + os.chdir(rmssetup) + + edata = dataio.ExportData(config=rmsglobalconfig) # read from global config + + # use forcefolder to apply share/observations/seismic + output = edata.export( + cube, + name="MyCube", + is_observation=True, + fmu_context="case", + forcefolder="seismic/xxx", + ) + logger.info("Output after force is %s", output) + + assert str(output) == str( + (edata._rootpath / "share/observations/seismic/xxx/mycube.segy").resolve() + ) + + # ====================================================================================== # Grid and GridProperty # ======================================================================================