diff --git a/src/fmu/dataio/_design_kw.py b/src/fmu/dataio/_design_kw.py deleted file mode 100644 index 0ba9e6536..000000000 --- a/src/fmu/dataio/_design_kw.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -This file is a duplicate of -https://github.com/equinor/semeio/blob/master/semeio/jobs/design_kw/design_kw.py - -It is copied here instead of pip-installed in order to avoid dragging -along all dependencies of semeio""" - -from __future__ import annotations - -import re -import shlex -from typing import Any, Final, Iterable - -from ._logging import null_logger - -_STATUS_FILE_NAME: Final = "DESIGN_KW.OK" - -_logger: Final = null_logger(__name__) - - -def run( - template_file_name: str, - result_file_name: str, - log_level: str, - parameters_file_name: str = "parameters.txt", -) -> None: - # Get all key, value pairs - # If FWL key is having multiple entries in the parameters file - # KeyError is raised. This will be logged, and no OK - # file is written - _logger.setLevel(log_level) - - with open(parameters_file_name) as parameters_file: - parameters = parameters_file.readlines() - - key_vals = extract_key_value(parameters) - - key_vals.update(rm_genkw_prefix(key_vals)) - - with open(template_file_name) as template_file: - template = template_file.readlines() - - valid = True - with open(result_file_name, "w") as result_file: - for line in template: - if not is_comment(line): - for key, value in key_vals.items(): - line = line.replace(f"<{key}>", str(value)) - - if not all_matched(line, template_file_name, template): - valid = False - - result_file.write(line) - - if valid: - with open(_STATUS_FILE_NAME, "w") as status_file: - status_file.write("DESIGN_KW OK\n") - - -def all_matched(line: str, template_file_name: str, template: list[str]) -> bool: - valid = True - for unmatched in unmatched_templates(line): - if is_perl(template_file_name, template): - _logger.warning( # pylint: disable=logging-fstring-interpolation - f"{unmatched} not found in design matrix, " - f"but this is probably a Perl file" - ) - else: - _logger.error( # pylint: disable=logging-fstring-interpolation - f"{unmatched} not found in design matrix" - ) - valid = False - return valid - - -def is_perl(file_name: str, template: list[str]) -> bool: - return bool(file_name.endswith(".pl") or template[0].find("perl") != -1) - - -def unmatched_templates(line: str) -> list[str]: - bracketpattern = re.compile("<.+?>") - if bracketpattern.search(line): - return bracketpattern.findall(line) - return [] - - -def is_comment(line: str) -> bool: - ecl_comment_pattern = re.compile("^--") - std_comment_pattern = re.compile("^#") - return bool(ecl_comment_pattern.search(line) or std_comment_pattern.search(line)) - - -def extract_key_value(parameters: Iterable[str]) -> dict[str, str]: - """Parses a list of strings, looking for key-value pairs pr. line - separated by whitespace, into a dictionary. - - Spaces in keys and/or values are supported if quoted. Quotes - in keys/values are not supported. - - Args: - parameters (list of str) - - Returns: - dict, with the keys and values parsed. - - Raises: - ValueError, with error messages and all unparseable lines. - """ - res = {} - errors = [] - for line in parameters: - line_parts = shlex.split(line) - if not line_parts: - continue - if len(line_parts) == 1: - errors += [f"No value found in line {line}"] - continue - if len(line_parts) > 2: - errors += [f"Too many values found in line {line}"] - continue - key, value = line_parts - if key in res: - errors += [f"{key} is defined multiple times"] - continue - res[key] = value - if errors: - raise ValueError("\n".join(errors)) - return res - - -def rm_genkw_prefix( - paramsdict: dict[str, Any], - ignoreprefixes: str | list[str] | None = "LOG10_", -) -> dict[str, Any]: - """Strip prefixes from keys in a dictionary. - - Prefix is any string before a colon. No colon means no prefix. - - Only keys unique after prefix-stripping - are included. For intentional duplicates, as when ERT - prepares LOG10_ values, these are ignored by default in this - function. - - Args: - paramsdict (dict): Dictionary with parameter names as keys. - ignoreprefixes (str or list of str): If any of these strings - are found at the start of the prefix, they are removed - from the dictionary before uniqueness is determined. - - Returns: - Subset of the incoming dictionary (ignored keys are dropped), and with - stripped prefixes from keys. - """ - if ignoreprefixes is None: - ignoreprefixes = [] - if isinstance(ignoreprefixes, str): - ignoreprefixes = [ignoreprefixes] - - ignoreprefixes = list(filter(None, ignoreprefixes)) - - for ignore_str in ignoreprefixes: - paramsdict = { - key: paramsdict[key] - for key in paramsdict - if ":" not in key or not key.startswith(ignore_str) - } - - keyvalues = [ - (key.split(":")[1], value) if ":" in key else (key, value) - for key, value in paramsdict.items() - ] - - keys = [keyval[0] for keyval in keyvalues] - - duplicates = {keyvalue[0] for keyvalue in keyvalues if keys.count(keyvalue[0]) > 1} - if duplicates: - _logger.warning(f"Key(s) {list(duplicates)} can only be used with prefix.") - - return { - keyvalue[0]: keyvalue[1] - for keyvalue in keyvalues - if keys.count(keyvalue[0]) == 1 - } diff --git a/src/fmu/dataio/_utils.py b/src/fmu/dataio/_utils.py index 0e8c20f85..faa41be0a 100644 --- a/src/fmu/dataio/_utils.py +++ b/src/fmu/dataio/_utils.py @@ -6,6 +6,7 @@ import hashlib import json import os +import shlex import uuid from io import BufferedIOBase, BytesIO from pathlib import Path @@ -19,7 +20,7 @@ from fmu.config import utilities as ut -from . import _design_kw, types +from . import types from ._logging import null_logger from .readers import FaultRoomSurface @@ -222,71 +223,28 @@ def uuid_from_string(string: str) -> uuid.UUID: def read_parameters_txt(pfile: Path | str) -> types.Parameters: """Read the parameters.txt file and convert to a dict. The parameters.txt file has this structure:: - SENSNAME rms_seed - SENSCASE p10_p90 RMS_SEED 1000 KVKH_CHANNEL 0.6 - KVKH_CREVASSE 0.3 GLOBVAR:VOLON_FLOODPLAIN_VOLFRAC 0.256355 GLOBVAR:VOLON_PERMH_CHANNEL 1100 - GLOBVAR:VOLON_PORO_CHANNEL 0.2 LOG10_GLOBVAR:FAULT_SEAL_SCALING 0.685516 LOG10_MULTREGT:MULT_THERYS_VOLON -3.21365 - LOG10_MULTREGT:MULT_VALYSAR_THERYS -3.2582 - ...but may also appear on a justified format, with leading - whitespace and tab-justified columns, legacy from earlier - versions but kept alive by some users:: - SENSNAME rms_seed - SENSCASE p10_p90 - RMS_SEED 1000 - KVKH_CHANNEL 0.6 - GLOBVAR:VOLON_PERMH_CHANNEL 1100 - LOG10_GLOBVAR:FAULT_SEAL_SCALING 0.685516 - LOG10_MULTREGT:MULT_THERYS_VOLON -3.21365 - This should be parsed as:: - { - "SENSNAME": "rms_seed" - "SENSCASE": "p10_p90" - "RMS_SEED": 1000 - "KVKH_CHANNEL": 0.6 - "KVKH_CREVASSE": 0.3 - "GLOBVAR": {"VOLON_FLOODPLAIN_VOLFRAC": 0.256355, ...etc} - } """ logger.debug("Reading parameters.txt from %s", pfile) - parameterlines = Path(pfile).read_text().splitlines() + res: types.Parameters = {} - dict_str_to_str = _design_kw.extract_key_value(parameterlines) - return {key: check_if_number(value) for key, value in dict_str_to_str.items()} + with open(pfile) as f: + for line in f: + line_parts = shlex.split(line) + if len(line_parts) == 2: + key, value = line_parts + res[key] = check_if_number(value) + else: + raise ValueError(f"More or less than two items found in line {line}") - -def nested_parameters_dict(paramdict: dict[str, str | int | float]) -> types.Parameters: - """Interpret a flat parameters dictionary into a nested dictionary, based on - presence of colons in keys. - - This assumes that what comes before a ":" is sort of a namespace identifier. - - In design_kw (semeio) this namespace identifier is actively ignored, meaning that - the keys without the namespace must be unique. - """ - nested_dict: types.Parameters = {} - unique_keys: list[str] = [] - for key, value in paramdict.items(): - if ":" in key: - subdict, newkey = key.split(":", 1) - if not newkey: - raise ValueError(f"Empty parameter name in {key} after removing prefix") - if subdict not in nested_dict: - nested_dict[subdict] = {} - unique_keys.append(newkey) - nested_dict[subdict][newkey] = value # type: ignore - else: - unique_keys.append(key) - nested_dict[key] = value - - return nested_dict + return res def check_if_number(value: str | None) -> int | float | str | None: diff --git a/src/fmu/dataio/providers/_fmu.py b/src/fmu/dataio/providers/_fmu.py index 09121aa17..9d87e9a9d 100644 --- a/src/fmu/dataio/providers/_fmu.py +++ b/src/fmu/dataio/providers/_fmu.py @@ -288,20 +288,6 @@ def _get_restart_data_uuid(self) -> UUID | None: ) return None - def _get_ert_parameters(self) -> fields.Parameters | None: - logger.debug("Read ERT parameters") - assert self._runpath is not None - parameters_file = self._runpath / "parameters.txt" - if not parameters_file.exists(): - warn("The parameters.txt file was not found", UserWarning) - return None - - params = _utils.read_parameters_txt(parameters_file) - logger.debug("parameters.txt parsed.") - # BUG(?): value can contain Nones, loop in fn. below - # does contains check, will fail. - return fields.Parameters(root=_utils.nested_parameters_dict(params)) # type: ignore - def _get_iteration_and_real_uuid(self, case_uuid: UUID) -> tuple[UUID, UUID]: iter_uuid = _utils.uuid_from_string(f"{case_uuid}{self._iter_name}") real_uuid = _utils.uuid_from_string(f"{case_uuid}{iter_uuid}{self._real_id}") @@ -320,7 +306,6 @@ def _get_realization_meta(self, real_uuid: UUID) -> fields.Realization: return fields.Realization( id=self._real_id, name=self._real_name, - parameters=self._get_ert_parameters(), uuid=real_uuid, ) diff --git a/tests/test_units/test_dictionary.py b/tests/test_units/test_dictionary.py index a4e8a7c1d..27048e0a9 100644 --- a/tests/test_units/test_dictionary.py +++ b/tests/test_units/test_dictionary.py @@ -9,7 +9,7 @@ import yaml from fmu.dataio import ExportData -from fmu.dataio._utils import nested_parameters_dict, read_parameters_txt +from fmu.dataio._utils import read_parameters_txt @pytest.fixture(name="direct_creation", scope="function") @@ -51,19 +51,6 @@ def _fixture_simple_parameters(fmurun_w_casemetadata): return read_parameters_txt(fmurun_w_casemetadata / "parameters.txt") -@pytest.fixture(name="nested_parameters", scope="function") -def _fixture_nested_parameters(simple_parameters): - """Return dictionary read from parameters.txt and split on : in original key - - Args: - simple_parameters (dict): dictionary parsed from parameters.txt - - Returns: - dict: the parameters as nested dictionary - """ - return nested_parameters_dict(simple_parameters) - - def assert_dict_correct(result_dict, meta, name): """Assert dictionary and some metadata @@ -103,7 +90,6 @@ def read_dict_and_meta(path): ("direct_creation"), ("json_dict"), ("simple_parameters"), - ("nested_parameters"), ], ) def test_export_dict_w_meta(globalconfig2, dictionary, request, monkeypatch, tmp_path): @@ -145,7 +131,7 @@ def test_invalid_dict( def test_read_parameters_txt(): with NamedTemporaryFile() as tf: tf.write( - b"""SENSNAME rms_seed + b"""SENSNAME 'rms seed' SENSCASE p10_p90 RMS_SEED 1000 KVKH_CHANNEL 0.6 @@ -160,24 +146,7 @@ def test_read_parameters_txt(): ) tf.flush() assert read_parameters_txt(tf.name) == { - "SENSNAME": "rms_seed", - "SENSCASE": "p10_p90", - "RMS_SEED": 1000, - "KVKH_CHANNEL": 0.6, - "KVKH_CREVASSE": 0.3, - "GLOBVAR:VOLON_FLOODPLAIN_VOLFRAC": 0.256355, - "GLOBVAR:VOLON_PERMH_CHANNEL": 1100, - "GLOBVAR:VOLON_PORO_CHANNEL": 0.2, - "LOG10_GLOBVAR:FAULT_SEAL_SCALING": 0.685516, - "LOG10_MULTREGT:MULT_THERYS_VOLON": -3.21365, - "LOG10_MULTREGT:MULT_VALYSAR_THERYS": -3.2582, - } - - -def test_nested_parameters_dict(): - assert nested_parameters_dict( - { - "SENSNAME": "rms_seed", + "SENSNAME": "rms seed", "SENSCASE": "p10_p90", "RMS_SEED": 1000, "KVKH_CHANNEL": 0.6, @@ -189,20 +158,3 @@ def test_nested_parameters_dict(): "LOG10_MULTREGT:MULT_THERYS_VOLON": -3.21365, "LOG10_MULTREGT:MULT_VALYSAR_THERYS": -3.2582, } - ) == { - "SENSNAME": "rms_seed", - "SENSCASE": "p10_p90", - "RMS_SEED": 1000, - "KVKH_CHANNEL": 0.6, - "KVKH_CREVASSE": 0.3, - "GLOBVAR": { - "VOLON_FLOODPLAIN_VOLFRAC": 0.256355, - "VOLON_PERMH_CHANNEL": 1100, - "VOLON_PORO_CHANNEL": 0.2, - }, - "LOG10_GLOBVAR": {"FAULT_SEAL_SCALING": 0.685516}, - "LOG10_MULTREGT": { - "MULT_THERYS_VOLON": -3.21365, - "MULT_VALYSAR_THERYS": -3.2582, - }, - } diff --git a/tests/test_units/test_fmuprovider_class.py b/tests/test_units/test_fmuprovider_class.py index d2fec82f1..ca3d46db4 100644 --- a/tests/test_units/test_fmuprovider_class.py +++ b/tests/test_units/test_fmuprovider_class.py @@ -86,7 +86,7 @@ def test_fmuprovider_ert_provider_guess_casemeta_path(fmurun): def test_fmuprovider_ert_provider_missing_parameter_txt(fmurun_w_casemetadata): - """Test for an ERT case, when missing file parameter.txt (e.g. pred. run)""" + """Test for an ERT case, when missing file parameter.txt runs ok""" os.chdir(fmurun_w_casemetadata) @@ -97,8 +97,6 @@ def test_fmuprovider_ert_provider_missing_parameter_txt(fmurun_w_casemetadata): fmu_context=FMUContext.realization, workflow=WORKFLOW, ) - with pytest.warns(UserWarning, match="parameters.txt file was not found"): - myfmu.get_metadata() assert myfmu._case_name == "ertrun1" assert myfmu._real_name == "realization-0"