From 6bb5eed0c06cb18da5b73eb211a8cf8d66a71d87 Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Wed, 23 Oct 2024 10:29:12 +0200 Subject: [PATCH] Make InvalidResponseFile and FileNotFoundError part of read_from_file --- src/ert/callbacks.py | 23 +++++-- src/ert/config/__init__.py | 3 +- src/ert/config/_read_summary.py | 40 +++++++---- src/ert/config/gen_data_config.py | 33 ++++++--- src/ert/config/response_config.py | 17 ++++- src/ert/config/summary_config.py | 4 +- .../unit_tests/config/test_gen_data_config.py | 69 ++++++++++++++++++- tests/unit_tests/config/test_read_summary.py | 23 ++++--- .../unit_tests/config/test_summary_config.py | 39 ++++++++++- 9 files changed, 203 insertions(+), 48 deletions(-) diff --git a/src/ert/callbacks.py b/src/ert/callbacks.py index 32b9f639c6d..1511dfe48f1 100644 --- a/src/ert/callbacks.py +++ b/src/ert/callbacks.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Iterable -from ert.config import ParameterConfig, ResponseConfig +from ert.config import InvalidResponseFile, ParameterConfig, ResponseConfig from ert.run_arg import RunArg from ert.storage.realization_storage_state import RealizationStorageState @@ -54,7 +54,12 @@ async def _write_responses_to_storage( try: start_time = time.perf_counter() logger.debug(f"Starting to load response: {config.response_type}") - ds = config.read_from_file(run_arg.runpath, run_arg.iens) + try: + ds = config.read_from_file(run_arg.runpath, run_arg.iens) + except (FileNotFoundError, InvalidResponseFile) as err: + errors.append(str(err)) + logger.warning(f"Failed to write: {run_arg.iens}: {err}") + continue await asyncio.sleep(0) logger.debug( f"Loaded {config.response_type}", @@ -69,9 +74,14 @@ async def _write_responses_to_storage( f"Saved {config.response_type} to storage", extra={"Time": f"{(time.perf_counter() - start_time):.4f}s"}, ) - except ValueError as err: + except Exception as err: errors.append(str(err)) - logger.warning(f"Failed to write: {run_arg.iens}", exc_info=err) + logger.exception( + f"Unexpected exception while writing response to storage {run_arg.iens}", + exc_info=err, + ) + continue + if errors: return LoadResult(LoadStatus.LOAD_FAILURE, "\n".join(errors)) return LoadResult(LoadStatus.LOAD_SUCCESSFUL, "") @@ -98,7 +108,10 @@ async def forward_model_ok( ) except Exception as err: - logger.exception(f"Failed to load results for realization {run_arg.iens}") + logger.exception( + f"Failed to load results for realization {run_arg.iens}", + exc_info=err, + ) parameters_result = LoadResult( LoadStatus.LOAD_FAILURE, "Failed to load results for realization " diff --git a/src/ert/config/__init__.py b/src/ert/config/__init__.py index f20c2a6ce6b..2faf5841e65 100644 --- a/src/ert/config/__init__.py +++ b/src/ert/config/__init__.py @@ -32,7 +32,7 @@ WarningInfo, ) from .queue_config import QueueConfig -from .response_config import ResponseConfig +from .response_config import InvalidResponseFile, ResponseConfig from .summary_config import SummaryConfig from .summary_observation import SummaryObservation from .surface_config import SurfaceConfig @@ -68,6 +68,7 @@ "GenKwConfig", "HookRuntime", "IESSettings", + "InvalidResponseFile", "ModelConfig", "ParameterConfig", "PriorDict", diff --git a/src/ert/config/_read_summary.py b/src/ert/config/_read_summary.py index 8b3c6b9567e..aca445e8664 100644 --- a/src/ert/config/_read_summary.py +++ b/src/ert/config/_read_summary.py @@ -25,6 +25,8 @@ from ert.summary_key_type import SummaryKeyType +from .response_config import InvalidResponseFile + def _cell_index( array_index: int, nx: PositiveInt, ny: PositiveInt @@ -44,7 +46,7 @@ def _check_if_missing( keyword_name: str, missing_key: str, *test_vars: Optional[T] ) -> List[T]: if any(v is None for v in test_vars): - raise ValueError( + raise InvalidResponseFile( f"Found {keyword_name} keyword in summary " f"specification without {missing_key} keyword" ) @@ -62,7 +64,13 @@ def make_summary_key( lj: Optional[int] = None, lk: Optional[int] = None, ) -> Optional[str]: - sum_type = SummaryKeyType.from_keyword(keyword) + try: + sum_type = SummaryKeyType.from_keyword(keyword) + except Exception as err: + raise InvalidResponseFile( + f"Could not read summary keyword '{keyword}': {err}" + ) from err + if sum_type in [ SummaryKeyType.FIELD, SummaryKeyType.OTHER, @@ -111,7 +119,7 @@ def make_summary_key( if sum_type == SummaryKeyType.NETWORK: (name,) = _check_if_missing("network", "WGNAMES", name) return f"{keyword}:{name}" - raise ValueError(f"Unexpected keyword type: {sum_type}") + raise InvalidResponseFile(f"Unexpected keyword type: {sum_type}") class DateUnit(Enum): @@ -123,7 +131,7 @@ def make_delta(self, val: float) -> timedelta: return timedelta(hours=val) if self == DateUnit.DAYS: return timedelta(days=val) - raise ValueError(f"Unknown date unit {val}") + raise InvalidResponseFile(f"Unknown date unit {val}") def _is_base_with_extension(base: str, path: str, exts: List[str]) -> bool: @@ -159,9 +167,9 @@ def _find_file_matching( dir, base = os.path.split(case) candidates = list(filter(lambda x: predicate(base, x), os.listdir(dir or "."))) if not candidates: - raise ValueError(f"Could not find any {kind} matching case path {case}") + raise FileNotFoundError(f"Could not find any {kind} matching case path {case}") if len(candidates) > 1: - raise ValueError( + raise FileNotFoundError( f"Ambiguous reference to {kind} in {case}, could be any of {candidates}" ) return os.path.join(dir, candidates[0]) @@ -188,7 +196,9 @@ def read_summary( summary, start_date, date_units, indices, date_index ) except resfo.ResfoParsingError as err: - raise ValueError(f"Failed to read summary file {filepath}: {err}") from err + raise InvalidResponseFile( + f"Failed to read summary file {filepath}: {err}" + ) from err return (start_date, keys, time_map, fetched) @@ -202,7 +212,7 @@ def _check_vals( kw: str, spec: str, vals: Union[npt.NDArray[Any], resfo.MESS] ) -> npt.NDArray[Any]: if vals is resfo.MESS or isinstance(vals, resfo.MESS): - raise ValueError(f"{kw.strip()} in {spec} has incorrect type MESS") + raise InvalidResponseFile(f"{kw.strip()} in {spec} has incorrect type MESS") return vals @@ -304,7 +314,7 @@ def _read_spec( # microsecond=self.micro_seconds % 10**6, ) except Exception as err: - raise ValueError( + raise InvalidResponseFile( f"SMSPEC {spec} contains invalid STARTDAT: {err}" ) from err keywords = arrays["KEYWORDS"] @@ -315,9 +325,9 @@ def _read_spec( lgr_names = arrays["LGRS "] if date is None: - raise ValueError(f"Keyword startdat missing in {spec}") + raise InvalidResponseFile(f"Keyword startdat missing in {spec}") if keywords is None: - raise ValueError(f"Keywords missing in {spec}") + raise InvalidResponseFile(f"Keywords missing in {spec}") if n is None: n = len(keywords) @@ -371,17 +381,17 @@ def optional_get(arr: Optional[npt.NDArray[Any]], idx: int) -> Any: units = arrays["UNITS "] if units is None: - raise ValueError(f"Keyword units missing in {spec}") + raise InvalidResponseFile(f"Keyword units missing in {spec}") if date_index is None: - raise ValueError(f"KEYWORDS did not contain TIME in {spec}") + raise InvalidResponseFile(f"KEYWORDS did not contain TIME in {spec}") if date_index >= len(units): - raise ValueError(f"Unit missing for TIME in {spec}") + raise InvalidResponseFile(f"Unit missing for TIME in {spec}") unit_key = _key2str(units[date_index]) try: date_unit = DateUnit[unit_key] except KeyError: - raise ValueError(f"Unknown date unit in {spec}: {unit_key}") from None + raise InvalidResponseFile(f"Unknown date unit in {spec}: {unit_key}") from None return ( date_index, diff --git a/src/ert/config/gen_data_config.py b/src/ert/config/gen_data_config.py index 3f2a860cb11..4c16fb5cdc8 100644 --- a/src/ert/config/gen_data_config.py +++ b/src/ert/config/gen_data_config.py @@ -12,7 +12,7 @@ from ._option_dict import option_dict from .parsing import ConfigDict, ConfigValidationError, ErrorInfo -from .response_config import ResponseConfig +from .response_config import InvalidResponseFile, ResponseConfig from .responses_index import responses_index @@ -109,12 +109,16 @@ def from_config_dict(cls, config_dict: ConfigDict) -> Optional[Self]: def read_from_file(self, run_path: str, _: int) -> xr.Dataset: def _read_file(filename: Path, report_step: int) -> xr.Dataset: - if not filename.exists(): - raise ValueError(f"Missing output file: {filename}") - data = np.loadtxt(_run_path / filename, ndmin=1) + try: + data = np.loadtxt(_run_path / filename, ndmin=1) + except ValueError as err: + raise InvalidResponseFile(str(err)) from err active_information_file = _run_path / (str(filename) + "_active") if active_information_file.exists(): - active_list = np.loadtxt(active_information_file) + try: + active_list = np.loadtxt(active_information_file) + except ValueError as err: + raise InvalidResponseFile(str(err)) from err data[active_list == 0] = np.nan return xr.Dataset( {"values": (["report_step", "index"], [data])}, @@ -138,8 +142,8 @@ def _read_file(filename: Path, report_step: int) -> xr.Dataset: datasets_per_report_step.append( _read_file(_run_path / input_file, 0) ) - except ValueError as err: - errors.append(str(err)) + except (InvalidResponseFile, FileNotFoundError) as err: + errors.append(err) else: for report_step in report_steps: filename = input_file % report_step @@ -147,8 +151,8 @@ def _read_file(filename: Path, report_step: int) -> xr.Dataset: datasets_per_report_step.append( _read_file(_run_path / filename, report_step) ) - except ValueError as err: - errors.append(str(err)) + except (InvalidResponseFile, FileNotFoundError) as err: + errors.append(err) ds_all_report_steps = xr.concat( datasets_per_report_step, dim="report_step" @@ -156,7 +160,16 @@ def _read_file(filename: Path, report_step: int) -> xr.Dataset: datasets_per_name.append(ds_all_report_steps) if errors: - raise ValueError(f"Error reading GEN_DATA: {self.name}, errors: {errors}") + if all(isinstance(err, FileNotFoundError) for err in errors): + raise FileNotFoundError( + "Could not find one or more files/directories while reading GEN_DATA" + f" {self.name}: {','.join([str(err) for err in errors])}" + ) + else: + raise InvalidResponseFile( + "Error reading GEN_DATA " + f"{self.name}, errors: {','.join([str(err) for err in errors])}" + ) combined = xr.concat(datasets_per_name, dim="name") combined.attrs["response"] = "gen_data" diff --git a/src/ert/config/response_config.py b/src/ert/config/response_config.py index d40f899ed87..9f5a5648ffa 100644 --- a/src/ert/config/response_config.py +++ b/src/ert/config/response_config.py @@ -9,6 +9,13 @@ from ert.config.parsing import ConfigDict +class InvalidResponseFile(Exception): + """ + Raised when an input file of the ResponseConfig has + the incorrect format. + """ + + @dataclasses.dataclass class ResponseConfig(ABC): name: str @@ -16,7 +23,15 @@ class ResponseConfig(ABC): keys: List[str] = dataclasses.field(default_factory=list) @abstractmethod - def read_from_file(self, run_path: str, iens: int) -> xr.Dataset: ... + def read_from_file(self, run_path: str, iens: int) -> xr.Dataset: + """Reads the data for the response from run_path. + + Raises: + FileNotFoundError: when one of the input_files for the + response is missing. + InvalidResponseFile: when one of the input_files is + invalid + """ def to_dict(self) -> Dict[str, Any]: data = dataclasses.asdict(self, dict_factory=CustomDict) diff --git a/src/ert/config/summary_config.py b/src/ert/config/summary_config.py index 49d1b46a302..9b0df36a719 100644 --- a/src/ert/config/summary_config.py +++ b/src/ert/config/summary_config.py @@ -11,7 +11,7 @@ from .ensemble_config import Refcase from .parsing import ConfigDict, ConfigKeys from .parsing.config_errors import ConfigValidationError -from .response_config import ResponseConfig +from .response_config import InvalidResponseFile, ResponseConfig from .responses_index import responses_index if TYPE_CHECKING: @@ -44,7 +44,7 @@ def read_from_file(self, run_path: str, iens: int) -> xr.Dataset: # https://github.com/equinor/ert/issues/6974 # There is a bug with storing empty responses so we have # to raise an error in that case - raise ValueError( + raise InvalidResponseFile( f"Did not find any summary values matching {self.keys} in {filename}" ) ds = xr.Dataset( diff --git a/tests/unit_tests/config/test_gen_data_config.py b/tests/unit_tests/config/test_gen_data_config.py index 2e4747b28d1..aa3bdc57f7c 100644 --- a/tests/unit_tests/config/test_gen_data_config.py +++ b/tests/unit_tests/config/test_gen_data_config.py @@ -1,8 +1,13 @@ +import os +from contextlib import suppress +from pathlib import Path from typing import List +import hypothesis.strategies as st import pytest +from hypothesis import given -from ert.config import ConfigValidationError, GenDataConfig +from ert.config import ConfigValidationError, GenDataConfig, InvalidResponseFile @pytest.mark.parametrize( @@ -59,3 +64,65 @@ def test_malformed_or_missing_gen_data_result_file(result_file, error_message): GenDataConfig.from_config_dict({"GEN_DATA": [config_line.split()]}) else: GenDataConfig.from_config_dict({"GEN_DATA": [config_line.split()]}) + + +def test_that_invalid_gendata_outfile_error_propagates(tmp_path): + (tmp_path / "poly.out").write_text(""" + 4.910405046410615,4.910405046410615 + 6.562317389289953,6.562317389289953 + 9.599763191512997,9.599763191512997 + 14.022742453079745,14.022742453079745 + 19.831255173990197,19.831255173990197 + 27.025301354244355,27.025301354244355 + 35.604880993842215,35.604880993842215 + 45.56999409278378,45.56999409278378 + 56.92064065106905,56.92064065106905 + 69.65682066869802,69.65682066869802 + """) + + config = GenDataConfig( + name="gen_data", + keys=["something"], + report_steps_list=[None], + input_files=["poly.out"], + ) + with pytest.raises( + InvalidResponseFile, + match="Error reading GEN_DATA.*could not convert string.*4.910405046410615,4.910405046410615.*to float64", + ): + config.read_from_file(tmp_path, 0) + + +@pytest.mark.usefixtures("use_tmpdir") +@given(st.binary()) +def test_that_read_file_does_not_raise_unexpected_exceptions_on_invalid_file(contents): + Path("./output").write_bytes(contents) + with suppress(InvalidResponseFile): + GenDataConfig( + name="gen_data", + keys=["something"], + report_steps_list=[None], + input_files=["output"], + ).read_from_file(os.getcwd(), 0) + + +def test_that_read_file_does_not_raise_unexpected_exceptions_on_missing_file(tmpdir): + with pytest.raises(FileNotFoundError, match="DOES_NOT_EXIST not found"): + GenDataConfig( + name="gen_data", + keys=["something"], + report_steps_list=[None], + input_files=["DOES_NOT_EXIST"], + ).read_from_file(tmpdir, 0) + + +def test_that_read_file_does_not_raise_unexpected_exceptions_on_missing_directory( + tmp_path, +): + with pytest.raises(FileNotFoundError, match="DOES_NOT_EXIST not found"): + GenDataConfig( + name="gen_data", + keys=["something"], + report_steps_list=[None], + input_files=["DOES_NOT_EXIST"], + ).read_from_file(str(tmp_path / "DOES_NOT_EXIST"), 0) diff --git a/tests/unit_tests/config/test_read_summary.py b/tests/unit_tests/config/test_read_summary.py index 001f90f41ca..5acb69377c5 100644 --- a/tests/unit_tests/config/test_read_summary.py +++ b/tests/unit_tests/config/test_read_summary.py @@ -7,6 +7,7 @@ from hypothesis import given from resdata.summary import Summary, SummaryVarType +from ert.config import InvalidResponseFile from ert.config._read_summary import make_summary_key, read_summary from ert.summary_key_type import SummaryKeyType @@ -279,7 +280,7 @@ def to_date(start_date: datetime, offset: float, unit: str) -> datetime: return start_date + timedelta(days=offset) if unit == "HOURS": return start_date + timedelta(hours=offset) - raise ValueError(f"Unknown time unit {unit}") + raise InvalidResponseFile(f"Unknown time unit {unit}") assert all( abs(actual - expected) <= timedelta(minutes=15) @@ -332,7 +333,7 @@ def test_that_incorrect_summary_files_raises_informative_errors( (tmp_path / "test.UNSMRY").write_bytes(smry_contents) (tmp_path / "test.SMSPEC").write_bytes(spec_contents) - with pytest.raises(ValueError, match=error_message): + with pytest.raises(InvalidResponseFile, match=error_message): read_summary(str(tmp_path / "test"), ["*"]) @@ -340,7 +341,7 @@ def test_mess_values_in_summary_files_raises_informative_errors(tmp_path): resfo.write(tmp_path / "test.SMSPEC", [("KEYWORDS", resfo.MESS)]) (tmp_path / "test.UNSMRY").write_bytes(b"") - with pytest.raises(ValueError, match="has incorrect type MESS"): + with pytest.raises(InvalidResponseFile, match="has incorrect type MESS"): read_summary(str(tmp_path / "test"), ["*"]) @@ -351,7 +352,7 @@ def test_empty_keywords_in_summary_files_raises_informative_errors(tmp_path): ) (tmp_path / "test.UNSMRY").write_bytes(b"") - with pytest.raises(ValueError, match="Got empty summary keyword"): + with pytest.raises(InvalidResponseFile, match="Got empty summary keyword"): read_summary(str(tmp_path / "test"), ["*"]) @@ -368,7 +369,7 @@ def test_missing_names_keywords_in_summary_files_raises_informative_errors( (tmp_path / "test.UNSMRY").write_bytes(b"") with pytest.raises( - ValueError, + InvalidResponseFile, match="Found block keyword in summary specification without dimens keyword", ): read_summary(str(tmp_path / "test"), ["*"]) @@ -388,7 +389,7 @@ def test_unknown_date_unit_in_summary_files_raises_informative_errors( (tmp_path / "test.UNSMRY").write_bytes(b"") with pytest.raises( - ValueError, + InvalidResponseFile, match="Unknown date unit .* ANNUAL", ): read_summary(str(tmp_path / "test"), ["*"]) @@ -407,7 +408,7 @@ def test_missing_units_in_summary_files_raises_an_informative_error( (tmp_path / "test.UNSMRY").write_bytes(b"") with pytest.raises( - ValueError, + InvalidResponseFile, match="Keyword units", ): read_summary(str(tmp_path / "test"), ["*"]) @@ -427,7 +428,7 @@ def test_missing_date_units_in_summary_files_raises_an_informative_error( (tmp_path / "test.UNSMRY").write_bytes(b"") with pytest.raises( - ValueError, + InvalidResponseFile, match="Unit missing for TIME", ): read_summary(str(tmp_path / "test"), ["*"]) @@ -447,7 +448,7 @@ def test_missing_time_keyword_in_summary_files_raises_an_informative_error( (tmp_path / "test.UNSMRY").write_bytes(b"") with pytest.raises( - ValueError, + InvalidResponseFile, match="KEYWORDS did not contain TIME", ): read_summary(str(tmp_path / "test"), ["*"]) @@ -466,7 +467,7 @@ def test_missing_keywords_in_smspec_raises_informative_error( (tmp_path / "test.UNSMRY").write_bytes(b"") with pytest.raises( - ValueError, + InvalidResponseFile, match="Keywords missing", ): read_summary(str(tmp_path / "test"), ["*"]) @@ -481,7 +482,7 @@ def test_that_ambiguous_case_restart_raises_an_informative_error( (tmp_path / "test.Smspec").write_bytes(b"") with pytest.raises( - ValueError, + FileNotFoundError, match="Ambiguous reference to unified summary", ): read_summary(str(tmp_path / "test"), ["*"]) diff --git a/tests/unit_tests/config/test_summary_config.py b/tests/unit_tests/config/test_summary_config.py index 992f635d9a9..cc0c0c4de73 100644 --- a/tests/unit_tests/config/test_summary_config.py +++ b/tests/unit_tests/config/test_summary_config.py @@ -1,8 +1,17 @@ +import os +from contextlib import suppress +from pathlib import Path + import hypothesis.strategies as st import pytest from hypothesis import given, settings -from ert.config import ConfigValidationError, ErtConfig, SummaryConfig +from ert.config import ( + ConfigValidationError, + ErtConfig, + InvalidResponseFile, + SummaryConfig, +) from .summary_generator import summaries @@ -24,7 +33,7 @@ def test_reading_empty_summaries_raises(wopr_summary): smspec, unsmry = wopr_summary smspec.to_file("CASE.SMSPEC") unsmry.to_file("CASE.UNSMRY") - with pytest.raises(ValueError, match="Did not find any summary values"): + with pytest.raises(InvalidResponseFile, match="Did not find any summary values"): SummaryConfig("summary", ["CASE"], ["WWCT:OP1"]).read_from_file(".", 0) @@ -33,3 +42,29 @@ def test_summary_config_normalizes_list_of_keys(): "FOPR", "WOPR", ] + + +@pytest.mark.usefixtures("use_tmpdir") +@given(st.binary(), st.binary()) +def test_that_read_file_does_not_raise_unexpected_exceptions_on_invalid_file( + smspec, unsmry +): + Path("CASE.UNSMRY").write_bytes(unsmry) + Path("CASE.SMSPEC").write_bytes(smspec) + with suppress(InvalidResponseFile): + SummaryConfig("summary", ["CASE"], ["FOPR"]).read_from_file(os.getcwd(), 1) + + +def test_that_read_file_does_not_raise_unexpected_exceptions_on_missing_file(tmpdir): + with pytest.raises(FileNotFoundError): + SummaryConfig("summary", ["NOT_CASE"], ["FOPR"]).read_from_file(tmpdir, 1) + + +@pytest.mark.usefixtures("use_tmpdir") +def test_that_read_file_does_not_raise_unexpected_exceptions_on_missing_directory( + tmp_path, +): + with pytest.raises(FileNotFoundError): + SummaryConfig("summary", ["CASE"], ["FOPR"]).read_from_file( + str(tmp_path / "DOES_NOT_EXIST"), 1 + )