Make InvalidResponseFile and FileNotFoundError part of read_from_file

equinor · Oct 23, 2024 · 6bb5eed · 6bb5eed
1 parent 1c82874
commit 6bb5eed
Show file tree

Hide file tree

Showing 9 changed files with 203 additions and 48 deletions.
diff --git a/src/ert/callbacks.py b/src/ert/callbacks.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 from typing import Iterable
 
-from ert.config import ParameterConfig, ResponseConfig
+from ert.config import InvalidResponseFile, ParameterConfig, ResponseConfig
 from ert.run_arg import RunArg
 from ert.storage.realization_storage_state import RealizationStorageState
 
@@ -54,7 +54,12 @@ async def _write_responses_to_storage(
         try:
             start_time = time.perf_counter()
             logger.debug(f"Starting to load response: {config.response_type}")
-            ds = config.read_from_file(run_arg.runpath, run_arg.iens)
+            try:
+                ds = config.read_from_file(run_arg.runpath, run_arg.iens)
+            except (FileNotFoundError, InvalidResponseFile) as err:
+                errors.append(str(err))
+                logger.warning(f"Failed to write: {run_arg.iens}: {err}")
+                continue
             await asyncio.sleep(0)
             logger.debug(
                 f"Loaded {config.response_type}",
@@ -69,9 +74,14 @@ async def _write_responses_to_storage(
                 f"Saved {config.response_type} to storage",
                 extra={"Time": f"{(time.perf_counter() - start_time):.4f}s"},
             )
-        except ValueError as err:
+        except Exception as err:
             errors.append(str(err))
-            logger.warning(f"Failed to write: {run_arg.iens}", exc_info=err)
+            logger.exception(
+                f"Unexpected exception while writing response to storage {run_arg.iens}",
+                exc_info=err,
+            )
+            continue
+
     if errors:
         return LoadResult(LoadStatus.LOAD_FAILURE, "\n".join(errors))
     return LoadResult(LoadStatus.LOAD_SUCCESSFUL, "")
@@ -98,7 +108,10 @@ async def forward_model_ok(
             )
 
     except Exception as err:
-        logger.exception(f"Failed to load results for realization {run_arg.iens}")
+        logger.exception(
+            f"Failed to load results for realization {run_arg.iens}",
+            exc_info=err,
+        )
         parameters_result = LoadResult(
             LoadStatus.LOAD_FAILURE,
             "Failed to load results for realization "

diff --git a/src/ert/config/__init__.py b/src/ert/config/__init__.py
@@ -32,7 +32,7 @@
     WarningInfo,
 )
 from .queue_config import QueueConfig
-from .response_config import ResponseConfig
+from .response_config import InvalidResponseFile, ResponseConfig
 from .summary_config import SummaryConfig
 from .summary_observation import SummaryObservation
 from .surface_config import SurfaceConfig
@@ -68,6 +68,7 @@
     "GenKwConfig",
     "HookRuntime",
     "IESSettings",
+    "InvalidResponseFile",
     "ModelConfig",
     "ParameterConfig",
     "PriorDict",

diff --git a/src/ert/config/_read_summary.py b/src/ert/config/_read_summary.py
@@ -25,6 +25,8 @@
 
 from ert.summary_key_type import SummaryKeyType
 
+from .response_config import InvalidResponseFile
+
 
 def _cell_index(
     array_index: int, nx: PositiveInt, ny: PositiveInt
@@ -44,7 +46,7 @@ def _check_if_missing(
     keyword_name: str, missing_key: str, *test_vars: Optional[T]
 ) -> List[T]:
     if any(v is None for v in test_vars):
-        raise ValueError(
+        raise InvalidResponseFile(
             f"Found {keyword_name} keyword in summary "
             f"specification without {missing_key} keyword"
         )
@@ -62,7 +64,13 @@ def make_summary_key(
     lj: Optional[int] = None,
     lk: Optional[int] = None,
 ) -> Optional[str]:
-    sum_type = SummaryKeyType.from_keyword(keyword)
+    try:
+        sum_type = SummaryKeyType.from_keyword(keyword)
+    except Exception as err:
+        raise InvalidResponseFile(
+            f"Could not read summary keyword '{keyword}': {err}"
+        ) from err
+
     if sum_type in [
         SummaryKeyType.FIELD,
         SummaryKeyType.OTHER,
@@ -111,7 +119,7 @@ def make_summary_key(
     if sum_type == SummaryKeyType.NETWORK:
         (name,) = _check_if_missing("network", "WGNAMES", name)
         return f"{keyword}:{name}"
-    raise ValueError(f"Unexpected keyword type: {sum_type}")
+    raise InvalidResponseFile(f"Unexpected keyword type: {sum_type}")
 
 
 class DateUnit(Enum):
@@ -123,7 +131,7 @@ def make_delta(self, val: float) -> timedelta:
             return timedelta(hours=val)
         if self == DateUnit.DAYS:
             return timedelta(days=val)
-        raise ValueError(f"Unknown date unit {val}")
+        raise InvalidResponseFile(f"Unknown date unit {val}")
 
 
 def _is_base_with_extension(base: str, path: str, exts: List[str]) -> bool:
@@ -159,9 +167,9 @@ def _find_file_matching(
     dir, base = os.path.split(case)
     candidates = list(filter(lambda x: predicate(base, x), os.listdir(dir or ".")))
     if not candidates:
-        raise ValueError(f"Could not find any {kind} matching case path {case}")
+        raise FileNotFoundError(f"Could not find any {kind} matching case path {case}")
     if len(candidates) > 1:
-        raise ValueError(
+        raise FileNotFoundError(
             f"Ambiguous reference to {kind} in {case}, could be any of {candidates}"
         )
     return os.path.join(dir, candidates[0])
@@ -188,7 +196,9 @@ def read_summary(
             summary, start_date, date_units, indices, date_index
         )
     except resfo.ResfoParsingError as err:
-        raise ValueError(f"Failed to read summary file {filepath}: {err}") from err
+        raise InvalidResponseFile(
+            f"Failed to read summary file {filepath}: {err}"
+        ) from err
     return (start_date, keys, time_map, fetched)
 
 
@@ -202,7 +212,7 @@ def _check_vals(
     kw: str, spec: str, vals: Union[npt.NDArray[Any], resfo.MESS]
 ) -> npt.NDArray[Any]:
     if vals is resfo.MESS or isinstance(vals, resfo.MESS):
-        raise ValueError(f"{kw.strip()} in {spec} has incorrect type MESS")
+        raise InvalidResponseFile(f"{kw.strip()} in {spec} has incorrect type MESS")
     return vals
 
 
@@ -304,7 +314,7 @@ def _read_spec(
                         # microsecond=self.micro_seconds % 10**6,
                     )
                 except Exception as err:
-                    raise ValueError(
+                    raise InvalidResponseFile(
                         f"SMSPEC {spec} contains invalid STARTDAT: {err}"
                     ) from err
     keywords = arrays["KEYWORDS"]
@@ -315,9 +325,9 @@ def _read_spec(
     lgr_names = arrays["LGRS    "]
 
     if date is None:
-        raise ValueError(f"Keyword startdat missing in {spec}")
+        raise InvalidResponseFile(f"Keyword startdat missing in {spec}")
     if keywords is None:
-        raise ValueError(f"Keywords missing in {spec}")
+        raise InvalidResponseFile(f"Keywords missing in {spec}")
     if n is None:
         n = len(keywords)
 
@@ -371,17 +381,17 @@ def optional_get(arr: Optional[npt.NDArray[Any]], idx: int) -> Any:
 
     units = arrays["UNITS   "]
     if units is None:
-        raise ValueError(f"Keyword units missing in {spec}")
+        raise InvalidResponseFile(f"Keyword units missing in {spec}")
     if date_index is None:
-        raise ValueError(f"KEYWORDS did not contain TIME in {spec}")
+        raise InvalidResponseFile(f"KEYWORDS did not contain TIME in {spec}")
     if date_index >= len(units):
-        raise ValueError(f"Unit missing for TIME in {spec}")
+        raise InvalidResponseFile(f"Unit missing for TIME in {spec}")
 
     unit_key = _key2str(units[date_index])
     try:
         date_unit = DateUnit[unit_key]
     except KeyError:
-        raise ValueError(f"Unknown date unit in {spec}: {unit_key}") from None
+        raise InvalidResponseFile(f"Unknown date unit in {spec}: {unit_key}") from None
 
     return (
         date_index,

diff --git a/src/ert/config/gen_data_config.py b/src/ert/config/gen_data_config.py
@@ -12,7 +12,7 @@
 
 from ._option_dict import option_dict
 from .parsing import ConfigDict, ConfigValidationError, ErrorInfo
-from .response_config import ResponseConfig
+from .response_config import InvalidResponseFile, ResponseConfig
 from .responses_index import responses_index
 
 
@@ -109,12 +109,16 @@ def from_config_dict(cls, config_dict: ConfigDict) -> Optional[Self]:
 
     def read_from_file(self, run_path: str, _: int) -> xr.Dataset:
         def _read_file(filename: Path, report_step: int) -> xr.Dataset:
-            if not filename.exists():
-                raise ValueError(f"Missing output file: {filename}")
-            data = np.loadtxt(_run_path / filename, ndmin=1)
+            try:
+                data = np.loadtxt(_run_path / filename, ndmin=1)
+            except ValueError as err:
+                raise InvalidResponseFile(str(err)) from err
             active_information_file = _run_path / (str(filename) + "_active")
             if active_information_file.exists():
-                active_list = np.loadtxt(active_information_file)
+                try:
+                    active_list = np.loadtxt(active_information_file)
+                except ValueError as err:
+                    raise InvalidResponseFile(str(err)) from err
                 data[active_list == 0] = np.nan
             return xr.Dataset(
                 {"values": (["report_step", "index"], [data])},
@@ -138,25 +142,34 @@ def _read_file(filename: Path, report_step: int) -> xr.Dataset:
                     datasets_per_report_step.append(
                         _read_file(_run_path / input_file, 0)
                     )
-                except ValueError as err:
-                    errors.append(str(err))
+                except (InvalidResponseFile, FileNotFoundError) as err:
+                    errors.append(err)
             else:
                 for report_step in report_steps:
                     filename = input_file % report_step
                     try:
                         datasets_per_report_step.append(
                             _read_file(_run_path / filename, report_step)
                         )
-                    except ValueError as err:
-                        errors.append(str(err))
+                    except (InvalidResponseFile, FileNotFoundError) as err:
+                        errors.append(err)
 
             ds_all_report_steps = xr.concat(
                 datasets_per_report_step, dim="report_step"
             ).expand_dims(name=[name])
             datasets_per_name.append(ds_all_report_steps)
 
         if errors:
-            raise ValueError(f"Error reading GEN_DATA: {self.name}, errors: {errors}")
+            if all(isinstance(err, FileNotFoundError) for err in errors):
+                raise FileNotFoundError(
+                    "Could not find one or more files/directories while reading GEN_DATA"
+                    f" {self.name}: {','.join([str(err) for err in errors])}"
+                )
+            else:
+                raise InvalidResponseFile(
+                    "Error reading GEN_DATA "
+                    f"{self.name}, errors: {','.join([str(err) for err in errors])}"
+                )
 
         combined = xr.concat(datasets_per_name, dim="name")
         combined.attrs["response"] = "gen_data"

diff --git a/src/ert/config/response_config.py b/src/ert/config/response_config.py
@@ -9,14 +9,29 @@
 from ert.config.parsing import ConfigDict
 
 
+class InvalidResponseFile(Exception):
+    """
+    Raised when an input file of the ResponseConfig has
+    the incorrect format.
+    """
+
+
 @dataclasses.dataclass
 class ResponseConfig(ABC):
     name: str
     input_files: List[str] = dataclasses.field(default_factory=list)
     keys: List[str] = dataclasses.field(default_factory=list)
 
     @abstractmethod
-    def read_from_file(self, run_path: str, iens: int) -> xr.Dataset: ...
+    def read_from_file(self, run_path: str, iens: int) -> xr.Dataset:
+        """Reads the data for the response from run_path.
+
+        Raises:
+            FileNotFoundError: when one of the input_files for the
+                response is missing.
+            InvalidResponseFile: when one of the input_files is
+                invalid
+        """
 
     def to_dict(self) -> Dict[str, Any]:
         data = dataclasses.asdict(self, dict_factory=CustomDict)

diff --git a/src/ert/config/summary_config.py b/src/ert/config/summary_config.py
@@ -11,7 +11,7 @@
 from .ensemble_config import Refcase
 from .parsing import ConfigDict, ConfigKeys
 from .parsing.config_errors import ConfigValidationError
-from .response_config import ResponseConfig
+from .response_config import InvalidResponseFile, ResponseConfig
 from .responses_index import responses_index
 
 if TYPE_CHECKING:
@@ -44,7 +44,7 @@ def read_from_file(self, run_path: str, iens: int) -> xr.Dataset:
             # https://github.com/equinor/ert/issues/6974
             # There is a bug with storing empty responses so we have
             # to raise an error in that case
-            raise ValueError(
+            raise InvalidResponseFile(
                 f"Did not find any summary values matching {self.keys} in {filename}"
             )
         ds = xr.Dataset(

diff --git a/tests/unit_tests/config/test_gen_data_config.py b/tests/unit_tests/config/test_gen_data_config.py
@@ -1,8 +1,13 @@
+import os
+from contextlib import suppress
+from pathlib import Path
 from typing import List
 
+import hypothesis.strategies as st
 import pytest
+from hypothesis import given
 
-from ert.config import ConfigValidationError, GenDataConfig
+from ert.config import ConfigValidationError, GenDataConfig, InvalidResponseFile
 
 
 @pytest.mark.parametrize(
@@ -59,3 +64,65 @@ def test_malformed_or_missing_gen_data_result_file(result_file, error_message):
             GenDataConfig.from_config_dict({"GEN_DATA": [config_line.split()]})
     else:
         GenDataConfig.from_config_dict({"GEN_DATA": [config_line.split()]})
+
+
+def test_that_invalid_gendata_outfile_error_propagates(tmp_path):
+    (tmp_path / "poly.out").write_text("""
+        4.910405046410615,4.910405046410615
+        6.562317389289953,6.562317389289953
+        9.599763191512997,9.599763191512997
+        14.022742453079745,14.022742453079745
+        19.831255173990197,19.831255173990197
+        27.025301354244355,27.025301354244355
+        35.604880993842215,35.604880993842215
+        45.56999409278378,45.56999409278378
+        56.92064065106905,56.92064065106905
+        69.65682066869802,69.65682066869802
+    """)
+
+    config = GenDataConfig(
+        name="gen_data",
+        keys=["something"],
+        report_steps_list=[None],
+        input_files=["poly.out"],
+    )
+    with pytest.raises(
+        InvalidResponseFile,
+        match="Error reading GEN_DATA.*could not convert string.*4.910405046410615,4.910405046410615.*to float64",
+    ):
+        config.read_from_file(tmp_path, 0)
+
+
+@pytest.mark.usefixtures("use_tmpdir")
+@given(st.binary())
+def test_that_read_file_does_not_raise_unexpected_exceptions_on_invalid_file(contents):
+    Path("./output").write_bytes(contents)
+    with suppress(InvalidResponseFile):
+        GenDataConfig(
+            name="gen_data",
+            keys=["something"],
+            report_steps_list=[None],
+            input_files=["output"],
+        ).read_from_file(os.getcwd(), 0)
+
+
+def test_that_read_file_does_not_raise_unexpected_exceptions_on_missing_file(tmpdir):
+    with pytest.raises(FileNotFoundError, match="DOES_NOT_EXIST not found"):
+        GenDataConfig(
+            name="gen_data",
+            keys=["something"],
+            report_steps_list=[None],
+            input_files=["DOES_NOT_EXIST"],
+        ).read_from_file(tmpdir, 0)
+
+
+def test_that_read_file_does_not_raise_unexpected_exceptions_on_missing_directory(
+    tmp_path,
+):
+    with pytest.raises(FileNotFoundError, match="DOES_NOT_EXIST not found"):
+        GenDataConfig(
+            name="gen_data",
+            keys=["something"],
+            report_steps_list=[None],
+            input_files=["DOES_NOT_EXIST"],
+        ).read_from_file(str(tmp_path / "DOES_NOT_EXIST"), 0)