Release preparation v0.8.3 #88

Merged · 6 commits · Nov 8, 2023
1 change: 1 addition & 0 deletions niceml/config/envconfig.py
@@ -12,6 +12,7 @@
ENVIRONMENT_KEY = "ENVIRONMENT"
DESCRIPTION_KEY = "DESCRIPTION"
LOCAL_EXP_CACHE_PATH_KEY = "LOCAL_EXP_CACHE_PATH"
LAST_MODIFIED_KEY = "LAST_MODIFIED"


def replace_id_keys(input_str: str, short_id: str, run_id: str) -> str:
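The only change here is the new LAST_MODIFIED_KEY constant. A minimal sketch (editor illustration, not part of the PR) of how it can sit next to the existing keys in an experiment-info dict; the timestamp value is made up:

from niceml.config.envconfig import DESCRIPTION_KEY, LAST_MODIFIED_KEY

# Hypothetical experiment-info dict; the timestamp string is only an example.
exp_info_dict = {
    DESCRIPTION_KEY: "baseline run",
    LAST_MODIFIED_KEY: "2023-11-08T12:00:00",
}
print(exp_info_dict[LAST_MODIFIED_KEY])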
14 changes: 7 additions & 7 deletions niceml/dashboard/remotettrainutils.py
@@ -40,7 +40,7 @@ def select_to_load_exps(
That means which are not in the experiment manager"""
experiments_to_load = []
for exp_info in exp_info_list:
if exp_info not in exp_manager:
if exp_manager.is_exp_modified(exp_info.short_id, exp_info.last_modified):
experiments_to_load.append(exp_info)
return experiments_to_load

@@ -66,7 +66,8 @@ def load_experiments(
local_exp_cache: Optional[ExperimentCache] = None,
):
"""Load the experiments from the cloud storage and
stores them in the experiment manager. Additionally, they are saved in the local cache"""
stores them in the experiment manager. Additionally, they are saved in the local cache
"""
experiments: List[ExperimentData]
dir_info_list: List[str] = []
load_exp_info_list: List[ExperimentInfo] = []
@@ -78,7 +79,9 @@ def _check_and_load_cache(
) -> List[ExperimentData]:
experiments_list = []
for cur_exp_info in exp_info_list:
if local_exp_cache is not None and cur_exp_info.short_id in local_exp_cache:
if local_exp_cache is not None and not local_exp_cache.should_reload(
cur_exp_info
):
initialized_df_loader: DfLoader = df_loader_factory.create_df_loader(
storage, cur_exp_info.exp_filepath
)
@@ -114,10 +117,7 @@ def _check_and_load_cache(
)
if experiment is not None:
experiments.append(experiment)
if (
local_exp_cache is not None
and experiment.get_short_id() not in local_exp_cache
):
if local_exp_cache is not None:
local_exp_cache.save_experiment(experiment)
prog_bar.progress(idx / load_exp_count)
status_text.text(f"Cached {idx}/{load_exp_count} experiments")
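The dashboard now reloads an experiment when its last_modified stamp changed, not only when it is missing. ExperimentCache.should_reload itself is not part of this diff; the sketch below is a hedged stand-in for the presumed check, reduced to plain Python:

from typing import Dict, Optional


def should_reload_sketch(
    cached: Dict[str, Optional[str]], short_id: str, last_modified: Optional[str]
) -> bool:
    """Reload if the experiment was never cached or its timestamp changed."""
    if short_id not in cached:
        return True
    return cached[short_id] != last_modified


# An experiment cached with an older timestamp is selected for reloading.
print(should_reload_sketch({"ab12": "2023-11-01T10:00:00"}, "ab12", "2023-11-08T09:30:00"))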
13 changes: 10 additions & 3 deletions niceml/data/datainfolistings/clsdatainfolisting.py
@@ -31,12 +31,14 @@ def __init__(
label_suffix: str = ".json",
image_suffixes: Optional[List[str]] = None,
):
"""Init method of LabelClsDataInfoListing"""
self.sub_dir = sub_dir
self.data_location = data_location
self.label_suffix = label_suffix
self.image_suffixes = image_suffixes or [".png", ".jpg", ".jpeg"]

def list(self, data_description: DataDescription) -> List[ClsDataInfo]:
"""Lists all data infos"""
output_data_description: OutputVectorDataDescription = check_instance(
data_description, OutputVectorDataDescription
)
@@ -73,6 +75,11 @@ def list(self, data_description: DataDescription) -> List[ClsDataInfo]:
return new_data_info_list


def _default_class_extractor(input_str: str) -> str:
"""Default class extractor for DirClsDataInfoListing"""
return splitext(input_str)[0].rsplit("_", maxsplit=1)[-1]


class DirClsDataInfoListing(
DataInfoListing
): # pylint: disable=too-few-public-methods, too-many-arguments
@@ -85,14 +92,14 @@ def __init__(
class_extractor: Optional[Callable] = None,
image_suffixes: Optional[List[str]] = None,
):
"""Init method of DirClsDataInfoListing"""
self.sub_dir = sub_dir
self.location = location
self.class_extractor = class_extractor or (
lambda x: splitext(x)[0].rsplit("_", maxsplit=1)[-1]
)
self.class_extractor = class_extractor or _default_class_extractor
self.image_suffixes = image_suffixes or [".png", ".jpg", ".jpeg"]

def list(self, data_description: DataDescription) -> List[ClsDataInfo]:
"""Lists all data infos"""
output_data_description: OutputVectorDataDescription = check_instance(
data_description, OutputVectorDataDescription
)
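The inline lambda is replaced by the module-level _default_class_extractor, so the default can be referenced (and, for instance, pickled) like any other function. A small, self-contained illustration of what it extracts from a file name, mirroring the code above:

from os.path import splitext


def _default_class_extractor(input_str: str) -> str:
    """Default class extractor for DirClsDataInfoListing"""
    return splitext(input_str)[0].rsplit("_", maxsplit=1)[-1]


# The class name is taken from the last underscore-separated token of the stem.
print(_default_class_extractor("image_0042_cat.png"))  # -> "cat"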
43 changes: 40 additions & 3 deletions niceml/experiments/experimentcontext.py
@@ -1,4 +1,5 @@
"""Module for the ExperimentContext"""
import logging
from dataclasses import dataclass
from os.path import join
from typing import Optional, Union
@@ -7,6 +8,7 @@
from fsspec import AbstractFileSystem
from PIL import Image

from niceml.config.envconfig import LAST_MODIFIED_KEY
from niceml.config.hydra import instantiate_from_yaml
from niceml.data.datadescriptions.datadescription import DataDescription
from niceml.experiments.expfilenames import ExperimentFilenames, OpNames
@@ -21,6 +23,7 @@
write_parquet,
write_yaml,
)
from niceml.utilities.timeutils import generate_timestamp


@dataclass
@@ -36,6 +39,7 @@ def write_parquet(
dataframe: pd.DataFrame,
data_path: str,
compression: Optional[str] = "gzip",
apply_last_modified: bool = True,
**kwargs,
):
"""writes the dataframe as parquet file relative to the experiment"""
@@ -48,6 +52,8 @@
file_system=file_system,
**kwargs,
)
if apply_last_modified:
self.update_last_modified()

def read_parquet(self, data_path: str) -> pd.DataFrame:
"""reads the dataframe as parquet file relative to the experiment"""
@@ -59,7 +65,9 @@ def read_yaml(self, data_path: str) -> dict:
with open_location(self.fs_config) as (file_system, root_path):
return read_yaml(join(root_path, data_path), file_system=file_system)

def write_yaml(self, data: dict, data_path: str, **kwargs):
def write_yaml(
self, data: dict, data_path: str, apply_last_modified: bool = True, **kwargs
):
"""writes the yaml file relative to the experiment"""
with open_location(self.fs_config) as (file_system, root_path):
write_yaml(
@@ -68,13 +76,21 @@ def write_yaml(self, data: dict, data_path: str, **kwargs):
file_system=file_system,
**kwargs,
)
if apply_last_modified:
self.update_last_modified()

def read_csv(self, data_path: str) -> pd.DataFrame:
"""Reads a csv file relative to the experiment"""
with open_location(self.fs_config) as (file_system, root_path):
return read_csv(join(root_path, data_path), file_system=file_system)

def write_csv(self, data: pd.DataFrame, data_path: str, **kwargs):
def write_csv(
self,
data: pd.DataFrame,
data_path: str,
apply_last_modified: bool = True,
**kwargs,
):
"""Writes a csv file relative to the experiment"""
with open_location(self.fs_config) as (file_system, root_path):
write_csv(
@@ -83,11 +99,17 @@ def write_csv(self, data: pd.DataFrame, data_path: str, **kwargs):
file_system=file_system,
**kwargs,
)
if apply_last_modified:
self.update_last_modified()

def write_image(self, image: Image.Image, data_path: str):
def write_image(
self, image: Image.Image, data_path: str, apply_last_modified: bool = True
):
"""Writes an image relative to the experiment"""
with open_location(self.fs_config) as (file_system, root_path):
write_image(image, join(root_path, data_path), file_system=file_system)
if apply_last_modified:
self.update_last_modified()

def read_image(self, data_path: str) -> Image.Image:
"""Reads an image relative to the experiment"""
@@ -114,3 +136,18 @@ def instantiate_datadescription_from_yaml(self) -> DataDescription:
file_system=exp_fs,
)
return data_description

def update_last_modified(self, timestamp: Optional[str] = None):
"""Updates the last modified timestamp of the experiment info"""
timestamp = timestamp or generate_timestamp()
try:
exp_info_dict = self.read_yaml(ExperimentFilenames.EXP_INFO)
exp_info_dict[LAST_MODIFIED_KEY] = timestamp
self.write_yaml(
exp_info_dict, ExperimentFilenames.EXP_INFO, apply_last_modified=False
)
except FileNotFoundError:
logging.getLogger(__name__).warning(
"Could not update last modified timestamp, because the "
"experiment info file was not found."
)
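All write helpers gain an apply_last_modified flag and call the new update_last_modified afterwards, which rewrites the experiment-info YAML with apply_last_modified=False so the stamp update does not recurse. The sketch below (editor illustration in plain Python, not niceml API) shows the same touch-on-write pattern end to end:

from datetime import datetime, timezone
from typing import Dict


class TouchingWriter:
    """Toy stand-in for ExperimentContext's touch-on-write behaviour."""

    def __init__(self) -> None:
        self.exp_info: Dict[str, str] = {}

    def write_artifact(self, name: str, payload: str, apply_last_modified: bool = True) -> None:
        # ... persist `payload` under `name` here ...
        if apply_last_modified:
            self.update_last_modified()

    def update_last_modified(self) -> None:
        # The real code writes the info file with apply_last_modified=False,
        # so stamping the timestamp does not trigger another stamp.
        self.exp_info["LAST_MODIFIED"] = datetime.now(timezone.utc).isoformat(timespec="seconds")


writer = TouchingWriter()
writer.write_artifact("metrics.csv", "epoch,loss\n0,0.9\n")
print(writer.exp_info)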
10 changes: 9 additions & 1 deletion niceml/experiments/experimentinfo.py
@@ -15,6 +15,7 @@
EXP_TYPE_KEY,
RUN_ID_KEY,
SHORT_ID_KEY,
LAST_MODIFIED_KEY,
)
from niceml.utilities.idutils import ALPHANUMERICLIST
from niceml.utilities.ioutils import read_yaml
@@ -34,6 +35,7 @@ class ExperimentInfo:
description: str
exp_dir: str
exp_filepath: Optional[str] = None
last_modified: Optional[str] = None

def as_save_dict(self) -> dict:
"""Returns a dictionary which can be saved to a yaml file"""
@@ -46,8 +48,13 @@ def as_save_dict(self) -> dict:
ENVIRONMENT_KEY: self.environment,
DESCRIPTION_KEY: self.description,
EXP_DIR_KEY: self.exp_dir,
LAST_MODIFIED_KEY: self.last_modified,
}

def is_modified(self, other) -> bool:
"""Checks if the other experiment info is modified"""
return self.last_modified != other.last_modified


def load_exp_info(
exp_info_file, file_system: Optional[AbstractFileSystem] = None
@@ -72,6 +79,7 @@ def experiment_info_factory(data: dict, path: Optional[str] = None) -> ExperimentInfo:
description=data.get(DESCRIPTION_KEY, ""),
exp_dir=data.get(EXP_DIR_KEY, ""),
exp_filepath=path,
last_modified=data.get(LAST_MODIFIED_KEY, None),
)


@@ -91,7 +99,7 @@ def get_exp_id_from_name(input_name: str) -> str:
f"ID not found anywhere starting with 'id_': {input_name}"
)
cur_id = input_name[index + 3 : index + 7]
if len(cur_id) != 4:
if len(cur_id) != 4: # noqa: PLR2004
raise ExpIdNotFoundError(f"ID not complete: {input_name}")
if any((x not in ALPHANUMERICLIST for x in cur_id)):
raise ExpIdNotFoundError(
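ExperimentInfo now carries last_modified and can compare itself against another info object. A hedged sketch, assuming that the factory lines hidden in this diff fall back to defaults for missing keys just like the visible ones do:

from niceml.config.envconfig import LAST_MODIFIED_KEY, SHORT_ID_KEY
from niceml.experiments.experimentinfo import experiment_info_factory

# Two minimal, hypothetical info dicts for the same experiment id.
cached = experiment_info_factory({SHORT_ID_KEY: "ab12", LAST_MODIFIED_KEY: "2023-11-01T10:00:00"})
remote = experiment_info_factory({SHORT_ID_KEY: "ab12", LAST_MODIFIED_KEY: "2023-11-08T09:30:00"})

# The timestamps differ, so the cached entry counts as modified and gets reloaded.
print(cached.is_modified(remote))  # True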
14 changes: 14 additions & 0 deletions niceml/experiments/experimentmanager.py
@@ -36,6 +36,7 @@ def add_experiment(self, experiment: ExperimentData):
self.exp_dict[experiment.get_run_id()] = experiment

def __contains__(self, exp_id: Union[str, ExperimentInfo]):
"""Checks if the experiment is in the manager"""
if type(exp_id) == ExperimentInfo:
exp_id = exp_id.short_id
for experiment in self.experiments:
@@ -99,6 +100,13 @@ def get_metrics(self, experiments: Optional[List[str]] = None) -> List[str]:

return sorted(list(metric_set))

def is_exp_modified(self, exp_id: str, new_time_str: str) -> bool:
"""Checks if the experiment has been modified"""
if exp_id not in self.exp_dict:
return True
exp = self.get_exp_by_id(exp_id)
return exp.exp_info.is_modified(new_time_str)

def get_datasets(self) -> List[str]:
"""Returns a list of all datasets used in the experiments"""
dataset_set: Set[str] = set()
@@ -234,6 +242,7 @@ def get_metrics_visu_df(
def get_value_information_dict(
self, info_path: List[str], list_connection_str: str = "x"
) -> Dict[Any, List[str]]:
"""Returns a dict with information about the values"""
value_information_dict = defaultdict(list)
for exp in self.experiments:
try:
Expand All @@ -254,17 +263,20 @@ def get_epochs_information_dict(self) -> Dict[int, List[str]]:
return epochs_information_dict

def get_datasets_information_dict(self) -> Dict[str, List[str]]:
"""Returns a dict with information about the datasets"""
datasets_information_dict = defaultdict(list)
for exp in self.experiments:
dataset = exp.get_experiment_path().split("/")[0]
datasets_information_dict[dataset].append(exp.get_short_id())
return datasets_information_dict

def get_dataset(self, exp: ExperimentData) -> str:
"""Returns the dataset of the given experiment"""
dataset = exp.get_experiment_path().split("/")[0]
return dataset

def get_date_information_dict(self) -> Dict[date, List[str]]:
"""Returns a dict with information about the dates"""
date_information_dict = defaultdict(list)
for exp in self.experiments:
date_string = exp.exp_info.run_id.split("T")[0]
@@ -273,6 +285,7 @@ def get_date_information_dict(self) -> Dict[date, List[str]]:
return date_information_dict

def get_experiment_type_information_dict(self) -> Dict[str, List[str]]:
"""Returns a dict with information about the experiment types"""
experiment_type_information_dict = defaultdict(list)
for exp in self.experiments:
experiment_type = exp.get_experiment_path().split("/")[-1].split("-")[0]
@@ -300,6 +313,7 @@ def local_exp_manager_factory(path: str) -> ExperimentManager:


def get_add_min_max(metric_name: str, mode_dict: Dict[str, str]) -> Tuple[bool, bool]:
"""Returns if min and max should be added"""
add_min: bool = True
add_max: bool = True
for key, mode in mode_dict.items():
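is_exp_modified is what select_to_load_exps (see the dashboard diff above) calls for every remote experiment: unknown ids are treated as modified so they always get loaded. A short sketch, assuming ExperimentManager can be constructed from an empty experiment list:

from niceml.experiments.experimentmanager import ExperimentManager

empty_manager = ExperimentManager([])  # assumed constructor argument: a list of ExperimentData
print(empty_manager.is_exp_modified("ab12", "2023-11-08T09:30:00"))  # True: id is unknown to the manager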