Release preparation v0.8.3 #88

Merged · 6 commits · Nov 8, 2023
1 change: 1 addition & 0 deletions niceml/config/envconfig.py
@@ -12,6 +12,7 @@
ENVIRONMENT_KEY = "ENVIRONMENT"
DESCRIPTION_KEY = "DESCRIPTION"
LOCAL_EXP_CACHE_PATH_KEY = "LOCAL_EXP_CACHE_PATH"
LAST_MODIFIED_KEY = "LAST_MODIFIED"


def replace_id_keys(input_str: str, short_id: str, run_id: str) -> str:
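The only change here is the new LAST_MODIFIED_KEY constant. A minimal sketch (editor illustration, not part of the PR) of how it can sit next to the existing keys in an experiment-info dict; the timestamp value is made up:

from niceml.config.envconfig import DESCRIPTION_KEY, LAST_MODIFIED_KEY

# Hypothetical experiment-info dict; the timestamp string is only an example.
exp_info_dict = {
    DESCRIPTION_KEY: "baseline run",
    LAST_MODIFIED_KEY: "2023-11-08T12:00:00",
}
print(exp_info_dict[LAST_MODIFIED_KEY])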
14 changes: 7 additions & 7 deletions niceml/dashboard/remotettrainutils.py
@@ -40,7 +40,7 @@ def select_to_load_exps(
That means which are not in the experiment manager"""
experiments_to_load = []
for exp_info in exp_info_list:
if exp_info not in exp_manager:
if exp_manager.is_exp_modified(exp_info.short_id, exp_info.last_modified):
experiments_to_load.append(exp_info)
return experiments_to_load

@@ -66,7 +66,8 @@ def load_experiments(
local_exp_cache: Optional[ExperimentCache] = None,
):
"""Load the experiments from the cloud storage and
stores them in the experiment manager. Additionally, they are saved in the local cache"""
stores them in the experiment manager. Additionally, they are saved in the local cache
"""
experiments: List[ExperimentData]
dir_info_list: List[str] = []
load_exp_info_list: List[ExperimentInfo] = []
@@ -78,7 +79,9 @@ def _check_and_load_cache(
) -> List[ExperimentData]:
experiments_list = []
for cur_exp_info in exp_info_list:
if local_exp_cache is not None and cur_exp_info.short_id in local_exp_cache:
if local_exp_cache is not None and not local_exp_cache.should_reload(
cur_exp_info
):
initialized_df_loader: DfLoader = df_loader_factory.create_df_loader(
storage, cur_exp_info.exp_filepath
)
@@ -114,10 +117,7 @@ def _check_and_load_cache(
)
if experiment is not None:
experiments.append(experiment)
if (
local_exp_cache is not None
and experiment.get_short_id() not in local_exp_cache
):
if local_exp_cache is not None:
local_exp_cache.save_experiment(experiment)
prog_bar.progress(idx / load_exp_count)
status_text.text(f"Cached {idx}/{load_exp_count} experiments")
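The dashboard now reloads an experiment when its last_modified stamp changed, not only when it is missing. ExperimentCache.should_reload itself is not part of this diff; the sketch below is a hedged stand-in for the presumed check, reduced to plain Python:

from typing import Dict, Optional


def should_reload_sketch(
    cached: Dict[str, Optional[str]], short_id: str, last_modified: Optional[str]
) -> bool:
    """Reload if the experiment was never cached or its timestamp changed."""
    if short_id not in cached:
        return True
    return cached[short_id] != last_modified


# An experiment cached with an older timestamp is selected for reloading.
print(should_reload_sketch({"ab12": "2023-11-01T10:00:00"}, "ab12", "2023-11-08T09:30:00"))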
13 changes: 10 additions & 3 deletions niceml/data/datainfolistings/clsdatainfolisting.py
@@ -31,12 +31,14 @@ def __init__(
label_suffix: str = ".json",
image_suffixes: Optional[List[str]] = None,
):
"""Init method of LabelClsDataInfoListing"""
self.sub_dir = sub_dir
self.data_location = data_location
self.label_suffix = label_suffix
self.image_suffixes = image_suffixes or [".png", ".jpg", ".jpeg"]

def list(self, data_description: DataDescription) -> List[ClsDataInfo]:
"""Lists all data infos"""
output_data_description: OutputVectorDataDescription = check_instance(
data_description, OutputVectorDataDescription
)
@@ -73,6 +75,11 @@ def list(self, data_description: DataDescription) -> List[ClsDataInfo]:
return new_data_info_list


def _default_class_extractor(input_str: str) -> str:
"""Default class extractor for DirClsDataInfoListing"""
return splitext(input_str)[0].rsplit("_", maxsplit=1)[-1]


class DirClsDataInfoListing(
DataInfoListing
): # pylint: disable=too-few-public-methods, too-many-arguments
@@ -85,14 +92,14 @@ def __init__(
class_extractor: Optional[Callable] = None,
image_suffixes: Optional[List[str]] = None,
):
"""Init method of DirClsDataInfoListing"""
self.sub_dir = sub_dir
self.location = location
self.class_extractor = class_extractor or (
lambda x: splitext(x)[0].rsplit("_", maxsplit=1)[-1]
)
self.class_extractor = class_extractor or _default_class_extractor
self.image_suffixes = image_suffixes or [".png", ".jpg", ".jpeg"]

def list(self, data_description: DataDescription) -> List[ClsDataInfo]:
"""Lists all data infos"""
output_data_description: OutputVectorDataDescription = check_instance(
data_description, OutputVectorDataDescription
)
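The inline lambda is replaced by the module-level _default_class_extractor, so the default can be referenced (and, for instance, pickled) like any other function. A small, self-contained illustration of what it extracts from a file name, mirroring the code above:

from os.path import splitext


def _default_class_extractor(input_str: str) -> str:
    """Default class extractor for DirClsDataInfoListing"""
    return splitext(input_str)[0].rsplit("_", maxsplit=1)[-1]


# The class name is taken from the last underscore-separated token of the stem.
print(_default_class_extractor("image_0042_cat.png"))  # -> "cat"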
43 changes: 40 additions & 3 deletions niceml/experiments/experimentcontext.py
@@ -1,4 +1,5 @@
"""Module for the ExperimentContext"""
import logging
from dataclasses import dataclass
from os.path import join
from typing import Optional, Union
@@ -7,6 +8,7 @@
from fsspec import AbstractFileSystem
from PIL import Image

from niceml.config.envconfig import LAST_MODIFIED_KEY
from niceml.config.hydra import instantiate_from_yaml
from niceml.data.datadescriptions.datadescription import DataDescription
from niceml.experiments.expfilenames import ExperimentFilenames, OpNames
@@ -21,6 +23,7 @@
write_parquet,
write_yaml,
)
from niceml.utilities.timeutils import generate_timestamp


@dataclass
@@ -36,6 +39,7 @@ def write_parquet(
dataframe: pd.DataFrame,
data_path: str,
compression: Optional[str] = "gzip",
apply_last_modified: bool = True,
**kwargs,
):
"""writes the dataframe as parquet file relative to the experiment"""
@@ -48,6 +52,8 @@
file_system=file_system,
**kwargs,
)
if apply_last_modified:
self.update_last_modified()

def read_parquet(self, data_path: str) -> pd.DataFrame:
"""reads the dataframe as parquet file relative to the experiment"""
@@ -59,7 +65,9 @@ def read_yaml(self, data_path: str) -> dict:
with open_location(self.fs_config) as (file_system, root_path):
return read_yaml(join(root_path, data_path), file_system=file_system)

def write_yaml(self, data: dict, data_path: str, **kwargs):
def write_yaml(
self, data: dict, data_path: str, apply_last_modified: bool = True, **kwargs
):
"""writes the yaml file relative to the experiment"""
with open_location(self.fs_config) as (file_system, root_path):
write_yaml(
@@ -68,13 +76,21 @@ def write_yaml(self, data: dict, data_path: str, **kwargs):
file_system=file_system,
**kwargs,
)
if apply_last_modified:
self.update_last_modified()

def read_csv(self, data_path: str) -> pd.DataFrame:
"""Reads a csv file relative to the experiment"""
with open_location(self.fs_config) as (file_system, root_path):
return read_csv(join(root_path, data_path), file_system=file_system)

def write_csv(self, data: pd.DataFrame, data_path: str, **kwargs):
def write_csv(
self,
data: pd.DataFrame,
data_path: str,
apply_last_modified: bool = True,
**kwargs,
):
"""Writes a csv file relative to the experiment"""
with open_location(self.fs_config) as (file_system, root_path):
write_csv(
@@ -83,11 +99,17 @@ def write_csv(self, data: pd.DataFrame, data_path: str, **kwargs):
file_system=file_system,
**kwargs,
)
if apply_last_modified:
self.update_last_modified()

def write_image(self, image: Image.Image, data_path: str):
def write_image(
self, image: Image.Image, data_path: str, apply_last_modified: bool = True
):
"""Writes an image relative to the experiment"""
with open_location(self.fs_config) as (file_system, root_path):
write_image(image, join(root_path, data_path), file_system=file_system)
if apply_last_modified:
self.update_last_modified()

def read_image(self, data_path: str) -> Image.Image:
"""Reads an image relative to the experiment"""
@@ -114,3 +136,18 @@ def instantiate_datadescription_from_yaml(self) -> DataDescription:
file_system=exp_fs,
)
return data_description

def update_last_modified(self, timestamp: Optional[str] = None):
"""Updates the last modified timestamp of the experiment info"""
timestamp = timestamp or generate_timestamp()
try:
exp_info_dict = self.read_yaml(ExperimentFilenames.EXP_INFO)
exp_info_dict[LAST_MODIFIED_KEY] = timestamp
self.write_yaml(
exp_info_dict, ExperimentFilenames.EXP_INFO, apply_last_modified=False
)
except FileNotFoundError:
logging.getLogger(__name__).warning(
"Could not update last modified timestamp, because the "
"experiment info file was not found."
)
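All write helpers gain an apply_last_modified flag and call the new update_last_modified afterwards, which rewrites the experiment-info YAML with apply_last_modified=False so the stamp update does not recurse. The sketch below (editor illustration in plain Python, not niceml API) shows the same touch-on-write pattern end to end:

from datetime import datetime, timezone
from typing import Dict


class TouchingWriter:
    """Toy stand-in for ExperimentContext's touch-on-write behaviour."""

    def __init__(self) -> None:
        self.exp_info: Dict[str, str] = {}

    def write_artifact(self, name: str, payload: str, apply_last_modified: bool = True) -> None:
        # ... persist `payload` under `name` here ...
        if apply_last_modified:
            self.update_last_modified()

    def update_last_modified(self) -> None:
        # The real code writes the info file with apply_last_modified=False,
        # so stamping the timestamp does not trigger another stamp.
        self.exp_info["LAST_MODIFIED"] = datetime.now(timezone.utc).isoformat(timespec="seconds")


writer = TouchingWriter()
writer.write_artifact("metrics.csv", "epoch,loss\n0,0.9\n")
print(writer.exp_info)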
10 changes: 9 additions & 1 deletion niceml/experiments/experimentinfo.py
@@ -15,6 +15,7 @@
EXP_TYPE_KEY,
RUN_ID_KEY,
SHORT_ID_KEY,
LAST_MODIFIED_KEY,
)
from niceml.utilities.idutils import ALPHANUMERICLIST
from niceml.utilities.ioutils import read_yaml
@@ -34,6 +35,7 @@ class ExperimentInfo:
description: str
exp_dir: str
exp_filepath: Optional[str] = None
last_modified: Optional[str] = None

def as_save_dict(self) -> dict:
"""Returns a dictionary which can be saved to a yaml file"""
@@ -46,8 +48,13 @@ def as_save_dict(self) -> dict:
ENVIRONMENT_KEY: self.environment,
DESCRIPTION_KEY: self.description,
EXP_DIR_KEY: self.exp_dir,
LAST_MODIFIED_KEY: self.last_modified,
}

def is_modified(self, other) -> bool:
"""Checks if the other experiment info is modified"""
return self.last_modified != other.last_modified


def load_exp_info(
exp_info_file, file_system: Optional[AbstractFileSystem] = None
@@ -72,6 +79,7 @@ def experiment_info_factory(data: dict, path: Optional[str] = None) -> ExperimentInfo:
description=data.get(DESCRIPTION_KEY, ""),
exp_dir=data.get(EXP_DIR_KEY, ""),
exp_filepath=path,
last_modified=data.get(LAST_MODIFIED_KEY, None),
)


@@ -91,7 +99,7 @@ def get_exp_id_from_name(input_name: str) -> str:
f"ID not found anywhere starting with 'id_': {input_name}"
)
cur_id = input_name[index + 3 : index + 7]
if len(cur_id) != 4:
if len(cur_id) != 4: # noqa: PLR2004
raise ExpIdNotFoundError(f"ID not complete: {input_name}")
if any((x not in ALPHANUMERICLIST for x in cur_id)):
raise ExpIdNotFoundError(
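ExperimentInfo now carries last_modified and can compare itself against another info object. A hedged sketch, assuming that the factory lines hidden in this diff fall back to defaults for missing keys just like the visible ones do:

from niceml.config.envconfig import LAST_MODIFIED_KEY, SHORT_ID_KEY
from niceml.experiments.experimentinfo import experiment_info_factory

# Two minimal, hypothetical info dicts for the same experiment id.
cached = experiment_info_factory({SHORT_ID_KEY: "ab12", LAST_MODIFIED_KEY: "2023-11-01T10:00:00"})
remote = experiment_info_factory({SHORT_ID_KEY: "ab12", LAST_MODIFIED_KEY: "2023-11-08T09:30:00"})

# The timestamps differ, so the cached entry counts as modified and gets reloaded.
print(cached.is_modified(remote))  # True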
14 changes: 14 additions & 0 deletions niceml/experiments/experimentmanager.py
@@ -36,6 +36,7 @@ def add_experiment(self, experiment: ExperimentData):
self.exp_dict[experiment.get_run_id()] = experiment

def __contains__(self, exp_id: Union[str, ExperimentInfo]):
"""Checks if the experiment is in the manager"""
if type(exp_id) == ExperimentInfo:
exp_id = exp_id.short_id
for experiment in self.experiments:
@@ -99,6 +100,13 @@ def get_metrics(self, experiments: Optional[List[str]] = None) -> List[str]:

return sorted(list(metric_set))

def is_exp_modified(self, exp_id: str, new_time_str: str) -> bool:
"""Checks if the experiment has been modified"""
if exp_id not in self.exp_dict:
return True
exp = self.get_exp_by_id(exp_id)
return exp.exp_info.is_modified(new_time_str)

def get_datasets(self) -> List[str]:
"""Returns a list of all datasets used in the experiments"""
dataset_set: Set[str] = set()
@@ -234,6 +242,7 @@ def get_metrics_visu_df(
def get_value_information_dict(
self, info_path: List[str], list_connection_str: str = "x"
) -> Dict[Any, List[str]]:
"""Returns a dict with information about the values"""
value_information_dict = defaultdict(list)
for exp in self.experiments:
try:
Expand All @@ -254,17 +263,20 @@ def get_epochs_information_dict(self) -> Dict[int, List[str]]:
return epochs_information_dict

def get_datasets_information_dict(self) -> Dict[str, List[str]]:
"""Returns a dict with information about the datasets"""
datasets_information_dict = defaultdict(list)
for exp in self.experiments:
dataset = exp.get_experiment_path().split("/")[0]
datasets_information_dict[dataset].append(exp.get_short_id())
return datasets_information_dict

def get_dataset(self, exp: ExperimentData) -> str:
"""Returns the dataset of the given experiment"""
dataset = exp.get_experiment_path().split("/")[0]
return dataset

def get_date_information_dict(self) -> Dict[date, List[str]]:
"""Returns a dict with information about the dates"""
date_information_dict = defaultdict(list)
for exp in self.experiments:
date_string = exp.exp_info.run_id.split("T")[0]
@@ -273,6 +285,7 @@ def get_date_information_dict(self) -> Dict[date, List[str]]:
return date_information_dict

def get_experiment_type_information_dict(self) -> Dict[str, List[str]]:
"""Returns a dict with information about the experiment types"""
experiment_type_information_dict = defaultdict(list)
for exp in self.experiments:
experiment_type = exp.get_experiment_path().split("/")[-1].split("-")[0]
@@ -300,6 +313,7 @@ def local_exp_manager_factory(path: str) -> ExperimentManager:


def get_add_min_max(metric_name: str, mode_dict: Dict[str, str]) -> Tuple[bool, bool]:
"""Returns if min and max should be added"""
add_min: bool = True
add_max: bool = True
for key, mode in mode_dict.items():
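is_exp_modified is what select_to_load_exps (see the dashboard diff above) calls for every remote experiment: unknown ids are treated as modified so they always get loaded. A short sketch, assuming ExperimentManager can be constructed from an empty experiment list:

from niceml.experiments.experimentmanager import ExperimentManager

empty_manager = ExperimentManager([])  # assumed constructor argument: a list of ExperimentData
print(empty_manager.is_exp_modified("ab12", "2023-11-08T09:30:00"))  # True: id is unknown to the manager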