Rebase main on dev branch #119

Merged · 6 commits · Apr 16, 2024
2 changes: 1 addition & 1 deletion .github/workflows/docs.yaml

```diff
@@ -19,7 +19,7 @@ jobs:
       - name: 🔨 Setup poetry
         uses: abatilo/[email protected]
         with:
-          poetry-version: "1.4.0"
+          poetry-version: "1.7.1"
       - name: 🔨Install dependencies
         run: |
           poetry config virtualenvs.create false
```
3 changes: 2 additions & 1 deletion .github/workflows/pytest.yaml

```diff
@@ -27,10 +27,11 @@ jobs:
       - name: 🔨 Setup poetry
         uses: abatilo/[email protected]
         with:
-          poetry-version: "1.4.0"
+          poetry-version: "1.7.1"
       - name: 🔨Install dependencies
         run: |
           poetry config virtualenvs.create false
+          pip install --no-build-isolation pendulum==2.1.2
           poetry install --no-interaction -E visu -E tensorflow --with dev --no-ansi
       - run: mkdir results && touch results/test-results-${{ matrix.python-version }}-${{matrix.os}}-summary.md
       - name: 🧪 Run tests
```
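The extra `pip install --no-build-isolation pendulum==2.1.2` step matches the changelog entry "Test pipeline pendulum dependency": pendulum 2.1.2 is known to fail to build from source under newer pip/Poetry tooling when build isolation is enabled, so it is pre-installed without isolation before `poetry install` runs. The PR does not state the root cause explicitly; this reading is inferred from the pinned version and the `--no-build-isolation` flag.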
2 changes: 1 addition & 1 deletion .github/workflows/release.yaml

```diff
@@ -29,7 +29,7 @@ jobs:
       - name: 🔨 Setup poetry
         uses: abatilo/[email protected]
         with:
-          poetry-version: "1.4.0"
+          poetry-version: "1.7.1"
       - name: 🔨Install dependencies
         run: |
           poetry config virtualenvs.create false
```
9 changes: 9 additions & 0 deletions CHANGELOG.md

```diff
@@ -2,6 +2,15 @@

 <!--next-version-placeholder-->

+## v0.14.1 (2024-04-16)
+
+### Fix
+
+* Test pipeline pendulum dependency ([`299426a`](https://github.com/codecentric-oss/niceml/commit/299426a27cfaf4f46e79958f600cdc1a8ad05466))
+* Update poetry in Github pipelines to 1.7.1 ([`2d0dd2e`](https://github.com/codecentric-oss/niceml/commit/2d0dd2e2f250fe7517c12b68f7d36802ddd73964))
+* Load non-parq files with DFLoader load_df ([`b693710`](https://github.com/codecentric-oss/niceml/commit/b693710fdf03962a543579342ba95b359b634974))
+* Add original model id to eval experiment info ([`494c101`](https://github.com/codecentric-oss/niceml/commit/494c101fcd5993971cdb1b39895494cf6de82293))
+
 ## v0.14.0 (2024-02-08)

 ### Feature
```
The same comment typo fix lands in two configuration YAML files (the file names are not shown in this capture):

```diff
@@ -12,7 +12,7 @@ defaults:
   - ops/[email protected]: exptests_default.yaml
   # experiment locations
   - shared/locations@globals: exp_locations.yaml
-  # ressources
+  # resources
   - resources/[email protected]: res_mlflow_base.yaml
   - _self_

```

```diff
@@ -12,7 +12,7 @@ defaults:
   - ops/[email protected]: exptests_default.yaml
   # experiment locations
   - shared/locations@globals: exp_locations.yaml
-  # ressources
+  # resources
   - resources/[email protected]: res_mlflow_base.yaml
   - _self_

```
2 changes: 1 addition & 1 deletion niceml/__init__.py

```diff
@@ -1 +1 @@
-__version__ = "0.14.0"
+__version__ = "0.14.1"
```
1 change: 1 addition & 0 deletions niceml/config/envconfig.py

```diff
@@ -13,6 +13,7 @@
 DESCRIPTION_KEY = "DESCRIPTION"
 LOCAL_EXP_CACHE_PATH_KEY = "LOCAL_EXP_CACHE_PATH"
 LAST_MODIFIED_KEY = "LAST_MODIFIED"
+PRETRAINED_MODEL_KEY = "PRETRAINED_MODEL"


 def replace_id_keys(input_str: str, short_id: str, run_id: str) -> str:
```
9 changes: 8 additions & 1 deletion niceml/dagster/ops/evalcopyexp.py

```diff
@@ -7,7 +7,13 @@
 from fsspec import AbstractFileSystem
 from tqdm import tqdm

-from niceml.config.envconfig import DESCRIPTION_KEY, RUN_ID_KEY, SHORT_ID_KEY
+from niceml.config.envconfig import (
+    DESCRIPTION_KEY,
+    RUN_ID_KEY,
+    SHORT_ID_KEY,
+    PRETRAINED_MODEL_KEY,
+    ENVIRONMENT_KEY,
+)
 from niceml.dagster.ops.experiment import create_exp_settings
 from niceml.experiments.experimentcontext import ExperimentContext
 from niceml.experiments.expfilenames import ExperimentFilenames, ExpEvalCopyNames
@@ -84,6 +90,7 @@ def change_ids_from_expinfo(
     with file_system.open(exp_info_path, "r") as cur_file:
         data = yaml.load(cur_file, Loader=yaml.SafeLoader)

+    data[ENVIRONMENT_KEY][PRETRAINED_MODEL_KEY] = data[SHORT_ID_KEY]
     data[RUN_ID_KEY] = run_id
     data[SHORT_ID_KEY] = short_id
     data[DESCRIPTION_KEY] = (
```
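In effect, the op now records which trained experiment an evaluation copy came from: before the run and short ids are overwritten, the source experiment's short id is stored under the environment section of the experiment info. A minimal sketch of that bookkeeping; `PRETRAINED_MODEL_KEY`'s value comes from `envconfig.py` above, while the `ENVIRONMENT`/`RUN_ID`/`SHORT_ID` key strings and all id values are assumed here for illustration:

```python
# Sketch of the new bookkeeping in change_ids_from_expinfo.
ENVIRONMENT_KEY = "ENVIRONMENT"  # assumed value, real one lives in envconfig.py
PRETRAINED_MODEL_KEY = "PRETRAINED_MODEL"
RUN_ID_KEY = "RUN_ID"  # assumed value
SHORT_ID_KEY = "SHORT_ID"  # assumed value

# experiment info as it might be read from the copied experiment's YAML
data = {
    SHORT_ID_KEY: "abc123",  # short id of the trained source experiment
    RUN_ID_KEY: "2024-04-16T10-00-00-abc123",
    ENVIRONMENT_KEY: {},
}

# remember the source model before handing out fresh ids to the eval copy
data[ENVIRONMENT_KEY][PRETRAINED_MODEL_KEY] = data[SHORT_ID_KEY]
data[RUN_ID_KEY] = "2024-04-16T11-30-00-def456"
data[SHORT_ID_KEY] = "def456"

assert data[ENVIRONMENT_KEY][PRETRAINED_MODEL_KEY] == "abc123"
```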
2 changes: 1 addition & 1 deletion niceml/dashboard/components/expviscomponent.py

```diff
@@ -25,8 +25,8 @@ def __init__(
         target_value_list: Optional[List[Any]] = None,
         assert_on_error: bool = False,
     ):
-        # Create empty list for chart images
         self.component_name: Optional[str] = component_name
+        # Create empty list for chart images
         self.chart_images_list: List[Image.Image] = []
         self.meta_function = meta_function
         self.target_value_list = [] if target_value_list is None else target_value_list
```
4 changes: 4 additions & 0 deletions niceml/dashboard/components/prefixviscomponent.py

```diff
@@ -47,6 +47,7 @@ def _render(
             subset_name: Optional[str]: Render the experiment data to a subset

         """
+        # select components for prefix
        exp_data_list: List[ExperimentData] = [
             exp_manager.get_exp_by_id(exp_id) for exp_id in exp_ids
         ]
@@ -74,11 +75,14 @@
                     comp.get_component_name() or f"Component {comp_index}"
                 )

+        # arrange tabs
         comp_index = 0
         if self.use_tabs:
             st_comp_list = list(st.tabs(comp_names))
         else:
             st_comp_list = [st.expander(label) for label in comp_names]

+        # render components
         for comp_key, cur_comps in self.components.items():
             if comp_key in exp_dict:
                 for comp in cur_comps:
```
39 changes: 26 additions & 13 deletions niceml/data/dataloaders/dfloaders.py

```diff
@@ -8,12 +8,12 @@
 from niceml.data.dataloaders.interfaces.dfloader import DfLoader
 from niceml.data.storages.localstorage import LocalStorage
 from niceml.data.storages.storageinterface import StorageInterface
-from niceml.experiments.loaddatafunctions import LoadParquetFile
-from niceml.utilities.ioutils import read_parquet, write_parquet
+from niceml.experiments.loaddatafunctions import LoadParquetFile, LoadCsvFile
+from niceml.utilities.ioutils import read_parquet, write_parquet, read_csv, write_csv


 class SimpleDfLoader(DfLoader):  # pylint: disable=too-few-public-methods
-    """SimpleLoader for parquet files"""
+    """SimpleLoader for parquet or csv files"""

     def __init__(
         self,
@@ -25,22 +25,29 @@ def __init__(
         self.storage = storage or LocalStorage()
         self.working_dir = working_dir

-    def load_df(self, df_path: str) -> pd.DataFrame:
-        """Loads and returns a dataframe from a given parquet file path"""
-        target_path = join(self.working_dir, df_path) if self.working_dir else df_path
-        return LoadParquetFile().load_data(target_path, self.storage)
+    def load_df(self, df_path: str, **kwargs) -> pd.DataFrame:
+        """Loads and returns a dataframe from a given parquet or csv file path"""
+        target_path = (
+            self.storage.join_paths(self.working_dir, df_path)
+            if self.working_dir
+            else df_path
+        )
+        if ".parq" in target_path:
+            return LoadParquetFile().load_data(target_path, self.storage)
+        else:
+            return LoadCsvFile().load_data(target_path, self.storage, **kwargs)


 class SimpleDfLoaderFactory(DfLoaderFactory):  # pylint: disable=too-few-public-methods
-    """SimpleLoader for parquet files"""
+    """SimpleLoader for parquet or csv files"""

     def create_df_loader(self, storage: StorageInterface, working_dir: str) -> DfLoader:
         """Returns SimpleDfLoader"""
         return SimpleDfLoader(storage, working_dir)


 class RemoteDiskCachedDfLoader(DfLoader):  # pylint: disable=too-few-public-methods
-    """SimpleLoader for parquet files from cache or remote storage"""
+    """SimpleLoader for parquet or csv files from cache or remote storage"""

     def __init__(
         self,
@@ -53,7 +60,7 @@ def __init__(
         self.cache_path = cache_dir
         self.working_dir = working_dir

-    def load_df(self, df_path: str) -> pd.DataFrame:
+    def load_df(self, df_path: str, **kwargs) -> pd.DataFrame:
         """Loads and returns dataframe from cache"""
         target_path = (
             self.storage.join_paths(self.working_dir, df_path)
@@ -62,14 +69,20 @@
         )
         cached_filepath = join(self.cache_path, target_path)
         if isfile(cached_filepath):
-            dataframe = read_parquet(cached_filepath)
-        else:
+            if ".parq" in target_path:
+                dataframe = read_parquet(cached_filepath)
+            else:
+                dataframe = read_csv(cached_filepath, **kwargs)
+        elif ".parq" in target_path:
             dataframe = LoadParquetFile().load_data(target_path, self.storage)
             write_parquet(dataframe, cached_filepath)
+        else:
+            dataframe = LoadCsvFile().load_data(target_path, self.storage, **kwargs)
+            write_csv(dataframe, cached_filepath, **kwargs)
         return dataframe


-class RemoteDiskCachedDfLoaderFactory(  # QUEST: still used?
+class RemoteDiskCachedDfLoaderFactory(
     DfLoaderFactory
 ):  # pylint: disable=too-few-public-methods
     """Factory of RemoteDiskCachedDfLoader"""
```
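With these changes, any path that does not contain `.parq` is routed to the CSV loader, and `**kwargs` are handed through to `pandas.read_csv`. A usage sketch, assuming an installed niceml; the file paths and separator are made up:

```python
# Usage sketch for the extended loaders; paths are illustrative only.
from niceml.data.dataloaders.dfloaders import SimpleDfLoader

loader = SimpleDfLoader()

# ".parq" is matched as a substring, so ".parquet" also takes the parquet branch
metrics = loader.load_df("experiments/metrics.parquet")

# anything else falls through to the CSV branch; kwargs reach pandas.read_csv
labels = loader.load_df("experiments/labels.csv", sep=";")
```

Two details of the diff as written are worth noting: the dispatch is a plain substring test, so a CSV whose name happens to contain `.parq` would be mis-routed to the parquet branch; and `RemoteDiskCachedDfLoader` forwards the same `**kwargs` to `write_csv` when populating the cache, so only options that both the reader and the writer understand, such as `sep`, are safe to pass.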
2 changes: 1 addition & 1 deletion niceml/data/dataloaders/interfaces/dfloader.py

```diff
@@ -8,5 +8,5 @@ class DfLoader(ABC):  # pylint: disable=too-few-public-methods
     """Abstract class DfLoader (Dataframe Loader)"""

     @abstractmethod
-    def load_df(self, df_path: str) -> pd.DataFrame:
+    def load_df(self, df_path: str, **kwargs) -> pd.DataFrame:
         """Loads and returns the dataframe"""
```
11 changes: 9 additions & 2 deletions niceml/experiments/loaddatafunctions.py

```diff
@@ -25,23 +25,28 @@ class LoadYamlFile(LoadDataFunc):  # pylint: disable=too-few-public-methods
     """Loads yaml data from a cloud storage"""

     def load_data(self, file_path: str, storage: StorageInterface):
+        """Loads yaml file from cloud storage"""
         data = storage.download_as_str(file_path)
         return yaml.load(data, Loader=yaml.SafeLoader)


 class LoadCsvFile(LoadDataFunc):  # pylint: disable=too-few-public-methods
     """Loads csv data from a cloud storage"""

-    def load_data(self, file_path: str, storage: StorageInterface):
+    def load_data(
+        self, file_path: str, storage: StorageInterface, **kwargs
+    ) -> pd.DataFrame:
+        """Loads csv file from cloud storage"""
         data = storage.download_as_str(file_path)
-        data_frame = pd.read_csv(io.BytesIO(data))
+        data_frame = pd.read_csv(io.BytesIO(data), **kwargs)
         return data_frame


 class LoadParquetFile(LoadDataFunc):  # pylint: disable=too-few-public-methods
     """Loads parquet data from a cloud storage"""

     def load_data(self, file_path: str, storage: StorageInterface):
+        """Loads parquet file from cloud storage"""
         data = storage.download_as_str(file_path)
         if data == b"":
             raise FileNotFoundError("File empty")
@@ -54,10 +59,12 @@ class LoadImageFile(LoadDataFunc):  # pylint: disable=too-few-public-methods
     """Loads image data from a cloud storage"""

     def __init__(self, target_size: ImageSize, output_dtype=np.uint8):
+        """Initialize LoadImageFile object"""
         self.target_size = target_size
         self.output_dtype = output_dtype

     def load_data(self, file_path: str, storage: StorageInterface):
+        """Loads image file from cloud storage"""
         data = storage.download_as_str(file_path)
         image: Image.Image = Image.open(io.BytesIO(data))
         if self.target_size is not None:
```
2 changes: 1 addition & 1 deletion pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "niceml"
-version = "0.14.0"
+version = "0.14.1"
 description = "Welcome to niceML 🍦, a Python-based MLOps framework that uses TensorFlow and Dagster. This framework streamlines the development, and maintenance of machine learning models, providing an end-to-end solution for building efficient and scalable pipelines."
 authors = [
     "Denis Stalz-John <[email protected]>",
```
38 changes: 35 additions & 3 deletions tests/unit/niceml/data/dataloaders/test_dfloaders.py

```diff
@@ -7,7 +7,7 @@

 from niceml.data.dataloaders.dfloaders import RemoteDiskCachedDfLoader, SimpleDfLoader
 from niceml.data.storages.localstorage import LocalStorage
-from niceml.utilities.ioutils import write_parquet
+from niceml.utilities.ioutils import write_parquet, write_csv


 @pytest.fixture()
@@ -34,14 +34,14 @@ def tmp_cache_dir() -> str:
     yield tmpdir


-def test_simple_df_loader(tmp_folder_with_parquet: str, example_df: pd.DataFrame):
+def test_simple_df_loader_parq(tmp_folder_with_parquet: str, example_df: pd.DataFrame):
     df_loader = SimpleDfLoader()
     df_test = df_loader.load_df(join(tmp_folder_with_parquet, "test.parquet"))
     assert isinstance(df_test, pd.DataFrame)
     assert df_test.equals(example_df)


-def test_remote_disk_cached_df_loader(
+def test_remote_disk_cached_df_loader_parq(
     tmp_folder_with_parquet: str, example_df: pd.DataFrame, tmp_cache_dir: str
 ):
     storage = LocalStorage(tmp_folder_with_parquet)
@@ -57,3 +57,35 @@ def test_remote_disk_cached_df_loader(
     df_test = df_loader.load_df("test.parquet")
     assert isinstance(df_test, pd.DataFrame)
     assert df_test.equals(example_df)
+
+
+@pytest.fixture()
+def tmp_folder_with_csv(example_df):
+    with TemporaryDirectory() as tmpdir:
+        write_csv(example_df, join(tmpdir, "test.csv"), sep=";")
+        yield tmpdir
+
+
+def test_simple_df_loader_csv(tmp_folder_with_csv: str, example_df: pd.DataFrame):
+    df_loader = SimpleDfLoader()
+    df_test = df_loader.load_df(join(tmp_folder_with_csv, "test.csv"), sep=";")
+    assert isinstance(df_test, pd.DataFrame)
+    assert df_test.equals(example_df)
+
+
+def test_remote_disk_cached_df_loader_csv(
+    tmp_folder_with_csv: str, example_df: pd.DataFrame, tmp_cache_dir: str
+):
+    storage = LocalStorage(tmp_folder_with_csv)
+    df_loader = RemoteDiskCachedDfLoader(storage, tmp_cache_dir)
+    df_test = df_loader.load_df("test.csv", sep=";")
+    assert isinstance(df_test, pd.DataFrame)
+    assert df_test.equals(example_df)
+    assert isfile(join(tmp_cache_dir, "test.csv"))
+
+    # remove file from orig folder to test if it is loaded from cache
+    os.remove(join(tmp_folder_with_csv, "test.csv"))
+
+    df_test = df_loader.load_df("test.csv", sep=";")
+    assert isinstance(df_test, pd.DataFrame)
+    assert df_test.equals(example_df)
```
1 change: 0 additions & 1 deletion tests/unit/niceml/utilities/test_readwritelock.py

```diff
@@ -9,7 +9,6 @@
     LocationConfig,
     join_fs_path,
     open_location,
-    join_location_w_path,
 )
 from niceml.utilities.readwritelock import (
     ReadLock,
```