Rebase main on dev branch #119

Merged · 6 commits · Apr 16, 2024
2 changes: 1 addition & 1 deletion .github/workflows/docs.yaml

```diff
@@ -19,7 +19,7 @@ jobs:
       - name: 🔨 Setup poetry
         uses: abatilo/[email protected]
         with:
-          poetry-version: "1.4.0"
+          poetry-version: "1.7.1"
       - name: 🔨Install dependencies
         run: |
           poetry config virtualenvs.create false
```
3 changes: 2 additions & 1 deletion .github/workflows/pytest.yaml

```diff
@@ -27,10 +27,11 @@ jobs:
       - name: 🔨 Setup poetry
         uses: abatilo/[email protected]
         with:
-          poetry-version: "1.4.0"
+          poetry-version: "1.7.1"
       - name: 🔨Install dependencies
         run: |
           poetry config virtualenvs.create false
+          pip install --no-build-isolation pendulum==2.1.2
           poetry install --no-interaction -E visu -E tensorflow --with dev --no-ansi
       - run: mkdir results && touch results/test-results-${{ matrix.python-version }}-${{matrix.os}}-summary.md
       - name: 🧪 Run tests
```
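The extra `pip install --no-build-isolation pendulum==2.1.2` step matches the changelog entry "Test pipeline pendulum dependency": pendulum 2.1.2 is known to fail to build from source under newer pip/Poetry tooling when build isolation is enabled, so it is pre-installed without isolation before `poetry install` runs. The PR does not state the root cause explicitly; this reading is inferred from the pinned version and the `--no-build-isolation` flag.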
2 changes: 1 addition & 1 deletion .github/workflows/release.yaml

```diff
@@ -29,7 +29,7 @@ jobs:
       - name: 🔨 Setup poetry
         uses: abatilo/[email protected]
         with:
-          poetry-version: "1.4.0"
+          poetry-version: "1.7.1"
       - name: 🔨Install dependencies
         run: |
           poetry config virtualenvs.create false
```
9 changes: 9 additions & 0 deletions CHANGELOG.md

```diff
@@ -2,6 +2,15 @@

 <!--next-version-placeholder-->

+## v0.14.1 (2024-04-16)
+
+### Fix
+
+* Test pipeline pendulum dependency ([`299426a`](https://github.com/codecentric-oss/niceml/commit/299426a27cfaf4f46e79958f600cdc1a8ad05466))
+* Update poetry in Github pipelines to 1.7.1 ([`2d0dd2e`](https://github.com/codecentric-oss/niceml/commit/2d0dd2e2f250fe7517c12b68f7d36802ddd73964))
+* Load non-parq files with DFLoader load_df ([`b693710`](https://github.com/codecentric-oss/niceml/commit/b693710fdf03962a543579342ba95b359b634974))
+* Add original model id to eval experiment info ([`494c101`](https://github.com/codecentric-oss/niceml/commit/494c101fcd5993971cdb1b39895494cf6de82293))
+
 ## v0.14.0 (2024-02-08)

 ### Feature
```
The same comment typo fix lands in two configuration YAML files (the file names are not shown in this capture):

```diff
@@ -12,7 +12,7 @@ defaults:
   - ops/[email protected]: exptests_default.yaml
   # experiment locations
   - shared/locations@globals: exp_locations.yaml
-  # ressources
+  # resources
   - resources/[email protected]: res_mlflow_base.yaml
   - _self_

```

```diff
@@ -12,7 +12,7 @@ defaults:
   - ops/[email protected]: exptests_default.yaml
   # experiment locations
   - shared/locations@globals: exp_locations.yaml
-  # ressources
+  # resources
   - resources/[email protected]: res_mlflow_base.yaml
   - _self_

```
2 changes: 1 addition & 1 deletion niceml/__init__.py

```diff
@@ -1 +1 @@
-__version__ = "0.14.0"
+__version__ = "0.14.1"
```
1 change: 1 addition & 0 deletions niceml/config/envconfig.py

```diff
@@ -13,6 +13,7 @@
 DESCRIPTION_KEY = "DESCRIPTION"
 LOCAL_EXP_CACHE_PATH_KEY = "LOCAL_EXP_CACHE_PATH"
 LAST_MODIFIED_KEY = "LAST_MODIFIED"
+PRETRAINED_MODEL_KEY = "PRETRAINED_MODEL"


 def replace_id_keys(input_str: str, short_id: str, run_id: str) -> str:
```
9 changes: 8 additions & 1 deletion niceml/dagster/ops/evalcopyexp.py

```diff
@@ -7,7 +7,13 @@
 from fsspec import AbstractFileSystem
 from tqdm import tqdm

-from niceml.config.envconfig import DESCRIPTION_KEY, RUN_ID_KEY, SHORT_ID_KEY
+from niceml.config.envconfig import (
+    DESCRIPTION_KEY,
+    RUN_ID_KEY,
+    SHORT_ID_KEY,
+    PRETRAINED_MODEL_KEY,
+    ENVIRONMENT_KEY,
+)
 from niceml.dagster.ops.experiment import create_exp_settings
 from niceml.experiments.experimentcontext import ExperimentContext
 from niceml.experiments.expfilenames import ExperimentFilenames, ExpEvalCopyNames
@@ -84,6 +90,7 @@ def change_ids_from_expinfo(
     with file_system.open(exp_info_path, "r") as cur_file:
         data = yaml.load(cur_file, Loader=yaml.SafeLoader)

+    data[ENVIRONMENT_KEY][PRETRAINED_MODEL_KEY] = data[SHORT_ID_KEY]
     data[RUN_ID_KEY] = run_id
     data[SHORT_ID_KEY] = short_id
     data[DESCRIPTION_KEY] = (
```
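In effect, the op now records which trained experiment an evaluation copy came from: before the run and short ids are overwritten, the source experiment's short id is stored under the environment section of the experiment info. A minimal sketch of that bookkeeping; `PRETRAINED_MODEL_KEY`'s value comes from `envconfig.py` above, while the `ENVIRONMENT`/`RUN_ID`/`SHORT_ID` key strings and all id values are assumed here for illustration:

```python
# Sketch of the new bookkeeping in change_ids_from_expinfo.
ENVIRONMENT_KEY = "ENVIRONMENT"  # assumed value, real one lives in envconfig.py
PRETRAINED_MODEL_KEY = "PRETRAINED_MODEL"
RUN_ID_KEY = "RUN_ID"  # assumed value
SHORT_ID_KEY = "SHORT_ID"  # assumed value

# experiment info as it might be read from the copied experiment's YAML
data = {
    SHORT_ID_KEY: "abc123",  # short id of the trained source experiment
    RUN_ID_KEY: "2024-04-16T10-00-00-abc123",
    ENVIRONMENT_KEY: {},
}

# remember the source model before handing out fresh ids to the eval copy
data[ENVIRONMENT_KEY][PRETRAINED_MODEL_KEY] = data[SHORT_ID_KEY]
data[RUN_ID_KEY] = "2024-04-16T11-30-00-def456"
data[SHORT_ID_KEY] = "def456"

assert data[ENVIRONMENT_KEY][PRETRAINED_MODEL_KEY] == "abc123"
```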
2 changes: 1 addition & 1 deletion niceml/dashboard/components/expviscomponent.py

```diff
@@ -25,8 +25,8 @@ def __init__(
         target_value_list: Optional[List[Any]] = None,
         assert_on_error: bool = False,
     ):
-        # Create empty list for chart images
         self.component_name: Optional[str] = component_name
+        # Create empty list for chart images
         self.chart_images_list: List[Image.Image] = []
         self.meta_function = meta_function
         self.target_value_list = [] if target_value_list is None else target_value_list
```
4 changes: 4 additions & 0 deletions niceml/dashboard/components/prefixviscomponent.py

```diff
@@ -47,6 +47,7 @@ def _render(
             subset_name: Optional[str]: Render the experiment data to a subset

         """
+        # select components for prefix
        exp_data_list: List[ExperimentData] = [
             exp_manager.get_exp_by_id(exp_id) for exp_id in exp_ids
         ]
@@ -74,11 +75,14 @@
                     comp.get_component_name() or f"Component {comp_index}"
                 )

+        # arrange tabs
         comp_index = 0
         if self.use_tabs:
             st_comp_list = list(st.tabs(comp_names))
         else:
             st_comp_list = [st.expander(label) for label in comp_names]

+        # render components
         for comp_key, cur_comps in self.components.items():
             if comp_key in exp_dict:
                 for comp in cur_comps:
```
39 changes: 26 additions & 13 deletions niceml/data/dataloaders/dfloaders.py

```diff
@@ -8,12 +8,12 @@
 from niceml.data.dataloaders.interfaces.dfloader import DfLoader
 from niceml.data.storages.localstorage import LocalStorage
 from niceml.data.storages.storageinterface import StorageInterface
-from niceml.experiments.loaddatafunctions import LoadParquetFile
-from niceml.utilities.ioutils import read_parquet, write_parquet
+from niceml.experiments.loaddatafunctions import LoadParquetFile, LoadCsvFile
+from niceml.utilities.ioutils import read_parquet, write_parquet, read_csv, write_csv


 class SimpleDfLoader(DfLoader):  # pylint: disable=too-few-public-methods
-    """SimpleLoader for parquet files"""
+    """SimpleLoader for parquet or csv files"""

     def __init__(
         self,
@@ -25,22 +25,29 @@ def __init__(
         self.storage = storage or LocalStorage()
         self.working_dir = working_dir

-    def load_df(self, df_path: str) -> pd.DataFrame:
-        """Loads and returns a dataframe from a given parquet file path"""
-        target_path = join(self.working_dir, df_path) if self.working_dir else df_path
-        return LoadParquetFile().load_data(target_path, self.storage)
+    def load_df(self, df_path: str, **kwargs) -> pd.DataFrame:
+        """Loads and returns a dataframe from a given parquet or csv file path"""
+        target_path = (
+            self.storage.join_paths(self.working_dir, df_path)
+            if self.working_dir
+            else df_path
+        )
+        if ".parq" in target_path:
+            return LoadParquetFile().load_data(target_path, self.storage)
+        else:
+            return LoadCsvFile().load_data(target_path, self.storage, **kwargs)


 class SimpleDfLoaderFactory(DfLoaderFactory):  # pylint: disable=too-few-public-methods
-    """SimpleLoader for parquet files"""
+    """SimpleLoader for parquet or csv files"""

     def create_df_loader(self, storage: StorageInterface, working_dir: str) -> DfLoader:
         """Returns SimpleDfLoader"""
         return SimpleDfLoader(storage, working_dir)


 class RemoteDiskCachedDfLoader(DfLoader):  # pylint: disable=too-few-public-methods
-    """SimpleLoader for parquet files from cache or remote storage"""
+    """SimpleLoader for parquet or csv files from cache or remote storage"""

     def __init__(
         self,
@@ -53,7 +60,7 @@ def __init__(
         self.cache_path = cache_dir
         self.working_dir = working_dir

-    def load_df(self, df_path: str) -> pd.DataFrame:
+    def load_df(self, df_path: str, **kwargs) -> pd.DataFrame:
         """Loads and returns dataframe from cache"""
         target_path = (
             self.storage.join_paths(self.working_dir, df_path)
@@ -62,14 +69,20 @@
         )
         cached_filepath = join(self.cache_path, target_path)
         if isfile(cached_filepath):
-            dataframe = read_parquet(cached_filepath)
-        else:
+            if ".parq" in target_path:
+                dataframe = read_parquet(cached_filepath)
+            else:
+                dataframe = read_csv(cached_filepath, **kwargs)
+        elif ".parq" in target_path:
             dataframe = LoadParquetFile().load_data(target_path, self.storage)
             write_parquet(dataframe, cached_filepath)
+        else:
+            dataframe = LoadCsvFile().load_data(target_path, self.storage, **kwargs)
+            write_csv(dataframe, cached_filepath, **kwargs)
         return dataframe


-class RemoteDiskCachedDfLoaderFactory(  # QUEST: still used?
+class RemoteDiskCachedDfLoaderFactory(
     DfLoaderFactory
 ):  # pylint: disable=too-few-public-methods
     """Factory of RemoteDiskCachedDfLoader"""
```
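With these changes, any path that does not contain `.parq` is routed to the CSV loader, and `**kwargs` are handed through to `pandas.read_csv`. A usage sketch, assuming an installed niceml; the file paths and separator are made up:

```python
# Usage sketch for the extended loaders; paths are illustrative only.
from niceml.data.dataloaders.dfloaders import SimpleDfLoader

loader = SimpleDfLoader()

# ".parq" is matched as a substring, so ".parquet" also takes the parquet branch
metrics = loader.load_df("experiments/metrics.parquet")

# anything else falls through to the CSV branch; kwargs reach pandas.read_csv
labels = loader.load_df("experiments/labels.csv", sep=";")
```

Two details of the diff as written are worth noting: the dispatch is a plain substring test, so a CSV whose name happens to contain `.parq` would be mis-routed to the parquet branch; and `RemoteDiskCachedDfLoader` forwards the same `**kwargs` to `write_csv` when populating the cache, so only options that both the reader and the writer understand, such as `sep`, are safe to pass.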
2 changes: 1 addition & 1 deletion niceml/data/dataloaders/interfaces/dfloader.py

```diff
@@ -8,5 +8,5 @@ class DfLoader(ABC):  # pylint: disable=too-few-public-methods
     """Abstract class DfLoader (Dataframe Loader)"""

     @abstractmethod
-    def load_df(self, df_path: str) -> pd.DataFrame:
+    def load_df(self, df_path: str, **kwargs) -> pd.DataFrame:
         """Loads and returns the dataframe"""
```
11 changes: 9 additions & 2 deletions niceml/experiments/loaddatafunctions.py

```diff
@@ -25,23 +25,28 @@ class LoadYamlFile(LoadDataFunc):  # pylint: disable=too-few-public-methods
     """Loads yaml data from a cloud storage"""

     def load_data(self, file_path: str, storage: StorageInterface):
+        """Loads yaml file from cloud storage"""
         data = storage.download_as_str(file_path)
         return yaml.load(data, Loader=yaml.SafeLoader)


 class LoadCsvFile(LoadDataFunc):  # pylint: disable=too-few-public-methods
     """Loads csv data from a cloud storage"""

-    def load_data(self, file_path: str, storage: StorageInterface):
+    def load_data(
+        self, file_path: str, storage: StorageInterface, **kwargs
+    ) -> pd.DataFrame:
+        """Loads csv file from cloud storage"""
         data = storage.download_as_str(file_path)
-        data_frame = pd.read_csv(io.BytesIO(data))
+        data_frame = pd.read_csv(io.BytesIO(data), **kwargs)
         return data_frame


 class LoadParquetFile(LoadDataFunc):  # pylint: disable=too-few-public-methods
     """Loads parquet data from a cloud storage"""

     def load_data(self, file_path: str, storage: StorageInterface):
+        """Loads parquet file from cloud storage"""
         data = storage.download_as_str(file_path)
         if data == b"":
             raise FileNotFoundError("File empty")
@@ -54,10 +59,12 @@ class LoadImageFile(LoadDataFunc):  # pylint: disable=too-few-public-methods
     """Loads image data from a cloud storage"""

     def __init__(self, target_size: ImageSize, output_dtype=np.uint8):
+        """Initialize LoadImageFile object"""
         self.target_size = target_size
         self.output_dtype = output_dtype

     def load_data(self, file_path: str, storage: StorageInterface):
+        """Loads image file from cloud storage"""
         data = storage.download_as_str(file_path)
         image: Image.Image = Image.open(io.BytesIO(data))
         if self.target_size is not None:
```
2 changes: 1 addition & 1 deletion pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "niceml"
-version = "0.14.0"
+version = "0.14.1"
 description = "Welcome to niceML 🍦, a Python-based MLOps framework that uses TensorFlow and Dagster. This framework streamlines the development, and maintenance of machine learning models, providing an end-to-end solution for building efficient and scalable pipelines."
 authors = [
     "Denis Stalz-John <[email protected]>",
```
38 changes: 35 additions & 3 deletions tests/unit/niceml/data/dataloaders/test_dfloaders.py

```diff
@@ -7,7 +7,7 @@

 from niceml.data.dataloaders.dfloaders import RemoteDiskCachedDfLoader, SimpleDfLoader
 from niceml.data.storages.localstorage import LocalStorage
-from niceml.utilities.ioutils import write_parquet
+from niceml.utilities.ioutils import write_parquet, write_csv


 @pytest.fixture()
@@ -34,14 +34,14 @@ def tmp_cache_dir() -> str:
     yield tmpdir


-def test_simple_df_loader(tmp_folder_with_parquet: str, example_df: pd.DataFrame):
+def test_simple_df_loader_parq(tmp_folder_with_parquet: str, example_df: pd.DataFrame):
     df_loader = SimpleDfLoader()
     df_test = df_loader.load_df(join(tmp_folder_with_parquet, "test.parquet"))
     assert isinstance(df_test, pd.DataFrame)
     assert df_test.equals(example_df)


-def test_remote_disk_cached_df_loader(
+def test_remote_disk_cached_df_loader_parq(
     tmp_folder_with_parquet: str, example_df: pd.DataFrame, tmp_cache_dir: str
 ):
     storage = LocalStorage(tmp_folder_with_parquet)
@@ -57,3 +57,35 @@ def test_remote_disk_cached_df_loader(
     df_test = df_loader.load_df("test.parquet")
     assert isinstance(df_test, pd.DataFrame)
     assert df_test.equals(example_df)
+
+
+@pytest.fixture()
+def tmp_folder_with_csv(example_df):
+    with TemporaryDirectory() as tmpdir:
+        write_csv(example_df, join(tmpdir, "test.csv"), sep=";")
+        yield tmpdir
+
+
+def test_simple_df_loader_csv(tmp_folder_with_csv: str, example_df: pd.DataFrame):
+    df_loader = SimpleDfLoader()
+    df_test = df_loader.load_df(join(tmp_folder_with_csv, "test.csv"), sep=";")
+    assert isinstance(df_test, pd.DataFrame)
+    assert df_test.equals(example_df)
+
+
+def test_remote_disk_cached_df_loader_csv(
+    tmp_folder_with_csv: str, example_df: pd.DataFrame, tmp_cache_dir: str
+):
+    storage = LocalStorage(tmp_folder_with_csv)
+    df_loader = RemoteDiskCachedDfLoader(storage, tmp_cache_dir)
+    df_test = df_loader.load_df("test.csv", sep=";")
+    assert isinstance(df_test, pd.DataFrame)
+    assert df_test.equals(example_df)
+    assert isfile(join(tmp_cache_dir, "test.csv"))
+
+    # remove file from orig folder to test if it is loaded from cache
+    os.remove(join(tmp_folder_with_csv, "test.csv"))
+
+    df_test = df_loader.load_df("test.csv", sep=";")
+    assert isinstance(df_test, pd.DataFrame)
+    assert df_test.equals(example_df)
```
1 change: 0 additions & 1 deletion tests/unit/niceml/utilities/test_readwritelock.py

```diff
@@ -9,7 +9,6 @@
     LocationConfig,
     join_fs_path,
     open_location,
-    join_location_w_path,
 )
 from niceml.utilities.readwritelock import (
     ReadLock,
```