From a92a31cff32857948bfde94f976f0f6775929e3d Mon Sep 17 00:00:00 2001 From: Anke Koke Date: Thu, 13 Jun 2024 10:27:41 +0200 Subject: [PATCH 1/2] feat: add file loader --- .../factories/fileloaderfactory.py | 15 ++++ .../data/dataloaders/interfaces/fileloader.py | 11 +++ niceml/data/dataloaders/yamlfileloader.py | 90 +++++++++++++++++++ 3 files changed, 116 insertions(+) create mode 100644 niceml/data/dataloaders/factories/fileloaderfactory.py create mode 100644 niceml/data/dataloaders/interfaces/fileloader.py create mode 100644 niceml/data/dataloaders/yamlfileloader.py diff --git a/niceml/data/dataloaders/factories/fileloaderfactory.py b/niceml/data/dataloaders/factories/fileloaderfactory.py new file mode 100644 index 00000000..5c1a43b8 --- /dev/null +++ b/niceml/data/dataloaders/factories/fileloaderfactory.py @@ -0,0 +1,15 @@ +"""Module for FileLoaderFactory""" +from abc import ABC, abstractmethod + +from niceml.data.dataloaders.interfaces.fileloader import FileLoader +from niceml.data.storages.storageinterface import StorageInterface + + +class FileLoaderFactory(ABC): # pylint: disable=too-few-public-methods + """Abstract implementation of FileLoaderFactory""" + + @abstractmethod + def create_file_loader( + self, storage: StorageInterface, working_dir: str + ) -> FileLoader: + """Creates a file loader""" diff --git a/niceml/data/dataloaders/interfaces/fileloader.py b/niceml/data/dataloaders/interfaces/fileloader.py new file mode 100644 index 00000000..f71425a3 --- /dev/null +++ b/niceml/data/dataloaders/interfaces/fileloader.py @@ -0,0 +1,11 @@ +"""Module for FileLoader""" +from abc import ABC, abstractmethod +from typing import Union + + +class FileLoader(ABC): # pylint: disable=too-few-public-methods + """Abstract class FileLoader""" + + @abstractmethod + def load_file(self, file_path: str, **kwargs) -> Union[str, dict, list]: + """Loads and returns the content of the given file""" diff --git a/niceml/data/dataloaders/yamlfileloader.py 
b/niceml/data/dataloaders/yamlfileloader.py new file mode 100644 index 00000000..b49b7a9b --- /dev/null +++ b/niceml/data/dataloaders/yamlfileloader.py @@ -0,0 +1,90 @@ +"""Module for YamlFileLoaders""" +from os.path import join, isfile +from typing import Optional + +from niceml.data.dataloaders.factories.fileloaderfactory import FileLoaderFactory +from niceml.data.dataloaders.interfaces.fileloader import FileLoader +from niceml.data.storages.localstorage import LocalStorage +from niceml.data.storages.storageinterface import StorageInterface +from niceml.experiments.loaddatafunctions import LoadYamlFile +from niceml.utilities.ioutils import read_yaml, write_yaml + + +class RemoteDiskCachedYamlFileLoader(FileLoader): + """Remote Yaml file loader which creates cache data""" + + def __init__( + self, + storage: StorageInterface, + cache_dir: str, + working_dir: str, + ): + """Yaml file loader""" + self.storage = storage + self.cache_path = cache_dir + self.working_dir = working_dir + + def load_file(self, file_path: str, **kwargs) -> dict: + """Loads and returns the yaml content from remote or cache""" + target_path = ( + self.storage.join_paths(self.working_dir, file_path) + if self.working_dir + else file_path + ) + cached_filepath = join(self.cache_path, target_path) + if isfile(cached_filepath): + yaml_dict = read_yaml(cached_filepath) + else: + yaml_dict = LoadYamlFile().load_data(target_path, self.storage) + write_yaml(yaml_dict, cached_filepath) + return yaml_dict + + +class RemoteDiskCachedYamlFileLoaderFactory( + FileLoaderFactory +): # pylint: disable=too-few-public-methods + """Factory of RemoteDiskCachedYamlFileLoader""" + + def __init__(self, cache_dir: str): + """Initialize a Factory for RemoteDiskCachedYamlFileLoader""" + self.cache_path = cache_dir + + def create_file_loader( + self, storage: StorageInterface, working_dir: str + ) -> FileLoader: + """Returns a RemoteDiskCachedYamlFileLoader""" + return RemoteDiskCachedYamlFileLoader(storage, self.cache_path, 
working_dir) + + +class SimpleYamlFileLoader(FileLoader): + """Simple Yaml file loader (e.g. for local files)""" + + def __init__( + self, + storage: Optional[StorageInterface] = None, + working_dir: Optional[str] = None, + ): + """Yaml file loader""" + self.storage = storage or LocalStorage() + self.working_dir = working_dir + + def load_file(self, file_path: str, **kwargs) -> dict: + """Loads and returns the yaml content""" + target_path = ( + self.storage.join_paths(self.working_dir, file_path) + if self.working_dir + else file_path + ) + return LoadYamlFile().load_data(target_path, self.storage) + + +class SimpleYamlFileLoaderFactory( + FileLoaderFactory +): # pylint: disable=too-few-public-methods + """Factory of SimpleYamlFileLoader""" + + def create_file_loader( + self, storage: StorageInterface, working_dir: str + ) -> FileLoader: + """Returns a SimpleYamlFileLoader""" + return SimpleYamlFileLoader(storage, working_dir) From 87a87a01e897f51bf789bceee23e7068b6fc9de6 Mon Sep 17 00:00:00 2001 From: Anke Koke Date: Thu, 13 Jun 2024 10:28:31 +0200 Subject: [PATCH 2/2] fix: localizeexperiment with location config --- niceml/dagster/ops/localizeexperiment.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/niceml/dagster/ops/localizeexperiment.py b/niceml/dagster/ops/localizeexperiment.py index 47ae1c18..6c79f513 100644 --- a/niceml/dagster/ops/localizeexperiment.py +++ b/niceml/dagster/ops/localizeexperiment.py @@ -7,7 +7,11 @@ from niceml.experiments.experimentinfo import ExperimentInfo, load_exp_info from niceml.experiments.expfilenames import ExperimentFilenames from niceml.experiments.exppathfinder import get_exp_filepath -from niceml.utilities.fsspec.locationutils import join_location_w_path, open_location +from niceml.utilities.fsspec.locationutils import ( + join_location_w_path, + open_location, + LocationConfig, +) from dagster import Field, Noneable, OpExecutionContext, op @@ -34,7 +38,7 @@ def localize_experiment(context: 
OpExecutionContext) -> ExperimentContext: """This op localizes the experiment and returns the experiment context""" op_config = json.loads(json.dumps(context.op_config)) - exp_out_location: dict = op_config["exp_out_location"] + exp_out_location: LocationConfig = LocationConfig(**op_config["exp_out_location"]) exp_path = get_exp_filepath(exp_out_location, op_config["existing_experiment"]) try: