Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add file loader #125

Merged
merged 2 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions niceml/dagster/ops/localizeexperiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
from niceml.experiments.experimentinfo import ExperimentInfo, load_exp_info
from niceml.experiments.expfilenames import ExperimentFilenames
from niceml.experiments.exppathfinder import get_exp_filepath
from niceml.utilities.fsspec.locationutils import join_location_w_path, open_location
from niceml.utilities.fsspec.locationutils import (
join_location_w_path,
open_location,
LocationConfig,
)
from dagster import Field, Noneable, OpExecutionContext, op


Expand All @@ -34,7 +38,7 @@
def localize_experiment(context: OpExecutionContext) -> ExperimentContext:
"""This op localizes the experiment and returns the experiment context"""
op_config = json.loads(json.dumps(context.op_config))
exp_out_location: dict = op_config["exp_out_location"]
exp_out_location: LocationConfig = LocationConfig(**op_config["exp_out_location"])

exp_path = get_exp_filepath(exp_out_location, op_config["existing_experiment"])
try:
Expand Down
15 changes: 15 additions & 0 deletions niceml/data/dataloaders/factories/fileloaderfactory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Module for FileLoaderFactory"""
from abc import ABC, abstractmethod

from niceml.data.dataloaders.interfaces.fileloader import FileLoader
from niceml.data.storages.storageinterface import StorageInterface


class FileLoaderFactory(ABC):  # pylint: disable=too-few-public-methods
    """Interface for factories that build ``FileLoader`` objects.

    Each concrete factory decides which ``FileLoader`` implementation
    it hands out.
    """

    @abstractmethod
    def create_file_loader(
        self,
        storage: StorageInterface,
        working_dir: str,
    ) -> FileLoader:
        """Build a file loader bound to a storage and a working directory.

        Args:
            storage: Storage passed on to the created file loader.
            working_dir: Working directory passed on to the created
                file loader.

        Returns:
            The created ``FileLoader``.
        """
11 changes: 11 additions & 0 deletions niceml/data/dataloaders/interfaces/fileloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Module for FileLoader"""
from abc import ABC, abstractmethod
from typing import Union


class FileLoader(ABC):  # pylint: disable=too-few-public-methods
    """Abstract interface for loaders that return the content of a file."""

    @abstractmethod
    def load_file(self, file_path: str, **kwargs) -> Union[str, dict, list]:
        """Load and return the content of the given file.

        Args:
            file_path: Path of the file to load.
            **kwargs: Additional, implementation-specific options.

        Returns:
            The content of the file as a string, dict or list.
        """
90 changes: 90 additions & 0 deletions niceml/data/dataloaders/yamlfileloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""Module for YamlFileLoaders"""
from os.path import join, isfile
from typing import Optional

from niceml.data.dataloaders.factories.fileloaderfactory import FileLoaderFactory
from niceml.data.dataloaders.interfaces.fileloader import FileLoader
from niceml.data.storages.localstorage import LocalStorage
from niceml.data.storages.storageinterface import StorageInterface
from niceml.experiments.loaddatafunctions import LoadYamlFile
from niceml.utilities.ioutils import read_yaml, write_yaml


class RemoteDiskCachedYamlFileLoader(FileLoader):
    """Yaml file loader which keeps a copy of each loaded file in a cache dir"""

    def __init__(
        self,
        storage: StorageInterface,
        cache_dir: str,
        working_dir: str,
    ):
        """Initialize the loader.

        Args:
            storage: Storage the yaml files are loaded from on a cache miss.
            cache_dir: Local directory in which loaded files are cached.
            working_dir: Directory that is joined in front of every
                requested file path; a falsy value disables the join.
        """
        self.storage = storage
        self.cache_path = cache_dir
        self.working_dir = working_dir

    def load_file(self, file_path: str, **kwargs) -> dict:
        """Load and return the yaml content from the cache or the storage.

        If the file is already present below the cache directory it is read
        from there. Otherwise it is loaded from the storage and written to
        the cache.

        Args:
            file_path: Path of the yaml file, relative to ``working_dir``
                when one is set.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The parsed yaml content.
        """
        target_path = (
            self.storage.join_paths(self.working_dir, file_path)
            if self.working_dir
            else file_path
        )
        # os.path.join discards all previous components when a later one is
        # absolute. Without stripping leading separators an absolute target
        # path would place the "cached" file outside of the cache directory.
        cached_filepath = join(self.cache_path, target_path.lstrip("/\\"))
        if isfile(cached_filepath):
            return read_yaml(cached_filepath)
        yaml_dict = LoadYamlFile().load_data(target_path, self.storage)
        write_yaml(yaml_dict, cached_filepath)
        return yaml_dict


class RemoteDiskCachedYamlFileLoaderFactory(
    FileLoaderFactory
):  # pylint: disable=too-few-public-methods
    """Factory producing RemoteDiskCachedYamlFileLoader instances"""

    def __init__(self, cache_dir: str):
        """Store the cache directory handed to every created loader.

        Args:
            cache_dir: Cache directory passed to each created loader.
        """
        self.cache_path = cache_dir

    def create_file_loader(
        self,
        storage: StorageInterface,
        working_dir: str,
    ) -> FileLoader:
        """Create a RemoteDiskCachedYamlFileLoader.

        Args:
            storage: Storage passed to the created loader.
            working_dir: Working directory passed to the created loader.

        Returns:
            A loader using this factory's cache directory.
        """
        return RemoteDiskCachedYamlFileLoader(
            storage=storage,
            cache_dir=self.cache_path,
            working_dir=working_dir,
        )


class SimpleYamlFileLoader(FileLoader):
    """Yaml file loader without caching (e.g. for local files)"""

    def __init__(
        self,
        storage: Optional[StorageInterface] = None,
        working_dir: Optional[str] = None,
    ):
        """Initialize the loader.

        Args:
            storage: Storage to load from; a ``LocalStorage`` is created
                when none (or a falsy value) is given.
            working_dir: Optional directory joined in front of every
                requested file path.
        """
        self.storage = storage if storage else LocalStorage()
        self.working_dir = working_dir

    def load_file(self, file_path: str, **kwargs) -> dict:
        """Load and return the content of a yaml file.

        Args:
            file_path: Path of the yaml file, relative to ``working_dir``
                when one is set.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The result of ``LoadYamlFile().load_data`` for the resolved path.
        """
        if self.working_dir:
            resolved_path = self.storage.join_paths(self.working_dir, file_path)
        else:
            resolved_path = file_path
        yaml_loader = LoadYamlFile()
        return yaml_loader.load_data(resolved_path, self.storage)


class SimpleYamlFileLoaderFactory(
    FileLoaderFactory
):  # pylint: disable=too-few-public-methods
    """Factory producing SimpleYamlFileLoader instances"""

    def create_file_loader(
        self,
        storage: StorageInterface,
        working_dir: str,
    ) -> FileLoader:
        """Create a SimpleYamlFileLoader.

        Args:
            storage: Storage passed to the created loader.
            working_dir: Working directory passed to the created loader.

        Returns:
            The created SimpleYamlFileLoader.
        """
        return SimpleYamlFileLoader(storage=storage, working_dir=working_dir)
Loading