diff --git a/CHANGELOG.md b/CHANGELOG.md index 8694168a..966d3844 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # CHANGELOG +## v0.17.0-dev.1 (2024-08-07) + +### Feature + +* feat: Add possibility to pass additional arguments when initializing a `ResultAnalyzer` (#134) + +## 📥 Pull Request Description + +This PR adds the following features and changes: + +- feat: Add possibility to pass additional arguments when initializing a +`ResultAnalyzer` +- refactor: Add an experiment context to the `initialize` function of +the `DataframeAnalyzer` +- refactor: Pass the experiment context of a train or eval pipeline run +to the result analyzer. + +## 👀 Affected Areas + +- Result analyzers +- Analyse op +- Train and eval job ([`6e48893`](https://github.com/codecentric-oss/niceml/commit/6e48893b6734a554eacaf30985926dd865f8a275)) + ## v0.16.1-dev.1 (2024-07-23) ### Ci diff --git a/niceml/__init__.py b/niceml/__init__.py index ac0bf49e..b070ec6b 100644 --- a/niceml/__init__.py +++ b/niceml/__init__.py @@ -1 +1 @@ -__version__ = "0.16.1-dev.1" +__version__ = "0.17.0-dev.1" diff --git a/niceml/dagster/ops/analysis.py b/niceml/dagster/ops/analysis.py index 05ad3d08..bbc7130d 100644 --- a/niceml/dagster/ops/analysis.py +++ b/niceml/dagster/ops/analysis.py @@ -47,7 +47,9 @@ def analysis( ) result_analyzer: ResultAnalyzer = instantiated_op_config["result_analyzer"] - result_analyzer.initialize(data_description) + result_analyzer.initialize( + data_description=data_description, exp_context=exp_context + ) for dataset_key, cur_pred_set in datasets.items(): context.log.info(f"Analyze dataset: {dataset_key}") diff --git a/niceml/mlcomponents/resultanalyzers/analyzer.py b/niceml/mlcomponents/resultanalyzers/analyzer.py index 8c21c06c..d49d26a0 100644 --- a/niceml/mlcomponents/resultanalyzers/analyzer.py +++ b/niceml/mlcomponents/resultanalyzers/analyzer.py @@ -1,4 +1,5 @@ """Module for the ABC ResultAnalyzer""" + from abc import ABC, abstractmethod from 
niceml.data.datadescriptions.datadescription import DataDescription @@ -11,10 +12,23 @@ class ResultAnalyzer(ABC): implementation of the ResultAnalyzer""" def __init__(self): + """Initialize an abstract result analyzer.""" + self.data_description = None - def initialize(self, data_description: DataDescription): - """Initializes the resultanalyzer and adds the data description""" + def initialize(self, *args, data_description: DataDescription, **kwargs): + """Initializes the ResultAnalyzer and adds the data description. This isn't done by the + `__init__` because some data is only available after initializing the ResultAnalyzer, like + data_description. + + Args: + *args: Additional arguments that can be added to the ResultAnalyzer + data_description: DataDescription that is used by the ResultAnalyzer (available in the + `__call__` method). The data_description parameter contains + information about the data set, such as number of classes and + feature names. + **kwargs: Additional keyword arguments that can be added to the ResultAnalyzer + """ self.data_description = data_description @abstractmethod diff --git a/niceml/mlcomponents/resultanalyzers/dataframes/dfanalyzer.py b/niceml/mlcomponents/resultanalyzers/dataframes/dfanalyzer.py index 3832b860..25cc12f9 100644 --- a/niceml/mlcomponents/resultanalyzers/dataframes/dfanalyzer.py +++ b/niceml/mlcomponents/resultanalyzers/dataframes/dfanalyzer.py @@ -1,8 +1,9 @@ """Module for DataframeAnalyzer and DfMetric""" + import logging from abc import ABC, abstractmethod from os.path import basename, join -from typing import List +from typing import List, Optional import mlflow import pandas as pd @@ -17,6 +18,10 @@ class DfMetric(ABC): """metric of a dataframe""" + def __init__(self): + """Initializes a DfMetric and sets the data_description attribute to None.""" + self.data_description: Optional[DataDescription] = None + def initialize(self, data_description: DataDescription): """Initializes the metric with a 
data_description""" self.data_description = data_description @@ -41,20 +46,34 @@ def __init__( super().__init__() self.parq_file_prefix = parq_file_prefix self.df_metrics: List[DfMetric] = metrics + self.experiment_context = None - def initialize(self, data_description: DataDescription): + def initialize( + self, + *args, + data_description: DataDescription, + exp_context: Optional[ExperimentContext] = None, + **kwargs, + ): """ The initialize function initialized the metrics in `self.metrics` This function is called once before the first call to the evaluate function. It can be used to initialize any variables that are needed - for evaluation. The data_description parameter contains information about the - data set, such as number of classes and feature names. + for evaluation. Args: - data_description: `DataDescription` used to initialize instances of - this class and the metrics + *args: Additional arguments that can be added to the ResultAnalyzer + data_description: DataDescription that is used by the ResultAnalyzer (available in the + `__call__` method). The data_description parameter contains + information about the data set, such as number of classes and + feature names. + exp_context: For some ResultAnalyzers + it may be necessary to obtain the experiment context before calling + the ResultAnalyzer (`__call__`). + **kwargs: Additional keyword arguments that can be added to the ResultAnalyzer """ - super().initialize(data_description) + super().initialize(data_description=data_description) + self.experiment_context = exp_context for cur_metric in self.df_metrics: cur_metric.initialize(data_description) diff --git a/pyproject.toml b/pyproject.toml index fd3fe251..051e37b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "niceml" -version = "0.16.1-dev.1" +version = "0.17.0-dev.1" description = "Welcome to niceML 🍦, a Python-based MLOps framework that uses TensorFlow and Dagster. 
This framework streamlines the development, and maintenance of machine learning models, providing an end-to-end solution for building efficient and scalable pipelines." authors = [ "Denis Stalz-John ",