Skip to content

Commit

Permalink
feat: Add possibility to pass additional arguments when initializing a `ResultAnalyzer` (#134)
Browse files Browse the repository at this point in the history

## 📥 Pull Request Description

This PR adds the following features and changes:

- feat: Add the possibility to pass additional arguments when initializing a
`ResultAnalyzer`
- refactor: Add an experiment context parameter to the `initialize` method of
the `DataframeAnalyzer`
- refactor: Pass the experiment context of a train or eval pipeline run
to the result analyzer

## 👀 Affected Areas

- Result analyzers
- `analysis` op
- Train and eval job
  • Loading branch information
aiakide authored Aug 7, 2024
1 parent aee7174 commit 6e48893
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 10 deletions.
4 changes: 3 additions & 1 deletion niceml/dagster/ops/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ def analysis(
)

result_analyzer: ResultAnalyzer = instantiated_op_config["result_analyzer"]
result_analyzer.initialize(data_description)
result_analyzer.initialize(
data_description=data_description, exp_context=exp_context
)

for dataset_key, cur_pred_set in datasets.items():
context.log.info(f"Analyze dataset: {dataset_key}")
Expand Down
18 changes: 16 additions & 2 deletions niceml/mlcomponents/resultanalyzers/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module for the ABC ResultAnalyzer"""

from abc import ABC, abstractmethod

from niceml.data.datadescriptions.datadescription import DataDescription
Expand All @@ -11,10 +12,23 @@ class ResultAnalyzer(ABC):
implementation of the ResultAnalyzer"""

def __init__(self):
"""Initialize an abstract result analyzer."""

self.data_description = None

def initialize(self, data_description: DataDescription):
"""Initializes the resultanalyzer and adds the data description"""
def initialize(self, *args, data_description: DataDescription, **kwargs):
"""Initializes the ResultAnalyzer and adds the data description. This isn't done by the
`__init__` because some data is only available after initialising the ResultAnalyser, like
data_description.
Args:
*args: Additional arguments that can be added to the ResultAnalyzer
data_description: DataDescription that is used by the ResultAnalyzer (available in the
`__call__` method). The data_description parameter contains
information about the data set, such as number of classes and
feature names.
**kwargs: Additional keyword arguments that can be added to the ResultAnalyzer
"""
self.data_description = data_description

@abstractmethod
Expand Down
33 changes: 26 additions & 7 deletions niceml/mlcomponents/resultanalyzers/dataframes/dfanalyzer.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Module for DataframeAnalyzer and DfMetric"""

import logging
from abc import ABC, abstractmethod
from os.path import basename, join
from typing import List
from typing import List, Optional

import mlflow
import pandas as pd
Expand All @@ -17,6 +18,10 @@
class DfMetric(ABC):
"""metric of a dataframe"""

def __init__(self):
"""Initializes a DfMetric and set the data_description attribute to None."""
self.data_description: Optional[DataDescription] = None

def initialize(self, data_description: DataDescription):
"""Initializes the metric with a data_description"""
self.data_description = data_description
Expand All @@ -41,20 +46,34 @@ def __init__(
super().__init__()
self.parq_file_prefix = parq_file_prefix
self.df_metrics: List[DfMetric] = metrics
self.experiment_context = None

def initialize(self, data_description: DataDescription):
def initialize(
self,
*args,
data_description: DataDescription,
exp_context: Optional[ExperimentContext] = None,
**kwargs,
):
"""
The initialize function initialized the metrics in `self.metrics`
This function is called once before the first call to the
evaluate function. It can be used to initialize any variables that are needed
for evaluation. The data_description parameter contains information about the
data set, such as number of classes and feature names.
for evaluation.
Args:
data_description: `DataDescription` used to initialize instances of
this class and the metrics
*args: Additional arguments that can be added to the ResultAnalyzer
data_description: DataDescription that is used by the ResultAnalyzer (available in the
`__call__` method). The data_description parameter contains
information about the data set, such as number of classes and
feature names.
exp_context: For some ResultAnalyzers
it may be necessary to obtain the experiment context before calling
the ResultAnalyzer (`__call__`).
**kwargs: Additional keyword arguments that can be added to the ResultAnalyzer
"""
super().initialize(data_description)
super().initialize(data_description=data_description)
self.experiment_context = exp_context
for cur_metric in self.df_metrics:
cur_metric.initialize(data_description)

Expand Down

0 comments on commit 6e48893

Please sign in to comment.