feat: Add possibility to pass additional arguments when initializing …

…a `ResultAnalyzer` (#134) ## 📥 Pull Request Description This PR adds the following features and changes: - feat: Add possibility to pass additional arguments when initializing a `ResultAnalyzer` - refactor: Add an experiment context to the `initialize` function of the `DataframeAnalyzer` - refactor: Pass the experiment context of a train or eval pipeline run to the result analyzer. ## 👀 Affected Areas - Result analyzers - Analyse op - Train and eval job
codecentric-oss · Aug 7, 2024 · 6e48893 · 6e48893
1 parent aee7174
commit 6e48893
Show file tree

Hide file tree

Showing 3 changed files with 45 additions and 10 deletions.
diff --git a/niceml/dagster/ops/analysis.py b/niceml/dagster/ops/analysis.py
@@ -47,7 +47,9 @@ def analysis(
     )
 
     result_analyzer: ResultAnalyzer = instantiated_op_config["result_analyzer"]
-    result_analyzer.initialize(data_description)
+    result_analyzer.initialize(
+        data_description=data_description, exp_context=exp_context
+    )
 
     for dataset_key, cur_pred_set in datasets.items():
         context.log.info(f"Analyze dataset: {dataset_key}")

diff --git a/niceml/mlcomponents/resultanalyzers/analyzer.py b/niceml/mlcomponents/resultanalyzers/analyzer.py
@@ -1,4 +1,5 @@
 """Module for the ABC ResultAnalyzer"""
+
 from abc import ABC, abstractmethod
 
 from niceml.data.datadescriptions.datadescription import DataDescription
@@ -11,10 +12,23 @@ class ResultAnalyzer(ABC):
     implementation of the ResultAnalyzer"""
 
     def __init__(self):
+        """Initialize an abstract result analyzer."""
+
         self.data_description = None
 
-    def initialize(self, data_description: DataDescription):
-        """Initializes the resultanalyzer and adds the data description"""
+    def initialize(self, *args, data_description: DataDescription, **kwargs):
+        """Initializes the ResultAnalyzer and adds the data description. This isn't done by the
+        `__init__` because some data is only available after initializing the ResultAnalyzer, like
+        data_description.
+
+        Args:
+            *args: Additional arguments that can be added to the ResultAnalyzer
+            data_description:   DataDescription that is used by the ResultAnalyzer (available in the
+                                `__call__` method). The data_description parameter contains
+                                information about the data set, such as number of classes and
+                                feature names.
+            **kwargs: Additional keyword arguments that can be added to the ResultAnalyzer
+        """
         self.data_description = data_description
 
     @abstractmethod

diff --git a/niceml/mlcomponents/resultanalyzers/dataframes/dfanalyzer.py b/niceml/mlcomponents/resultanalyzers/dataframes/dfanalyzer.py
@@ -1,8 +1,9 @@
 """Module for DataframeAnalyzer and DfMetric"""
+
 import logging
 from abc import ABC, abstractmethod
 from os.path import basename, join
-from typing import List
+from typing import List, Optional
 
 import mlflow
 import pandas as pd
@@ -17,6 +18,10 @@
 class DfMetric(ABC):
     """metric of a dataframe"""
 
+    def __init__(self):
+        """Initializes a DfMetric and sets the data_description attribute to None."""
+        self.data_description: Optional[DataDescription] = None
+
     def initialize(self, data_description: DataDescription):
         """Initializes the metric with a data_description"""
         self.data_description = data_description
@@ -41,20 +46,34 @@ def __init__(
         super().__init__()
         self.parq_file_prefix = parq_file_prefix
         self.df_metrics: List[DfMetric] = metrics
+        self.experiment_context = None
 
-    def initialize(self, data_description: DataDescription):
+    def initialize(
+        self,
+        *args,
+        data_description: DataDescription,
+        exp_context: Optional[ExperimentContext] = None,
+        **kwargs,
+    ):
         """
         The initialize function initializes the metrics in `self.df_metrics`.
         This function is called once before the first call to the
         evaluate function. It can be used to initialize any variables that are needed
-        for evaluation. The data_description parameter contains information about the
-        data set, such as number of classes and feature names.
+        for evaluation.
 
         Args:
-            data_description: `DataDescription` used to initialize instances of
-                                this class and the metrics
+            *args: Additional arguments that can be added to the ResultAnalyzer
+            data_description:   DataDescription that is used by the ResultAnalyzer (available in the
+                                `__call__` method). The data_description parameter contains
+                                information about the data set, such as number of classes and
+                                feature names.
+            exp_context:    For some ResultAnalyzers
+                            it may be necessary to obtain the experiment context before calling
+                            the ResultAnalyzer (`__call__`).
+            **kwargs: Additional keyword arguments that can be added to the ResultAnalyzer
         """
-        super().initialize(data_description)
+        super().initialize(data_description=data_description)
+        self.experiment_context = exp_context
         for cur_metric in self.df_metrics:
             cur_metric.initialize(data_description)