Merge branch 'main' of https://github.com/aleph-alpha-intelligence-layer/intelligence-layer into improve_install_instructions
Aleph-Alpha · Oct 31, 2023 · 84f1a3a · 84f1a3a
2 parents 7791c13 + 97f6b26
commit 84f1a3a
Show file tree

Hide file tree

Showing 3 changed files with 50 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # Aleph Alpha Intelligence Layer ☯️
 
-The  Aleph Alpha Intelligence Layer ☯️ offers a comprehensive suite of development tools for crafting solutions that harness the capabilities of large language models (LLMs).
+The Aleph Alpha Intelligence Layer ☯️ offers a comprehensive suite of development tools for crafting solutions that harness the capabilities of large language models (LLMs).
 With a unified framework for LLM-based workflows, it facilitates seamless AI product development, from prototyping and prompt experimentation to result evaluation and deployment.
 
 The key features of the Intelligence Layer are:
@@ -136,6 +136,10 @@ From here, you can customize everything, including the prompt, model, and more i
 This not only saves you time but also ensures you're building on a tried and tested foundation.
 Therefore, think of these use-cases as stepping stones, guiding you towards crafting tailored solutions that best fit your unique requirements.
 
+## References
+
+- Full documentation: https://glowing-tribble-223446r.pages.github.io/
+
 ## License
 
 This project can only be used after signing the agreement with Aleph Alpha®. Please refer to the [LICENSE](LICENSE.md) file for more details.
diff --git a/src/intelligence_layer/core/evaluator.py b/src/intelligence_layer/core/evaluator.py
@@ -61,13 +61,35 @@ def evaluate(
         logger: DebugLogger,
         expected_output: ExpectedOutput,
     ) -> Evaluation:
-        """Executes the evaluation for this use-case."""
+        """Executes the evaluation for this use-case.
+
+        Arguments:
+            input: Interface to be passed to the task that shall be evaluated.
+            logger: Debug logger used for tracing of tasks.
+            expected_output: Output that is expected from the task run with the supplied input.
+        Returns:
+            Evaluation: interface of the metrics that come from the evaluated task.
+
+        The implementation of this method is responsible for running a task (usually supplied by the __init__ method)
+        and making any comparisons relevant to the evaluation.
+        Based on the results, it should create an `Evaluation` class with all the metrics and return it.
+        """
         pass
 
     def evaluate_dataset(
         self, dataset: Dataset[Input, ExpectedOutput], logger: DebugLogger
     ) -> AggregatedEvaluation:
-        """Evaluates an entire datasets in a threaded manner and aggregates the results into an `AggregatedEvaluation`."""
+        """Evaluates an entire dataset in a threaded manner and aggregates the results into an `AggregatedEvaluation`.
+
+        Arguments:
+            dataset: Dataset that will be used to evaluate a task.
+            logger: Logger used for tracing.
+        Returns:
+            AggregatedEvaluation: The aggregated results of an evaluation run with a dataset.
+
+        This will call the `run` method for each example in the dataset.
+        Finally, it will call the `aggregate` method and return the aggregated results.
+        """
         with ThreadPoolExecutor(max_workers=10) as executor:
             evaluations = list(
                 tqdm(
@@ -87,5 +109,14 @@ def evaluate_dataset(
 
     @abstractmethod
     def aggregate(self, evaluations: Sequence[Evaluation]) -> AggregatedEvaluation:
-        """`Evaluator`-specific method for aggregating individual `Evaluations` into report-like `Aggregated Evaluation`."""
+        """`Evaluator`-specific method for aggregating individual `Evaluations` into a report-like `AggregatedEvaluation`.
+
+        Arguments:
+            evaluations: The results from running `evaluate_dataset` with a task.
+        Returns:
+            AggregatedEvaluation: The aggregated results of an evaluation run with a dataset.
+
+        This method is responsible for taking the results of an evaluation run and aggregating all the results.
+        It should create an `AggregatedEvaluation` class and return it at the end.
+        """
         pass
diff --git a/src/intelligence_layer/core/task.py b/src/intelligence_layer/core/task.py
@@ -91,7 +91,17 @@ def inner(
 
     @abstractmethod
     def run(self, input: Input, logger: DebugLogger) -> Output:
-        """Executes the process for this use-case."""
+        """Executes the implementation of run for this use case.
+
+        Args:
+            input: Generic input defined by the task implementation
+        Returns:
+            output: Generic output defined by the task implementation
+
+        This takes an input and runs the implementation to generate an output.
+        It takes a `DebugLogger` for tracing of the process.
+        The Input and Output are logged by default.
+        """
         ...
 
     def run_concurrently(