ci(lint): Add step docstring check for recency #620

Closed · wants to merge 4 commits
3 changes: 3 additions & 0 deletions .darglint2
@@ -0,0 +1,3 @@
[darglint2]
ignore=DAR003,DAR201,DAR202,DAR301,DAR401
docstring_style=google
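
For context, a hedged sketch of the docstring shape this configuration enforces, adapted from the ensure_workspace_exists change later in this diff. With DAR003/DAR201/DAR202/DAR301/DAR401 ignored, darglint2 chiefly flags missing or mismatched Args: entries:

def ensure_workspace_exists(workspace_name: str) -> str:
    """Retrieves an existing workspace or creates a new one.

    Args:
        workspace_name: the name of the workspace to be retrieved or created.

    Returns:
        The id of the workspace with the given `workspace_name`.
    """
    ...  # implementation elided; only the Google-style sections matter here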
16 changes: 10 additions & 6 deletions .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0
rev: v4.5.0
hooks:
- id: check-json
exclude: trace-viewer/
@@ -21,22 +21,26 @@ repos:
args: ["--profile", "black", "--filter-files"]
verbose: true
- repo: https://github.com/psf/black
rev: 24.2.0
rev: 24.3.0
hooks:
- id: black
# https://black.readthedocs.io/en/stable/integrations/source_version_control.html#version-control-integration
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.2.0
rev: 24.3.0
hooks:
- id: black-jupyter
- repo: https://github.com/kynan/nbstripout
rev: 0.4.0
rev: 0.7.1
hooks:
- id: nbstripout
files: ".ipynb"
- repo: https://github.com/codespell-project/codespell
rev: v2.2.4
rev: v2.2.6
hooks:
- id: codespell
args: ["-L", "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke"]
args: ["-L", "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke, vor"]
exclude: '^(poetry\.lock|trace-viewer/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/qa/multiple_chunk_qa.py|src/intelligence_layer/use_cases/summarize/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/classify/keyword_extract.py|tests/use_cases/summarize/test_single_chunk_few_shot_summarize.py|tests/use_cases/summarize/very_long_text.txt)$'
- repo: https://github.com/akaihola/darglint2
rev: v1.8.2
hooks:
- id: darglint2
4 changes: 2 additions & 2 deletions Concepts.md
@@ -153,7 +153,7 @@ The Intelligence Layer supports different kinds of evaluation techniques. Most i
a single output, but it is easier to compare two different outputs and decide which one is better. An example
use case could be summarization.

To support these techniques the Intelligence Layer differantiates between 3 consecutive steps:
To support these techniques the Intelligence Layer differentiates between 3 consecutive steps:

1. Run a task by feeding it all inputs of a dataset and collecting all outputs
2. Evaluate the outputs of one or several
@@ -197,7 +197,7 @@ There are the following Repositories:
and makes them available to the `Aggregator`.
- The `AggregationRepository` stores the `AggregationOverview` containing the aggregated metrics on request of the `Aggregator`.

The following diagramms illustrate how the different concepts play together in case of the different types of evaluations.
The following diagrams illustrate how the different concepts play together in case of the different types of evaluations.

<figure>
<img src="./assets/AbsoluteEvaluation.drawio.svg">
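As a rough sketch of the three consecutive steps described in this file, with method names modeled on intelligence_layer.evaluation (the exact signatures are an assumption, not verbatim API):

# 1. Run the task on every input of a dataset and collect the outputs.
run_overview = runner.run_dataset(dataset_id)

# 2. Evaluate the outputs of one run (absolute) or several runs (relative).
evaluation_overview = evaluator.evaluate_runs(run_overview.id)

# 3. Aggregate the individual evaluations into high-level statistics.
aggregation_overview = aggregator.aggregate_evaluation(evaluation_overview.id)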
15 changes: 13 additions & 2 deletions poetry.lock

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions pyproject.toml
@@ -53,6 +53,7 @@ sphinx = "^7.2.6"
pylama = { extras = ["all", "toml"], version = "^8.4.1" }
faker = "^24.4.0"
hypercorn = "0.16.0"
darglint2 = "^1.8.2"

[tool.mypy]
files = "src,tests"
@@ -71,5 +72,8 @@ filterwarnings = [
skip = "*/__init__.py,.venv/*,*/node_modules/*"
ignore = "E501,E203"

[tool.darglint2]


[tool.pylama.linter.mccabe]
max-complexity = "11"
26 changes: 13 additions & 13 deletions src/examples/performance_tips.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "d8767b2a",
"id": "0",
"metadata": {},
"source": [
"# How to get more done in less time\n",
@@ -14,7 +14,7 @@
},
{
"cell_type": "markdown",
"id": "e04cb25b",
"id": "1",
"metadata": {},
"source": [
"## A single long running task\n",
@@ -28,7 +28,7 @@
},
{
"cell_type": "markdown",
"id": "e7fbae35",
"id": "2",
"metadata": {},
"source": [
"## Running one task multiple times\n",
@@ -40,7 +40,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "04dac517",
"id": "3",
"metadata": {},
"outputs": [],
"source": [
@@ -71,7 +71,7 @@
},
{
"cell_type": "markdown",
"id": "f58f359a",
"id": "4",
"metadata": {},
"source": [
"## Running several tasks at the same time\n",
@@ -82,7 +82,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "8959fcec-dc54-4137-9cb8-3a9c70d6a3d0",
"id": "5",
"metadata": {},
"outputs": [],
"source": [
@@ -104,7 +104,7 @@
},
{
"cell_type": "markdown",
"id": "4e846c9c",
"id": "6",
"metadata": {},
"source": [
"<a id='submit_example'></a>\n",
@@ -115,7 +115,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "6c88c3a2",
"id": "7",
"metadata": {},
"outputs": [],
"source": [
@@ -131,7 +131,7 @@
},
{
"cell_type": "markdown",
"id": "345244a1",
"id": "8",
"metadata": {},
"source": [
"`ThreadPool` can easily be used via the function `.map`. This processes a list of jobs in order and outputs the results once all jobs are done. \n",
@@ -142,7 +142,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "6b71469e",
"id": "9",
"metadata": {},
"outputs": [],
"source": [
@@ -158,7 +158,7 @@
},
{
"cell_type": "markdown",
"id": "a786e543",
"id": "10",
"metadata": {},
"source": [
"`ThreadPool.map` can also be used with `Task.run_concurrently()` in which case the creation of the jobs becomes slightly easier."
@@ -167,7 +167,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "de3fe114",
"id": "11",
"metadata": {},
"outputs": [],
"source": [
@@ -184,7 +184,7 @@
},
{
"cell_type": "markdown",
"id": "4e775da7",
"id": "12",
"metadata": {},
"source": [
"<div class=\"alert alert-warning\">\n",
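The notebook cells above lean on ThreadPool.map; a minimal, self-contained sketch of that pattern (the sleep is a stand-in for an I/O-bound model call):

import time
from multiprocessing.dummy import Pool as ThreadPool  # thread-backed Pool


def long_running_job(value: int) -> int:
    time.sleep(0.5)  # placeholder for a network-bound request
    return value * 2


with ThreadPool(processes=4) as pool:
    # Runs the jobs concurrently; results come back in input order.
    results = pool.map(long_running_job, range(8))
print(results)  # [0, 2, 4, 6, 8, 10, 12, 14]

Because the work is I/O-bound, the threads overlap their waiting time even under the GIL.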
1 change: 0 additions & 1 deletion src/examples/qa.ipynb
@@ -69,7 +69,6 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
4 changes: 2 additions & 2 deletions src/intelligence_layer/connectors/argilla/argilla_client.py
@@ -113,7 +113,7 @@ def ensure_dataset_exists(
fields: all fields of this dataset.
questions: all questions for this dataset.
Returns:
dataset_id: the id of the dataset to be retrieved .
The id of the dataset to be retrieved .
"""
...

@@ -182,7 +182,7 @@ def ensure_workspace_exists(self, workspace_name: str) -> str:
Args:
workspace_name: the name of the workspace to be retrieved or created.
Returns:
workspace_id: The id of an argilla workspace with the given `workspace_name`.
The id of an argilla workspace with the given `workspace_name`.
"""
try:
return cast(str, self._create_workspace(workspace_name)["id"])
@@ -140,7 +140,12 @@ def _add_texts_to_memory(self, documents: Sequence[Document]) -> None:
def get_filtered_documents_with_scores(
self, query: str, filter: models.Filter
) -> Sequence[SearchResult[int]]:
"""Specific method for `InMemoryRetriever` to support filtering search results."""
"""Specific method for `InMemoryRetriever` to support filtering search results.

Args:
query: The text to be searched with.
filter: Conditions to filter by.
"""
query_embedding = self._embed(query, self._query_representation)
search_result = self._search_client.search(
collection_name=self._collection_name,
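A hypothetical call to the newly documented method; the metadata key and filter condition are assumptions for illustration, and retriever is an already populated InMemoryRetriever:

from qdrant_client.http import models

results = retriever.get_filtered_documents_with_scores(
    query="What is the Intelligence Layer?",
    filter=models.Filter(
        must=[
            models.FieldCondition(
                key="metadata.topic",
                match=models.MatchValue(value="documentation"),
            )
        ]
    ),
)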
8 changes: 6 additions & 2 deletions src/intelligence_layer/core/prompt_template.py
@@ -221,8 +221,10 @@ def __init__(self, template_str: str) -> None:
self._prompt_item_placeholders: dict[Placeholder, Union[Image, Tokens]] = {}

def placeholder(self, value: Union[Image, Tokens]) -> Placeholder:
"""Saves a non-text prompt item to the template and returns a placeholder
"""Saves a non-text prompt item to the template and returns a placeholder.

Args:
value: Tokens to store
The placeholder is used to embed the prompt item in the template
"""
id = Placeholder(uuid4())
@@ -279,7 +281,9 @@ def embed_prompt(self, prompt: Prompt) -> str:
def to_rich_prompt(self, **kwargs: Any) -> RichPrompt:
"""Creates a `Prompt` along with metadata from the template string and the given parameters.

Currently the only metadata returned is information about ranges that are marked in the template.
Args:
**kwargs: Parameters to enrich prompt with
Currently, the only metadata returned is information about ranges that are marked in the template.
Provided parameters are passed to `liquid.Template.render`.
"""
context = PromptRangeContext(
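A sketch of how the two documented methods fit together, following the docstrings above (the import paths and the image source are assumptions):

from aleph_alpha_client import Image
from intelligence_layer.core import PromptTemplate

template = PromptTemplate("{{question}} {{image}}")
# placeholder() stores the non-text item and returns a handle to embed in the template.
image_placeholder = template.placeholder(Image.from_file("diagram.png"))
# Keyword arguments are passed through to liquid.Template.render.
rich_prompt = template.to_rich_prompt(
    question="What does this diagram show?", image=image_placeholder
)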
2 changes: 1 addition & 1 deletion src/intelligence_layer/core/task.py
@@ -51,7 +51,7 @@ def do_run(self, input: Input, task_span: TaskSpan) -> Output:

Args:
input: Generic input defined by the task implementation
span: The `Span` used for tracing.
task_span: The `Span` used for tracing.
Returns:
Generic output defined by the task implementation.
"""
6 changes: 5 additions & 1 deletion src/intelligence_layer/core/text_highlight.py
@@ -148,7 +148,11 @@ def _raise_on_incompatible_prompt(self, input: TextHighlightInput) -> None:
"""Currently, the text highlight logic does not correctly deal with
multi item texts. This is a result of returning indices instead of text.
Therefore, we disable running text highlighting on prompts with more than one index
for the moment. This also means we only deal with text items."""
for the moment. This also means we only deal with text items.

Args:
input: The input for a text highlighting task.
"""
n_items = len(input.rich_prompt.items)
# the last item is always the question
if n_items > 2:
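The guard is cut off above; read together with the docstring, it amounts to the following sketch (the error message is an assumption):

def _raise_on_incompatible_prompt(self, input: TextHighlightInput) -> None:
    n_items = len(input.rich_prompt.items)
    # The last item is always the question, so more than two items means
    # more than one text item would need highlighting.
    if n_items > 2:
        raise ValueError(
            "Text highlighting currently supports at most one prompt item besides the question."
        )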
10 changes: 7 additions & 3 deletions src/intelligence_layer/evaluation/aggregation/accumulator.py
@@ -15,8 +15,10 @@ class Accumulator(ABC, Generic[T, Output]):
def add(self, value: T) -> None:
"""Responsible for accumulating values

:param value: the value to add
:return: nothing
Args:
value: the value to add
Returns:
nothing
"""
...

@@ -43,7 +45,9 @@ def add(self, value: float) -> None:
def extract(self) -> float:
"""Accumulates the mean

:return: 0.0 if no values were added before, else the mean"""
Returns:
float: 0.0 if no values were added before, else the mean
"""
return 0.0 if self._n == 0 else self._acc / self._n

def standard_deviation(self) -> float:
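For reference, a standalone reimplementation of the behavior the rewritten docstrings describe (not the library code itself):

class MeanAccumulator:
    def __init__(self) -> None:
        self._acc = 0.0
        self._n = 0

    def add(self, value: float) -> None:
        self._acc += value
        self._n += 1

    def extract(self) -> float:
        # 0.0 if no values were added before, else the mean.
        return 0.0 if self._n == 0 else self._acc / self._n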
6 changes: 3 additions & 3 deletions src/intelligence_layer/evaluation/aggregation/aggregator.py
@@ -182,14 +182,14 @@ def evaluation_type(self) -> type[Evaluation]:

@final
def aggregate_evaluation(
self, *eval_ids: str
self, *evaluation_ids: str
) -> AggregationOverview[AggregatedEvaluation]:
"""Aggregates all evaluations into an overview that includes high-level statistics.

Aggregates :class:`Evaluation`s according to the implementation of :func:`BaseEvaluator.aggregate`.

Args:
evaluation_overview: An overview of the evaluation to be aggregated. Does not include
evaluation_ids: Unique identifier of the evaluation overviews to be aggregated. Does not include
actual evaluations as these will be retrieved from the repository.

Returns:
Expand All @@ -207,7 +207,7 @@ def load_eval_overview(evaluation_id: str) -> EvaluationOverview:
return evaluation_overview

evaluation_overviews = frozenset(
load_eval_overview(evaluation_id) for evaluation_id in set(eval_ids)
load_eval_overview(evaluation_id) for evaluation_id in set(evaluation_ids)
)

nested_evaluations = [
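A hypothetical call showing the renamed variadic parameter in use (the ids are placeholders):

aggregation_overview = aggregator.aggregate_evaluation(
    "evaluation-id-1", "evaluation-id-2"  # any number of evaluation overview ids
)
print(aggregation_overview)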
@@ -1,9 +1,9 @@
from typing import Iterable, Sequence, cast

from datasets import Dataset as HFDataset # type: ignore
from datasets import DatasetDict, IterableDataset, IterableDatasetDict
from pydantic import BaseModel

from datasets import Dataset as HFDataset # type: ignore
from datasets import DatasetDict, IterableDataset, IterableDatasetDict
from intelligence_layer.core.task import Input
from intelligence_layer.evaluation.dataset.dataset_repository import DatasetRepository
from intelligence_layer.evaluation.dataset.domain import (