From f479d1fb3b3277e4f8602b45065101247f53ca51 Mon Sep 17 00:00:00 2001
From: Merlin Kallenborn
Date: Wed, 27 Mar 2024 11:52:49 +0100
Subject: [PATCH 1/4] ci(lint): Add step docstring check for recency

TASK: IL-296
---
 .darglint2                                        |  3 +++
 pyproject.toml                                    |  2 ++
 scripts/lint.sh                                   |  1 +
 .../retrievers/qdrant_in_memory_retriever.py      |  7 ++++++-
 src/intelligence_layer/core/prompt_template.py    |  8 ++++++--
 src/intelligence_layer/core/task.py               |  2 +-
 .../evaluation/aggregation/accumulator.py         | 10 +++++++---
 .../evaluation/aggregation/aggregator.py          |  6 +++---
 .../evaluation/evaluation/evaluator.py            |  9 ++++++++-
 .../use_cases/classify/prompt_based_classify.py   |  3 +++
 10 files changed, 40 insertions(+), 11 deletions(-)
 create mode 100644 .darglint2

diff --git a/.darglint2 b/.darglint2
new file mode 100644
index 000000000..634474ff3
--- /dev/null
+++ b/.darglint2
@@ -0,0 +1,3 @@
+[darglint2]
+ignore=DAR003,DAR201,DAR301,DAR401
+docstring_style=google
diff --git a/pyproject.toml b/pyproject.toml
index a71b37da0..3a7dd99a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,7 @@ sphinx = "^7.2.6"
 pylama = { extras = ["all", "toml"], version = "^8.4.1" }
 faker = "^24.4.0"
 hypercorn = "0.16.0"
+darglint2 = "^1.8.2"
 
 [tool.mypy]
 files = "src,tests"
@@ -71,5 +72,6 @@ filterwarnings = [
 skip = "*/__init__.py,.venv/*,*/node_modules/*"
 ignore = "E501,E203"
 
+
 [tool.pylama.linter.mccabe]
 max-complexity = "11"
diff --git a/scripts/lint.sh b/scripts/lint.sh
index 0d1ae8a3e..004e8b7b2 100755
--- a/scripts/lint.sh
+++ b/scripts/lint.sh
@@ -3,3 +3,4 @@
 poetry run pre-commit run --all-files
 poetry run mypy
 poetry run pylama
+poetry run darglint2 -v2 src
diff --git a/src/intelligence_layer/connectors/retrievers/qdrant_in_memory_retriever.py b/src/intelligence_layer/connectors/retrievers/qdrant_in_memory_retriever.py
index 8f8a37e87..688c4eaf5 100644
--- a/src/intelligence_layer/connectors/retrievers/qdrant_in_memory_retriever.py
+++ b/src/intelligence_layer/connectors/retrievers/qdrant_in_memory_retriever.py
@@ -140,7 +140,12 @@ def _add_texts_to_memory(self, documents: Sequence[Document]) -> None:
     def get_filtered_documents_with_scores(
         self, query: str, filter: models.Filter
     ) -> Sequence[SearchResult[int]]:
-        """Specific method for `InMemoryRetriever` to support filtering search results."""
+        """Specific method for `InMemoryRetriever` to support filtering search results.
+
+        Args:
+            query: The text to be searched with.
+            filter: Conditions to filter by.
+        """
         query_embedding = self._embed(query, self._query_representation)
         search_result = self._search_client.search(
             collection_name=self._collection_name,
diff --git a/src/intelligence_layer/core/prompt_template.py b/src/intelligence_layer/core/prompt_template.py
index 790734a68..42cbbc488 100644
--- a/src/intelligence_layer/core/prompt_template.py
+++ b/src/intelligence_layer/core/prompt_template.py
@@ -221,8 +221,10 @@ def __init__(self, template_str: str) -> None:
         self._prompt_item_placeholders: dict[Placeholder, Union[Image, Tokens]] = {}
 
     def placeholder(self, value: Union[Image, Tokens]) -> Placeholder:
-        """Saves a non-text prompt item to the template and returns a placeholder
+        """Saves a non-text prompt item to the template and returns a placeholder.
 
+        Args:
+            value: The non-text prompt item (image or tokens) to store.
         The placeholder is used to embed the prompt item in the template
         """
         id = Placeholder(uuid4())
@@ -279,7 +281,9 @@ def embed_prompt(self, prompt: Prompt) -> str:
     def to_rich_prompt(self, **kwargs: Any) -> RichPrompt:
         """Creates a `Prompt` along with metadata from the template string and the given parameters.
 
-        Currently the only metadata returned is information about ranges that are marked in the template.
+        Args:
+            **kwargs: Parameters to render the template with.
+        Currently, the only metadata returned is information about ranges that are marked in the template.
         Provided parameters are passed to `liquid.Template.render`.
         """
         context = PromptRangeContext(
diff --git a/src/intelligence_layer/core/task.py b/src/intelligence_layer/core/task.py
index 301483b11..885079187 100644
--- a/src/intelligence_layer/core/task.py
+++ b/src/intelligence_layer/core/task.py
@@ -51,7 +51,7 @@ def do_run(self, input: Input, task_span: TaskSpan) -> Output:
 
         Args:
             input: Generic input defined by the task implementation
-            span: The `Span` used for tracing.
+            task_span: The `Span` used for tracing.
         Returns:
             Generic output defined by the task implementation.
         """
diff --git a/src/intelligence_layer/evaluation/aggregation/accumulator.py b/src/intelligence_layer/evaluation/aggregation/accumulator.py
index 0d80b573e..c250fa09a 100644
--- a/src/intelligence_layer/evaluation/aggregation/accumulator.py
+++ b/src/intelligence_layer/evaluation/aggregation/accumulator.py
@@ -15,8 +15,10 @@ class Accumulator(ABC, Generic[T, Output]):
     def add(self, value: T) -> None:
         """Responsible for accumulating values
 
-        :param value: the value to add
-        :return: nothing
+        Args:
+            value: the value to add
+        Returns:
+            nothing
         """
         ...
 
@@ -43,7 +45,9 @@ def add(self, value: float) -> None:
     def extract(self) -> float:
         """Accumulates the mean
 
-        :return: 0.0 if no values were added before, else the mean"""
+        Returns:
+            float: 0.0 if no values were added before, else the mean
+        """
         return 0.0 if self._n == 0 else self._acc / self._n
 
     def standard_deviation(self) -> float:
diff --git a/src/intelligence_layer/evaluation/aggregation/aggregator.py b/src/intelligence_layer/evaluation/aggregation/aggregator.py
index a5a4d937b..375b6819c 100644
--- a/src/intelligence_layer/evaluation/aggregation/aggregator.py
+++ b/src/intelligence_layer/evaluation/aggregation/aggregator.py
@@ -182,14 +182,14 @@ def evaluation_type(self) -> type[Evaluation]:
 
     @final
     def aggregate_evaluation(
-        self, *eval_ids: str
+        self, *evaluation_ids: str
     ) -> AggregationOverview[AggregatedEvaluation]:
         """Aggregates all evaluations into an overview that includes high-level statistics.
 
         Aggregates :class:`Evaluation`s according to the implementation of :func:`BaseEvaluator.aggregate`.
 
         Args:
-            evaluation_overview: An overview of the evaluation to be aggregated. Does not include
+            evaluation_ids: Unique identifiers of the evaluation overviews to be aggregated. Does not include
                 actual evaluations as these will be retrieved from the repository.
         Returns:
             An overview of the aggregated evaluation.
         """
 
@@ -207,7 +207,7 @@ def load_eval_overview(evaluation_id: str) -> EvaluationOverview:
             return evaluation_overview
 
         evaluation_overviews = frozenset(
-            load_eval_overview(evaluation_id) for evaluation_id in set(eval_ids)
+            load_eval_overview(evaluation_id) for evaluation_id in set(evaluation_ids)
         )
 
         nested_evaluations = [
diff --git a/src/intelligence_layer/evaluation/evaluation/evaluator.py b/src/intelligence_layer/evaluation/evaluation/evaluator.py
index af751958d..16190f49a 100644
--- a/src/intelligence_layer/evaluation/evaluation/evaluator.py
+++ b/src/intelligence_layer/evaluation/evaluation/evaluator.py
@@ -183,6 +183,10 @@ def output_type(self) -> type[Output]:
 
         Returns:
             the type of the evaluated task's output.
+
+        Raises:
+            TypeError: If the evaluated task's Output type cannot be determined.
+
         """
         try:
             output_type = self._get_types()["Output"]
@@ -207,6 +211,9 @@ def evaluation_type(self) -> type[Evaluation]:
 
         Returns:
             Returns the type of the evaluation result of an example.
+
+        Raises:
+            TypeError: If the Evaluation type cannot be determined.
         """
         try:
             evaluation_type = self._get_types()["Evaluation"]
@@ -237,7 +244,7 @@ def evaluate_runs(
             Always the first n runs stored in the evaluation repository
 
         Returns:
-            An overview of the evaluation. Individual :class:`Evaluation`s will not be
+            EvaluationOverview: An overview of the evaluation. Individual :class:`Evaluation`s will not be
                 returned but instead stored in the :class:`EvaluationRepository` provided in the
                 __init__.
         """
diff --git a/src/intelligence_layer/use_cases/classify/prompt_based_classify.py b/src/intelligence_layer/use_cases/classify/prompt_based_classify.py
index 8d6438bee..612d0b7b5 100644
--- a/src/intelligence_layer/use_cases/classify/prompt_based_classify.py
+++ b/src/intelligence_layer/use_cases/classify/prompt_based_classify.py
@@ -173,6 +173,9 @@ def find_child(self, token: Token) -> Optional["TreeNode"]:
     def insert_without_calculation(self, path: Sequence[TokenWithProb]) -> None:
         """Inserts a path into the tree without changing the original probability
 
+        Args:
+            path: Path to insert
+
         Temporarily here until we change this data structure to be more versatile"""
         if not path:
             return

From 8a46670a61c19096d3b8ba8e1a04cc45bcbe43ee Mon Sep 17 00:00:00 2001
From: Merlin Kallenborn
Date: Tue, 12 Mar 2024 16:37:37 +0100
Subject: [PATCH 2/4] ci(lint): Add step to check if docstrings match current implementation in lint.sh + update docstrings

TASK: IL-296
---
 poetry.lock                                      | 15 +++++++++++--
 .../connectors/argilla/argilla_client.py         |  4 ++--
 src/intelligence_layer/core/text_highlight.py    |  6 +++++-
 .../single_huggingface_dataset_repository.py     |  4 ++--
 4 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 396adf2c6..2a1301427 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiodns"
@@ -703,6 +703,17 @@ traitlets = ">=4"
 [package.extras]
 test = ["pytest"]
 
+[[package]]
+name = "darglint2"
+version = "1.8.2"
+description = "A utility for ensuring Google-style docstrings stay up to date with the source code."
+optional = false
+python-versions = ">=3.6,<4.0"
+files = [
+    {file = "darglint2-1.8.2-py3-none-any.whl", hash = "sha256:8f950c9b5fab25dd54bf537bef1569c267073e5828cb5ab76428876df6d947af"},
+    {file = "darglint2-1.8.2.tar.gz", hash = "sha256:11e0fc9c999bf09e192f42b72d202d177cb82da258eba387b24c2f0f5943650f"},
+]
+
 [[package]]
 name = "datasets"
 version = "2.18.0"
@@ -5292,4 +5303,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.12"
-content-hash = "b551964342853508feac9d45069d795f743dbbd641c7b819f4e6cc251e67622e"
+content-hash = "13bdc79f40242b5f5fb458f526c75ce869bf7edd2991b0e1720a036c5b1ab386"
diff --git a/src/intelligence_layer/connectors/argilla/argilla_client.py b/src/intelligence_layer/connectors/argilla/argilla_client.py
index 28ac0387c..f45c3a61e 100644
--- a/src/intelligence_layer/connectors/argilla/argilla_client.py
+++ b/src/intelligence_layer/connectors/argilla/argilla_client.py
@@ -113,7 +113,7 @@ def ensure_dataset_exists(
             fields: all fields of this dataset.
             questions: all questions for this dataset.
         Returns:
-            dataset_id: the id of the dataset to be retrieved .
+            The id of the dataset to be retrieved.
         """
         ...
 
@@ -182,7 +182,7 @@ def ensure_workspace_exists(self, workspace_name: str) -> str:
         Args:
             workspace_name: the name of the workspace to be retrieved or created.
         Returns:
-            workspace_id: The id of an argilla workspace with the given `workspace_name`.
+            The id of an argilla workspace with the given `workspace_name`.
         """
         try:
             return cast(str, self._create_workspace(workspace_name)["id"])
diff --git a/src/intelligence_layer/core/text_highlight.py b/src/intelligence_layer/core/text_highlight.py
index 6b47bb917..ccdedaebd 100644
--- a/src/intelligence_layer/core/text_highlight.py
+++ b/src/intelligence_layer/core/text_highlight.py
@@ -148,7 +148,11 @@ def _raise_on_incompatible_prompt(self, input: TextHighlightInput) -> None:
         """Currently, the text highlight logic does not correctly deal with multi item texts.
         This is a result of returning indices instead of text.
         Therefore, we disable running text highlighting on prompts with more than one index
-        for the moment. This also means we only deal with text items."""
+        for the moment. This also means we only deal with text items.
+
+        Args:
+            input: The input for a text highlighting task.
+ """ n_items = len(input.rich_prompt.items) # the last item is always the question if n_items > 2: diff --git a/src/intelligence_layer/evaluation/dataset/single_huggingface_dataset_repository.py b/src/intelligence_layer/evaluation/dataset/single_huggingface_dataset_repository.py index 184878f9a..e4e88fccc 100644 --- a/src/intelligence_layer/evaluation/dataset/single_huggingface_dataset_repository.py +++ b/src/intelligence_layer/evaluation/dataset/single_huggingface_dataset_repository.py @@ -1,9 +1,9 @@ from typing import Iterable, Sequence, cast -from datasets import Dataset as HFDataset # type: ignore -from datasets import DatasetDict, IterableDataset, IterableDatasetDict from pydantic import BaseModel +from datasets import Dataset as HFDataset # type: ignore +from datasets import DatasetDict, IterableDataset, IterableDatasetDict from intelligence_layer.core.task import Input from intelligence_layer.evaluation.dataset.dataset_repository import DatasetRepository from intelligence_layer.evaluation.dataset.domain import ( From f53e5c3aba6de53d4466b7fdf32543732d86d083 Mon Sep 17 00:00:00 2001 From: Merlin Kallenborn Date: Tue, 12 Mar 2024 16:37:37 +0100 Subject: [PATCH 3/4] ci(lint): Add step to check if docstrings match current implementation in lint.sh + update docstrings TASK: IL-296 --- .darglint2 | 2 +- .pre-commit-config.yaml | 16 ++++++++++------ Concepts.md | 4 ++-- pyproject.toml | 2 ++ scripts/lint.sh | 1 - src/examples/performance_tips.ipynb | 26 +++++++++++++------------- src/examples/qa.ipynb | 1 - tests/conftest.py | 6 +++++- tests/core/test_echo.py | 3 +++ 9 files changed, 36 insertions(+), 25 deletions(-) diff --git a/.darglint2 b/.darglint2 index 634474ff3..4bbb6a31a 100644 --- a/.darglint2 +++ b/.darglint2 @@ -1,3 +1,3 @@ [darglint2] -ignore=DAR003,DAR201,DAR301,DAR401 +ignore=DAR003,DAR201,DAR202,DAR301,DAR401 docstring_style=google diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 728d32cb2..c960e9203 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.5.0 hooks: - id: check-json exclude: trace-viewer/ @@ -21,22 +21,26 @@ repos: args: ["--profile", "black", "--filter-files"] verbose: true - repo: https://github.com/psf/black - rev: 24.2.0 + rev: 24.3.0 hooks: - id: black # https://black.readthedocs.io/en/stable/integrations/source_version_control.html#version-control-integration - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.2.0 + rev: 24.3.0 hooks: - id: black-jupyter - repo: https://github.com/kynan/nbstripout - rev: 0.4.0 + rev: 0.7.1 hooks: - id: nbstripout files: ".ipynb" - repo: https://github.com/codespell-project/codespell - rev: v2.2.4 + rev: v2.2.6 hooks: - id: codespell - args: ["-L", "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke"] + args: ["-L", "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke, vor"] exclude: '^(poetry\.lock|trace-viewer/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/qa/multiple_chunk_qa.py|src/intelligence_layer/use_cases/summarize/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/classify/keyword_extract.py|tests/use_cases/summarize/test_single_chunk_few_shot_summarize.py|tests/use_cases/summarize/very_long_text.txt)$' + - repo: https://github.com/akaihola/darglint2 + rev: v1.8.2 + hooks: + - id: darglint2 diff --git 
diff --git a/Concepts.md b/Concepts.md
index 9464d05c5..f42bfe2c7 100644
--- a/Concepts.md
+++ b/Concepts.md
@@ -153,7 +153,7 @@ The Intelligence Layer supports different kinds of evaluation techniques. Most i
 a single output, but it is easier to compare two different outputs and decide which one is better. An
 example use case could be summarization.
 
-To support these techniques the Intelligence Layer differantiates between 3 consecutive steps:
+To support these techniques the Intelligence Layer differentiates between 3 consecutive steps:
 
 1. Run a task by feeding it all inputs of a dataset and collecting all outputs
 2. Evaluate the outputs of one or several
@@ -197,7 +197,7 @@ There are the following Repositories:
   and makes them available to the `Aggregator`.
 - The `AggregationRepository` stores the `AggregationOverview` containing the aggregated metrics on
   request of the `Aggregator`.
 
-The following diagramms illustrate how the different concepts play together in case of the different types of evaluations.
+The following diagrams illustrate how the different concepts play together in case of the different types of evaluations.
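The three steps named in the Concepts.md excerpt above are driven by `Runner`, `Evaluator`, and `Aggregator` respectively. Only `Evaluator.evaluate_runs` and `Aggregator.aggregate_evaluation` appear verbatim in this series, so the wiring below is a minimal sketch under assumed names (`Runner.run_dataset`, the `.id` fields, and the package-root import path are not confirmed by this diff):

```python
from intelligence_layer.evaluation import Aggregator, Evaluator, Runner  # import path assumed


def run_evaluation_pipeline(
    runner: Runner, evaluator: Evaluator, aggregator: Aggregator, dataset_id: str
):
    # 1. Run: feed every input of the dataset to the task and collect the outputs.
    run_overview = runner.run_dataset(dataset_id)
    # 2. Evaluate: judge the collected outputs example by example.
    evaluation_overview = evaluator.evaluate_runs(run_overview.id)
    # 3. Aggregate: condense the per-example evaluations into overall metrics.
    return aggregator.aggregate_evaluation(evaluation_overview.id)
```

Each step hands only ids to the next one; the actual outputs and evaluations live in the respective repositories, which is why `aggregate_evaluation` above receives identifiers rather than evaluation objects.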
diff --git a/pyproject.toml b/pyproject.toml
index 3a7dd99a9..948ba3bdd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,6 +72,8 @@ filterwarnings = [
 skip = "*/__init__.py,.venv/*,*/node_modules/*"
 ignore = "E501,E203"
 
+[tool.darglint2]
+
 [tool.pylama.linter.mccabe]
 max-complexity = "11"
diff --git a/scripts/lint.sh b/scripts/lint.sh
index 004e8b7b2..0d1ae8a3e 100755
--- a/scripts/lint.sh
+++ b/scripts/lint.sh
@@ -3,4 +3,3 @@
 poetry run pre-commit run --all-files
 poetry run mypy
 poetry run pylama
-poetry run darglint2 -v2 src
diff --git a/src/examples/performance_tips.ipynb b/src/examples/performance_tips.ipynb
index aac04fed4..8a530f790 100644
--- a/src/examples/performance_tips.ipynb
+++ b/src/examples/performance_tips.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "d8767b2a",
+   "id": "0",
    "metadata": {},
    "source": [
     "# How to get more done in less time\n",
  },
  {
   "cell_type": "markdown",
-   "id": "e04cb25b",
+   "id": "1",
   "metadata": {},
   "source": [
    "## A single long running task\n",
  },
  {
   "cell_type": "markdown",
-   "id": "e7fbae35",
+   "id": "2",
   "metadata": {},
   "source": [
    "## Running one task multiple times\n",
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "04dac517",
+   "id": "3",
   "metadata": {},
   "outputs": [],
   "source": [
  },
  {
   "cell_type": "markdown",
-   "id": "f58f359a",
+   "id": "4",
   "metadata": {},
   "source": [
    "## Running several tasks at the same time\n",
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "8959fcec-dc54-4137-9cb8-3a9c70d6a3d0",
+   "id": "5",
   "metadata": {},
   "outputs": [],
   "source": [
  },
  {
   "cell_type": "markdown",
-   "id": "4e846c9c",
+   "id": "6",
   "metadata": {},
   "source": [
    "\n",
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "6c88c3a2",
+   "id": "7",
   "metadata": {},
   "outputs": [],
   "source": [
  },
  {
   "cell_type": "markdown",
-   "id": "345244a1",
+   "id": "8",
   "metadata": {},
   "source": [
    "`ThreadPool` can easily be used via the function `.map`. This processes a list of jobs in order and outputs the results once all jobs are done. \n",
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "6b71469e",
+   "id": "9",
   "metadata": {},
   "outputs": [],
   "source": [
  },
  {
   "cell_type": "markdown",
-   "id": "a786e543",
+   "id": "10",
   "metadata": {},
   "source": [
    "`ThreadPool.map` can also be used with `Task.run_concurrently()` in which case the creation of the jobs becomes slightly easier."
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "de3fe114",
+   "id": "11",
   "metadata": {},
   "outputs": [],
   "source": [
  },
  {
   "cell_type": "markdown",
-   "id": "4e775da7",
+   "id": "12",
   "metadata": {},
   "source": [
\n", diff --git a/src/examples/qa.ipynb b/src/examples/qa.ipynb index eecf3d6aa..17c8cd20c 100644 --- a/src/examples/qa.ipynb +++ b/src/examples/qa.ipynb @@ -69,7 +69,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [ diff --git a/tests/conftest.py b/tests/conftest.py index 9ea8c67cc..071956f43 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -37,7 +37,11 @@ def token() -> str: @fixture(scope="session") def client(token: str) -> AlephAlphaClientProtocol: - """Provide fixture for api.""" + """Provide fixture for api. + + Args: + token: AA Token + """ return LimitedConcurrencyClient(Client(token), max_concurrency=10) diff --git a/tests/core/test_echo.py b/tests/core/test_echo.py index 114603544..74ec008cb 100644 --- a/tests/core/test_echo.py +++ b/tests/core/test_echo.py @@ -121,6 +121,9 @@ def test_overlapping_tokens_generate_correct_tokens(echo_task: Echo) -> None: """This test checks if the echo task correctly tokenizes the expected completion separately The two tokens when tokenized together will result in a combination of the end of the first token and the start of the second token. This is not the expected behaviour. + + Args: + echo_task: Fixture used for this test """ token1 = "Ä Gastronomie" token2 = "Baby" From d9f13907a2583e62af40648056c68e120642fe42 Mon Sep 17 00:00:00 2001 From: Merlin Kallenborn Date: Tue, 12 Mar 2024 16:37:37 +0100 Subject: [PATCH 4/4] ci(lint): Add step to check if docstrings match current implementation in lint.sh + update docstrings TASK: IL-296 --- tests/evaluation/test_single_huggingface_dataset_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/evaluation/test_single_huggingface_dataset_repository.py b/tests/evaluation/test_single_huggingface_dataset_repository.py index 34170debc..6d658b010 100644 --- a/tests/evaluation/test_single_huggingface_dataset_repository.py +++ b/tests/evaluation/test_single_huggingface_dataset_repository.py @@ -1,6 +1,6 @@ -from datasets import Dataset, DatasetDict # type: ignore from pytest import fixture +from datasets import Dataset, DatasetDict # type: ignore from intelligence_layer.evaluation import ( MultipleChoiceInput, SingleHuggingfaceDatasetRepository,