Skip to content

Commit

Permalink
ci(lint): Fix inconsistent docstrings
Browse files Browse the repository at this point in the history
TASK: IL-296
  • Loading branch information
MerlinKallenbornTNG committed Mar 27, 2024
1 parent 7d2517a commit f379ecd
Show file tree
Hide file tree
Showing 13 changed files with 43 additions and 17 deletions.
4 changes: 2 additions & 2 deletions Concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ The Intelligence Layer supports different kinds of evaluation techniques. Most i
a single output, but it is easier to compare two different outputs and decide which one is better. An example
use case could be summarization.

To support these techniques the Intelligence Layer differantiates between 3 consecutive steps:
To support these techniques the Intelligence Layer differentiates between 3 consecutive steps:

1. Run a task by feeding it all inputs of a dataset and collecting all outputs
2. Evaluate the outputs of one or several
Expand Down Expand Up @@ -197,7 +197,7 @@ There are the following Repositories:
and makes them available to the `Aggregator`.
- The `AggregationRepository` stores the `AggregationOverview` containing the aggregated metrics on request of the `Aggregator`.

The following diagramms illustrate how the different concepts play together in case of the different types of evaluations.
The following diagrams illustrate how the different concepts play together in case of the different types of evaluations.

<figure>
<img src="./assets/AbsoluteEvaluation.drawio.svg">
Expand Down
4 changes: 2 additions & 2 deletions src/intelligence_layer/connectors/argilla/argilla_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def ensure_dataset_exists(
fields: all fields of this dataset.
questions: all questions for this dataset.
Returns:
dataset_id: the id of the dataset to be retrieved .
The id of the dataset to be retrieved.
"""
...

Expand Down Expand Up @@ -182,7 +182,7 @@ def ensure_workspace_exists(self, workspace_name: str) -> str:
Args:
workspace_name: the name of the workspace to be retrieved or created.
Returns:
workspace_id: The id of an argilla workspace with the given `workspace_name`.
The id of an argilla workspace with the given `workspace_name`.
"""
try:
return cast(str, self._create_workspace(workspace_name)["id"])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,12 @@ def _add_texts_to_memory(self, documents: Sequence[Document]) -> None:
def get_filtered_documents_with_scores(
self, query: str, filter: models.Filter
) -> Sequence[SearchResult[int]]:
"""Specific method for `InMemoryRetriever` to support filtering search results."""
"""Specific method for `InMemoryRetriever` to support filtering search results.
Args:
query: The text to be searched with.
filter: Conditions to filter by.
"""
query_embedding = self._embed(query, self._query_representation)
search_result = self._search_client.search(
collection_name=self._collection_name,
Expand Down
10 changes: 7 additions & 3 deletions src/intelligence_layer/core/prompt_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,10 @@ def __init__(self, template_str: str) -> None:
self._prompt_item_placeholders: dict[Placeholder, Union[Image, Tokens]] = {}

def placeholder(self, value: Union[Image, Tokens]) -> Placeholder:
"""Saves a non-text prompt item to the template and returns a placeholder
"""Saves a non-text prompt item to the template and returns a placeholder.
Args:
value: The prompt item (image or tokens) to store.
The placeholder is used to embed the prompt item in the template
"""
id = Placeholder(uuid4())
Expand Down Expand Up @@ -279,8 +281,10 @@ def embed_prompt(self, prompt: Prompt) -> str:
def to_rich_prompt(self, **kwargs: Any) -> RichPrompt:
"""Creates a `Prompt` along with metadata from the template string and the given parameters.
Currently the only metadata returned is information about ranges that are marked in the template.
Provided parameters are passed to `liquid.Template.render`.
Args:
**kwargs: Parameters to enrich the prompt with.
Currently, the only metadata returned is information about ranges that are marked in the template.
Provided parameters are passed to `liquid.Template.render`.
"""
context = PromptRangeContext(
self._template.env,
Expand Down
2 changes: 1 addition & 1 deletion src/intelligence_layer/core/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def do_run(self, input: Input, task_span: TaskSpan) -> Output:
Args:
input: Generic input defined by the task implementation
span: The `Span` used for tracing.
task_span: The `Span` used for tracing.
Returns:
Generic output defined by the task implementation.
"""
Expand Down
6 changes: 5 additions & 1 deletion src/intelligence_layer/core/text_highlight.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,11 @@ def _raise_on_incompatible_prompt(self, input: TextHighlightInput) -> None:
"""Currently, the text highlight logic does not correctly deal with
multi item texts. This is a result of returning indices instead of text.
Therefore, we disable running text highlighting on prompts with more than one index
for the moment. This also means we only deal with text items."""
for the moment. This also means we only deal with text items.
Args:
input: The input for a text highlighting task.
"""
n_items = len(input.rich_prompt.items)
# the last item is always the question
if n_items > 2:
Expand Down
9 changes: 6 additions & 3 deletions src/intelligence_layer/evaluation/aggregation/accumulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,19 @@ class Accumulator(ABC, Generic[T, Output]):
def add(self, value: T) -> None:
"""Responsible for accumulating values
:param value: the value to add
:return: nothing
Args:
value: the value to add
Returns:
nothing
"""
...

@abstractmethod
def extract(self) -> Output:
"""Accumulates the final result
:return: the result of this calculation
Returns:
The result of this calculation.
"""
...

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def aggregate_evaluation(
Aggregates :class:`Evaluation`s according to the implementation of :func:`BaseEvaluator.aggregate`.
Args:
evaluation_overview: An overview of the evaluation to be aggregated. Does not include
eval_ids: The ids of the evaluations to be aggregated. Does not include
actual evaluations as these will be retrieved from the repository.
Returns:
Expand Down
2 changes: 1 addition & 1 deletion src/intelligence_layer/evaluation/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def evaluate_runs(
Always the first n runs stored in the evaluation repository
Returns:
An overview of the evaluation. Individual :class:`Evaluation`s will not be
EvaluationOverview: An overview of the evaluation. Individual :class:`Evaluation`s will not be
returned but instead stored in the :class:`EvaluationRepository` provided in the
__init__.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ def find_child(self, token: Token) -> Optional["TreeNode"]:
def insert_without_calculation(self, path: Sequence[TokenWithProb]) -> None:
"""Inserts a path into the tree without changing the original probability
Args:
path: Path to insert
Temporarily here until we change this data structure to be more versatile"""
if not path:
return
Expand Down
6 changes: 5 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ def token() -> str:

@fixture(scope="session")
def client(token: str) -> AlephAlphaClientProtocol:
"""Provide fixture for api."""
"""Provide fixture for api.
Args:
token: AA Token
"""
return LimitedConcurrencyClient(Client(token), max_concurrency=10)


Expand Down
3 changes: 3 additions & 0 deletions tests/core/test_echo.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ def test_overlapping_tokens_generate_correct_tokens(echo_task: Echo) -> None:
"""This test checks if the echo task correctly tokenizes the expected completion separately
The two tokens when tokenized together will result in a combination of the end of the first token
and the start of the second token. This is not the expected behaviour.
Args:
echo_task: Fixture used for this test.
"""
token1 = "ĠGastronomie"
token2 = "Baby"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datasets import Dataset, DatasetDict # type: ignore
from pytest import fixture

from datasets import Dataset, DatasetDict # type: ignore
from intelligence_layer.evaluation import (
MultipleChoiceInput,
SingleHuggingfaceDatasetRepository,
Expand Down

0 comments on commit f379ecd

Please sign in to comment.