From f479d1fb3b3277e4f8602b45065101247f53ca51 Mon Sep 17 00:00:00 2001
From: Merlin Kallenborn
Date: Wed, 27 Mar 2024 11:52:49 +0100
Subject: [PATCH 1/4] ci(lint): Add step docstring check for recency

TASK: IL-296
---
 .darglint2                                        |  3 +++
 pyproject.toml                                    |  2 ++
 scripts/lint.sh                                   |  1 +
 .../retrievers/qdrant_in_memory_retriever.py      |  7 ++++++-
 src/intelligence_layer/core/prompt_template.py    |  8 ++++++--
 src/intelligence_layer/core/task.py               |  2 +-
 .../evaluation/aggregation/accumulator.py         | 10 +++++++---
 .../evaluation/aggregation/aggregator.py          |  6 +++---
 .../evaluation/evaluation/evaluator.py            |  9 ++++++++-
 .../use_cases/classify/prompt_based_classify.py   |  3 +++
 10 files changed, 40 insertions(+), 11 deletions(-)
 create mode 100644 .darglint2

diff --git a/.darglint2 b/.darglint2
new file mode 100644
index 000000000..634474ff3
--- /dev/null
+++ b/.darglint2
@@ -0,0 +1,3 @@
+[darglint2]
+ignore=DAR003,DAR201,DAR301,DAR401
+docstring_style=google
diff --git a/pyproject.toml b/pyproject.toml
index a71b37da0..3a7dd99a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,7 @@ sphinx = "^7.2.6"
 pylama = { extras = ["all", "toml"], version = "^8.4.1" }
 faker = "^24.4.0"
 hypercorn = "0.16.0"
+darglint2 = "^1.8.2"
 
 [tool.mypy]
 files = "src,tests"
@@ -71,5 +72,6 @@ filterwarnings = [
 skip = "*/__init__.py,.venv/*,*/node_modules/*"
 ignore = "E501,E203"
 
+
 [tool.pylama.linter.mccabe]
 max-complexity = "11"
diff --git a/scripts/lint.sh b/scripts/lint.sh
index 0d1ae8a3e..004e8b7b2 100755
--- a/scripts/lint.sh
+++ b/scripts/lint.sh
@@ -3,3 +3,4 @@
 poetry run pre-commit run --all-files
 poetry run mypy
 poetry run pylama
+poetry run darglint2 -v2 src
diff --git a/src/intelligence_layer/connectors/retrievers/qdrant_in_memory_retriever.py b/src/intelligence_layer/connectors/retrievers/qdrant_in_memory_retriever.py
index 8f8a37e87..688c4eaf5 100644
--- a/src/intelligence_layer/connectors/retrievers/qdrant_in_memory_retriever.py
+++ b/src/intelligence_layer/connectors/retrievers/qdrant_in_memory_retriever.py
@@ -140,7 +140,12 @@ def _add_texts_to_memory(self, documents: Sequence[Document]) -> None:
     def get_filtered_documents_with_scores(
         self, query: str, filter: models.Filter
     ) -> Sequence[SearchResult[int]]:
-        """Specific method for `InMemoryRetriever` to support filtering search results."""
+        """Specific method for `InMemoryRetriever` to support filtering search results.
+
+        Args:
+            query: The text to be searched with.
+            filter: Conditions to filter by.
+        """
         query_embedding = self._embed(query, self._query_representation)
         search_result = self._search_client.search(
             collection_name=self._collection_name,
diff --git a/src/intelligence_layer/core/prompt_template.py b/src/intelligence_layer/core/prompt_template.py
index 790734a68..42cbbc488 100644
--- a/src/intelligence_layer/core/prompt_template.py
+++ b/src/intelligence_layer/core/prompt_template.py
@@ -221,8 +221,10 @@ def __init__(self, template_str: str) -> None:
         self._prompt_item_placeholders: dict[Placeholder, Union[Image, Tokens]] = {}
 
     def placeholder(self, value: Union[Image, Tokens]) -> Placeholder:
-        """Saves a non-text prompt item to the template and returns a placeholder
+        """Saves a non-text prompt item to the template and returns a placeholder.
 
+        Args:
+            value: The non-text prompt item (image or tokens) to store.
         The placeholder is used to embed the prompt item in the template
         """
         id = Placeholder(uuid4())
@@ -279,7 +281,9 @@ def embed_prompt(self, prompt: Prompt) -> str:
     def to_rich_prompt(self, **kwargs: Any) -> RichPrompt:
         """Creates a `Prompt` along with metadata from the template string and the given parameters.
 
-        Currently the only metadata returned is information about ranges that are marked in the template.
+        Args:
+            **kwargs: Parameters to render the template with.
+        Currently, the only metadata returned is information about ranges that are marked in the template.
         Provided parameters are passed to `liquid.Template.render`.
         """
         context = PromptRangeContext(
diff --git a/src/intelligence_layer/core/task.py b/src/intelligence_layer/core/task.py
index 301483b11..885079187 100644
--- a/src/intelligence_layer/core/task.py
+++ b/src/intelligence_layer/core/task.py
@@ -51,7 +51,7 @@ def do_run(self, input: Input, task_span: TaskSpan) -> Output:
 
         Args:
             input: Generic input defined by the task implementation
-            span: The `Span` used for tracing.
+            task_span: The `Span` used for tracing.
         Returns:
             Generic output defined by the task implementation.
         """
diff --git a/src/intelligence_layer/evaluation/aggregation/accumulator.py b/src/intelligence_layer/evaluation/aggregation/accumulator.py
index 0d80b573e..c250fa09a 100644
--- a/src/intelligence_layer/evaluation/aggregation/accumulator.py
+++ b/src/intelligence_layer/evaluation/aggregation/accumulator.py
@@ -15,8 +15,10 @@ class Accumulator(ABC, Generic[T, Output]):
     def add(self, value: T) -> None:
         """Responsible for accumulating values
 
-        :param value: the value to add
-        :return: nothing
+        Args:
+            value: the value to add
+        Returns:
+            nothing
         """
         ...
 
@@ -43,7 +45,9 @@ def add(self, value: float) -> None:
     def extract(self) -> float:
         """Accumulates the mean
 
-        :return: 0.0 if no values were added before, else the mean"""
+        Returns:
+            float: 0.0 if no values were added before, else the mean
+        """
         return 0.0 if self._n == 0 else self._acc / self._n
 
     def standard_deviation(self) -> float:
diff --git a/src/intelligence_layer/evaluation/aggregation/aggregator.py b/src/intelligence_layer/evaluation/aggregation/aggregator.py
index a5a4d937b..375b6819c 100644
--- a/src/intelligence_layer/evaluation/aggregation/aggregator.py
+++ b/src/intelligence_layer/evaluation/aggregation/aggregator.py
@@ -182,14 +182,14 @@ def evaluation_type(self) -> type[Evaluation]:
 
     @final
     def aggregate_evaluation(
-        self, *eval_ids: str
+        self, *evaluation_ids: str
     ) -> AggregationOverview[AggregatedEvaluation]:
         """Aggregates all evaluations into an overview that includes high-level statistics.
 
         Aggregates :class:`Evaluation`s according to the implementation of :func:`BaseEvaluator.aggregate`.
 
         Args:
-            evaluation_overview: An overview of the evaluation to be aggregated. Does not include
+            evaluation_ids: Unique identifiers of the evaluation overviews to be aggregated. Does not include
                 actual evaluations as these will be retrieved from the repository.
         Returns:
             An overview of the aggregated evaluation.
         """
 
@@ -207,7 +207,7 @@ def load_eval_overview(evaluation_id: str) -> EvaluationOverview:
             return evaluation_overview
 
         evaluation_overviews = frozenset(
-            load_eval_overview(evaluation_id) for evaluation_id in set(eval_ids)
+            load_eval_overview(evaluation_id) for evaluation_id in set(evaluation_ids)
         )
 
         nested_evaluations = [
diff --git a/src/intelligence_layer/evaluation/evaluation/evaluator.py b/src/intelligence_layer/evaluation/evaluation/evaluator.py
index af751958d..16190f49a 100644
--- a/src/intelligence_layer/evaluation/evaluation/evaluator.py
+++ b/src/intelligence_layer/evaluation/evaluation/evaluator.py
@@ -183,6 +183,10 @@ def output_type(self) -> type[Output]:
 
         Returns:
             the type of the evaluated task's output.
+
+        Raises:
+            TypeError: If the evaluated task's Output type cannot be determined.
+
         """
         try:
             output_type = self._get_types()["Output"]
@@ -207,6 +211,9 @@ def evaluation_type(self) -> type[Evaluation]:
 
         Returns:
             Returns the type of the evaluation result of an example.
+
+        Raises:
+            TypeError: If the Evaluation type cannot be determined.
         """
         try:
             evaluation_type = self._get_types()["Evaluation"]
@@ -237,7 +244,7 @@ def evaluate_runs(
             Always the first n runs stored in the evaluation repository
 
         Returns:
-            An overview of the evaluation. Individual :class:`Evaluation`s will not be
+            EvaluationOverview: An overview of the evaluation. Individual :class:`Evaluation`s will not be
                 returned but instead stored in the :class:`EvaluationRepository` provided in the
                 __init__.
         """
diff --git a/src/intelligence_layer/use_cases/classify/prompt_based_classify.py b/src/intelligence_layer/use_cases/classify/prompt_based_classify.py
index 8d6438bee..612d0b7b5 100644
--- a/src/intelligence_layer/use_cases/classify/prompt_based_classify.py
+++ b/src/intelligence_layer/use_cases/classify/prompt_based_classify.py
@@ -173,6 +173,9 @@ def find_child(self, token: Token) -> Optional["TreeNode"]:
     def insert_without_calculation(self, path: Sequence[TokenWithProb]) -> None:
         """Inserts a path into the tree without changing the original probability
 
+        Args:
+            path: Path to insert
+
         Temporarily here until we change this data structure to be more versatile"""
         if not path:
             return

From 8a46670a61c19096d3b8ba8e1a04cc45bcbe43ee Mon Sep 17 00:00:00 2001
From: Merlin Kallenborn
Date: Tue, 12 Mar 2024 16:37:37 +0100
Subject: [PATCH 2/4] ci(lint): Add step to check if docstrings match current implementation in lint.sh + update docstrings

TASK: IL-296
---
 poetry.lock                                      | 15 +++++++++++--
 .../connectors/argilla/argilla_client.py         |  4 ++--
 src/intelligence_layer/core/text_highlight.py    |  6 +++++-
 .../single_huggingface_dataset_repository.py     |  4 ++--
 4 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 396adf2c6..2a1301427 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiodns"
@@ -703,6 +703,17 @@ traitlets = ">=4"
 [package.extras]
 test = ["pytest"]
 
+[[package]]
+name = "darglint2"
+version = "1.8.2"
+description = "A utility for ensuring Google-style docstrings stay up to date with the source code."
+optional = false
+python-versions = ">=3.6,<4.0"
+files = [
+    {file = "darglint2-1.8.2-py3-none-any.whl", hash = "sha256:8f950c9b5fab25dd54bf537bef1569c267073e5828cb5ab76428876df6d947af"},
+    {file = "darglint2-1.8.2.tar.gz", hash = "sha256:11e0fc9c999bf09e192f42b72d202d177cb82da258eba387b24c2f0f5943650f"},
+]
+
 [[package]]
 name = "datasets"
 version = "2.18.0"
@@ -5292,4 +5303,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.12"
-content-hash = "b551964342853508feac9d45069d795f743dbbd641c7b819f4e6cc251e67622e"
+content-hash = "13bdc79f40242b5f5fb458f526c75ce869bf7edd2991b0e1720a036c5b1ab386"
diff --git a/src/intelligence_layer/connectors/argilla/argilla_client.py b/src/intelligence_layer/connectors/argilla/argilla_client.py
index 28ac0387c..f45c3a61e 100644
--- a/src/intelligence_layer/connectors/argilla/argilla_client.py
+++ b/src/intelligence_layer/connectors/argilla/argilla_client.py
@@ -113,7 +113,7 @@ def ensure_dataset_exists(
             fields: all fields of this dataset.
             questions: all questions for this dataset.
         Returns:
-            dataset_id: the id of the dataset to be retrieved .
+            The id of the dataset to be retrieved.
         """
         ...
 
@@ -182,7 +182,7 @@ def ensure_workspace_exists(self, workspace_name: str) -> str:
         Args:
             workspace_name: the name of the workspace to be retrieved or created.
         Returns:
-            workspace_id: The id of an argilla workspace with the given `workspace_name`.
+            The id of an argilla workspace with the given `workspace_name`.
         """
         try:
             return cast(str, self._create_workspace(workspace_name)["id"])
diff --git a/src/intelligence_layer/core/text_highlight.py b/src/intelligence_layer/core/text_highlight.py
index 6b47bb917..ccdedaebd 100644
--- a/src/intelligence_layer/core/text_highlight.py
+++ b/src/intelligence_layer/core/text_highlight.py
@@ -148,7 +148,11 @@ def _raise_on_incompatible_prompt(self, input: TextHighlightInput) -> None:
         """Currently, the text highlight logic does not correctly deal with multi item texts.
         This is a result of returning indices instead of text.
         Therefore, we disable running text highlighting on prompts with more than one index
-        for the moment. This also means we only deal with text items."""
+        for the moment. This also means we only deal with text items.
+
+        Args:
+            input: The input for a text highlighting task.
+ """ n_items = len(input.rich_prompt.items) # the last item is always the question if n_items > 2: diff --git a/src/intelligence_layer/evaluation/dataset/single_huggingface_dataset_repository.py b/src/intelligence_layer/evaluation/dataset/single_huggingface_dataset_repository.py index 184878f9a..e4e88fccc 100644 --- a/src/intelligence_layer/evaluation/dataset/single_huggingface_dataset_repository.py +++ b/src/intelligence_layer/evaluation/dataset/single_huggingface_dataset_repository.py @@ -1,9 +1,9 @@ from typing import Iterable, Sequence, cast -from datasets import Dataset as HFDataset # type: ignore -from datasets import DatasetDict, IterableDataset, IterableDatasetDict from pydantic import BaseModel +from datasets import Dataset as HFDataset # type: ignore +from datasets import DatasetDict, IterableDataset, IterableDatasetDict from intelligence_layer.core.task import Input from intelligence_layer.evaluation.dataset.dataset_repository import DatasetRepository from intelligence_layer.evaluation.dataset.domain import ( From f53e5c3aba6de53d4466b7fdf32543732d86d083 Mon Sep 17 00:00:00 2001 From: Merlin Kallenborn Date: Tue, 12 Mar 2024 16:37:37 +0100 Subject: [PATCH 3/4] ci(lint): Add step to check if docstrings match current implementation in lint.sh + update docstrings TASK: IL-296 --- .darglint2 | 2 +- .pre-commit-config.yaml | 16 ++++++++++------ Concepts.md | 4 ++-- pyproject.toml | 2 ++ scripts/lint.sh | 1 - src/examples/performance_tips.ipynb | 26 +++++++++++++------------- src/examples/qa.ipynb | 1 - tests/conftest.py | 6 +++++- tests/core/test_echo.py | 3 +++ 9 files changed, 36 insertions(+), 25 deletions(-) diff --git a/.darglint2 b/.darglint2 index 634474ff3..4bbb6a31a 100644 --- a/.darglint2 +++ b/.darglint2 @@ -1,3 +1,3 @@ [darglint2] -ignore=DAR003,DAR201,DAR301,DAR401 +ignore=DAR003,DAR201,DAR202,DAR301,DAR401 docstring_style=google diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 728d32cb2..c960e9203 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.5.0 hooks: - id: check-json exclude: trace-viewer/ @@ -21,22 +21,26 @@ repos: args: ["--profile", "black", "--filter-files"] verbose: true - repo: https://github.com/psf/black - rev: 24.2.0 + rev: 24.3.0 hooks: - id: black # https://black.readthedocs.io/en/stable/integrations/source_version_control.html#version-control-integration - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.2.0 + rev: 24.3.0 hooks: - id: black-jupyter - repo: https://github.com/kynan/nbstripout - rev: 0.4.0 + rev: 0.7.1 hooks: - id: nbstripout files: ".ipynb" - repo: https://github.com/codespell-project/codespell - rev: v2.2.4 + rev: v2.2.6 hooks: - id: codespell - args: ["-L", "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke"] + args: ["-L", "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke, vor"] exclude: '^(poetry\.lock|trace-viewer/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/qa/multiple_chunk_qa.py|src/intelligence_layer/use_cases/summarize/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/use_cases/classify/keyword_extract.py|tests/use_cases/summarize/test_single_chunk_few_shot_summarize.py|tests/use_cases/summarize/very_long_text.txt)$' + - repo: https://github.com/akaihola/darglint2 + rev: v1.8.2 + hooks: + - id: darglint2 diff --git 
diff --git a/Concepts.md b/Concepts.md
index 9464d05c5..f42bfe2c7 100644
--- a/Concepts.md
+++ b/Concepts.md
@@ -153,7 +153,7 @@ The Intelligence Layer supports different kinds of evaluation techniques. Most i
 a single output, but it is easier to compare two different outputs and decide which one is better. An
 example use case could be summarization.
 
-To support these techniques the Intelligence Layer differantiates between 3 consecutive steps:
+To support these techniques the Intelligence Layer differentiates between 3 consecutive steps:
 
 1. Run a task by feeding it all inputs of a dataset and collecting all outputs
 2. Evaluate the outputs of one or several
@@ -197,7 +197,7 @@ There are the following Repositories:
   and makes them available to the `Aggregator`.
 - The `AggregationRepository` stores the `AggregationOverview` containing the aggregated metrics on
   request of the `Aggregator`.
 
-The following diagramms illustrate how the different concepts play together in case of the different types of evaluations.
+The following diagrams illustrate how the different concepts play together in case of the different types of evaluations.
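The three steps named in the Concepts.md excerpt above are driven by `Runner`, `Evaluator`, and `Aggregator` respectively. Only `Evaluator.evaluate_runs` and `Aggregator.aggregate_evaluation` appear verbatim in this series, so the wiring below is a minimal sketch under assumed names (`Runner.run_dataset`, the `.id` fields, and the package-root import path are not confirmed by this diff):

```python
from intelligence_layer.evaluation import Aggregator, Evaluator, Runner  # import path assumed


def run_evaluation_pipeline(
    runner: Runner, evaluator: Evaluator, aggregator: Aggregator, dataset_id: str
):
    # 1. Run: feed every input of the dataset to the task and collect the outputs.
    run_overview = runner.run_dataset(dataset_id)
    # 2. Evaluate: judge the collected outputs example by example.
    evaluation_overview = evaluator.evaluate_runs(run_overview.id)
    # 3. Aggregate: condense the per-example evaluations into overall metrics.
    return aggregator.aggregate_evaluation(evaluation_overview.id)
```

Each step hands only ids to the next one; the actual outputs and evaluations live in the respective repositories, which is why `aggregate_evaluation` above receives identifiers rather than evaluation objects.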
diff --git a/pyproject.toml b/pyproject.toml
index 3a7dd99a9..948ba3bdd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,6 +72,8 @@ filterwarnings = [
 skip = "*/__init__.py,.venv/*,*/node_modules/*"
 ignore = "E501,E203"
 
+[tool.darglint2]
+
 [tool.pylama.linter.mccabe]
 max-complexity = "11"
diff --git a/scripts/lint.sh b/scripts/lint.sh
index 004e8b7b2..0d1ae8a3e 100755
--- a/scripts/lint.sh
+++ b/scripts/lint.sh
@@ -3,4 +3,3 @@
 poetry run pre-commit run --all-files
 poetry run mypy
 poetry run pylama
-poetry run darglint2 -v2 src
diff --git a/src/examples/performance_tips.ipynb b/src/examples/performance_tips.ipynb
index aac04fed4..8a530f790 100644
--- a/src/examples/performance_tips.ipynb
+++ b/src/examples/performance_tips.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "d8767b2a",
+   "id": "0",
    "metadata": {},
    "source": [
     "# How to get more done in less time\n",
  },
  {
   "cell_type": "markdown",
-   "id": "e04cb25b",
+   "id": "1",
   "metadata": {},
   "source": [
    "## A single long running task\n",
  },
  {
   "cell_type": "markdown",
-   "id": "e7fbae35",
+   "id": "2",
   "metadata": {},
   "source": [
    "## Running one task multiple times\n",
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "04dac517",
+   "id": "3",
   "metadata": {},
   "outputs": [],
   "source": [
  },
  {
   "cell_type": "markdown",
-   "id": "f58f359a",
+   "id": "4",
   "metadata": {},
   "source": [
    "## Running several tasks at the same time\n",
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "8959fcec-dc54-4137-9cb8-3a9c70d6a3d0",
+   "id": "5",
   "metadata": {},
   "outputs": [],
   "source": [
  },
  {
   "cell_type": "markdown",
-   "id": "4e846c9c",
+   "id": "6",
   "metadata": {},
   "source": [
    "\n",
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "6c88c3a2",
+   "id": "7",
   "metadata": {},
   "outputs": [],
   "source": [
  },
  {
   "cell_type": "markdown",
-   "id": "345244a1",
+   "id": "8",
   "metadata": {},
   "source": [
    "`ThreadPool` can easily be used via the function `.map`. This processes a list of jobs in order and outputs the results once all jobs are done. \n",
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "6b71469e",
+   "id": "9",
   "metadata": {},
   "outputs": [],
   "source": [
  },
  {
   "cell_type": "markdown",
-   "id": "a786e543",
+   "id": "10",
   "metadata": {},
   "source": [
    "`ThreadPool.map` can also be used with `Task.run_concurrently()` in which case the creation of the jobs becomes slightly easier."
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "de3fe114",
+   "id": "11",
   "metadata": {},
   "outputs": [],
   "source": [
  },
  {
   "cell_type": "markdown",
-   "id": "4e775da7",
+   "id": "12",
   "metadata": {},
   "source": [
\n", diff --git a/src/examples/qa.ipynb b/src/examples/qa.ipynb index eecf3d6aa..17c8cd20c 100644 --- a/src/examples/qa.ipynb +++ b/src/examples/qa.ipynb @@ -69,7 +69,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": [ diff --git a/tests/conftest.py b/tests/conftest.py index 9ea8c67cc..071956f43 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -37,7 +37,11 @@ def token() -> str: @fixture(scope="session") def client(token: str) -> AlephAlphaClientProtocol: - """Provide fixture for api.""" + """Provide fixture for api. + + Args: + token: AA Token + """ return LimitedConcurrencyClient(Client(token), max_concurrency=10) diff --git a/tests/core/test_echo.py b/tests/core/test_echo.py index 114603544..74ec008cb 100644 --- a/tests/core/test_echo.py +++ b/tests/core/test_echo.py @@ -121,6 +121,9 @@ def test_overlapping_tokens_generate_correct_tokens(echo_task: Echo) -> None: """This test checks if the echo task correctly tokenizes the expected completion separately The two tokens when tokenized together will result in a combination of the end of the first token and the start of the second token. This is not the expected behaviour. + + Args: + echo_task: Fixture used for this test """ token1 = "Ä Gastronomie" token2 = "Baby" From d9f13907a2583e62af40648056c68e120642fe42 Mon Sep 17 00:00:00 2001 From: Merlin Kallenborn Date: Tue, 12 Mar 2024 16:37:37 +0100 Subject: [PATCH 4/4] ci(lint): Add step to check if docstrings match current implementation in lint.sh + update docstrings TASK: IL-296 --- tests/evaluation/test_single_huggingface_dataset_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/evaluation/test_single_huggingface_dataset_repository.py b/tests/evaluation/test_single_huggingface_dataset_repository.py index 34170debc..6d658b010 100644 --- a/tests/evaluation/test_single_huggingface_dataset_repository.py +++ b/tests/evaluation/test_single_huggingface_dataset_repository.py @@ -1,6 +1,6 @@ -from datasets import Dataset, DatasetDict # type: ignore from pytest import fixture +from datasets import Dataset, DatasetDict # type: ignore from intelligence_layer.evaluation import ( MultipleChoiceInput, SingleHuggingfaceDatasetRepository,