From f4acc04fc24be33e279826ffa971b169cedd8f81 Mon Sep 17 00:00:00 2001
From: Moritz Althaus
Date: Fri, 9 Aug 2024 16:49:48 +0200
Subject: [PATCH] feat: maximum_tokens attribute of CompletionRequest defaults
 to None

---
 aleph_alpha_client/completion.py |  8 +++++---
 tests/test_complete.py           | 16 ++++++++++++----
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/aleph_alpha_client/completion.py b/aleph_alpha_client/completion.py
index e373fc1..6fe8282 100644
--- a/aleph_alpha_client/completion.py
+++ b/aleph_alpha_client/completion.py
@@ -15,12 +15,14 @@ class CompletionRequest:
             Unconditional completion can be started with an empty string (default).
             The prompt may contain a zero shot or few shot task.
 
-        maximum_tokens (int, optional, default 64):
+        maximum_tokens (int, optional, default None):
             The maximum number of tokens to be generated. Completion will terminate after the maximum number of tokens is reached.
             Increase this value to generate longer texts. A text is split into tokens. Usually there are more tokens than words.
             The maximum supported number of tokens depends on the model (for luminous-base, it may not exceed 2048 tokens).
-            The prompt's tokens plus the maximum_tokens request must not exceed this number.
+            The prompt's tokens plus the maximum_tokens request must not exceed this number. If set to None, the model will stop
+            generating tokens either when it outputs a sequence specified in `stop_sequences` or when it reaches its technical limit,
+            which for most models means that the sum of input and output tokens is equal to its context window.
 
         temperature (float, optional, default 0.0)
             A higher sampling temperature encourages the model to produce less probable outputs ("be more creative").
             Values are expected in a range from 0.0 to 1.0. Try high values (e.g. 0.9) for a more "creative" response and the default 0.0 for a well defined and repeatable answer.
@@ -181,7 +183,7 @@ class CompletionRequest:
     """
 
     prompt: Prompt
-    maximum_tokens: int = 64
+    maximum_tokens: Optional[int] = None
     temperature: float = 0.0
     top_k: int = 0
     top_p: float = 0.0
diff --git a/tests/test_complete.py b/tests/test_complete.py
index 82a9a9a..6c21063 100644
--- a/tests/test_complete.py
+++ b/tests/test_complete.py
@@ -18,9 +18,6 @@
 )
 
 
-# AsyncClient
-
-
 @pytest.mark.system_test
 async def test_can_complete_with_async_client(
     async_client: AsyncClient, model_name: str
@@ -35,7 +32,18 @@ async def test_can_complete_with_async_client(
     assert response.model_version is not None
 
 
-# Client
+@pytest.mark.system_test
+def test_complete_maximum_tokens_none(sync_client: Client, model_name: str):
+    request = CompletionRequest(
+        prompt=Prompt.from_text("Hello, World!"),
+        maximum_tokens=None,
+        stop_sequences=[","],
+    )
+
+    response = sync_client.complete(request, model=model_name)
+    assert len(response.completions) == 1
+    assert response.completions[0].completion is not None
+    assert len(response.completions[0].completion) < 100
 
 
 @pytest.mark.system_test
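
A minimal usage sketch of the new default, not part of the patch itself. It
assumes a valid API token and a served model; the token placeholder and the
"luminous-base" model name are illustrative. With maximum_tokens left at its
new default of None, generation ends either at a stop sequence or at the
model's technical limit:

    from aleph_alpha_client import Client, CompletionRequest, Prompt

    # Hypothetical setup; substitute a real API token.
    client = Client(token="AA_API_TOKEN")

    # maximum_tokens is omitted and therefore None: the request relies on
    # stop_sequences (or the model's context window) to end generation.
    request = CompletionRequest(
        prompt=Prompt.from_text("An apple a day"),
        stop_sequences=["."],
    )

    response = client.complete(request, model="luminous-base")
    print(response.completions[0].completion)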