refactor(client)!: remove legacy client.count_tokens() method #726

Merged · 1 commit · Nov 4, 2024
1 change: 0 additions & 1 deletion pyproject.toml
@@ -15,7 +15,6 @@ dependencies = [
     "distro>=1.7.0, <2",
     "sniffio",
     "cached-property; python_version < '3.8'",
-    "tokenizers >= 0.13.0",
     "jiter>=0.4.0, <1",
 ]
 requires-python = ">= 3.7"

20 changes: 0 additions & 20 deletions requirements-dev.lock
@@ -30,9 +30,6 @@ cachetools==5.5.0
 certifi==2023.7.22
     # via httpcore
     # via httpx
-    # via requests
-charset-normalizer==3.4.0
-    # via requests
 colorlog==6.7.0
     # via nox
 dirty-equals==0.6.0
@@ -44,10 +41,7 @@ exceptiongroup==1.2.2
     # via anyio
     # via pytest
 filelock==3.12.4
-    # via huggingface-hub
     # via virtualenv
-fsspec==2024.10.0
-    # via huggingface-hub
 google-auth==2.35.0
     # via anthropic
 h11==0.14.0
@@ -57,12 +51,9 @@ httpcore==1.0.2
 httpx==0.25.2
     # via anthropic
     # via respx
-huggingface-hub==0.26.1
-    # via tokenizers
 idna==3.4
     # via anyio
     # via httpx
-    # via requests
 importlib-metadata==7.0.0
 iniconfig==2.0.0
     # via pytest
@@ -82,7 +73,6 @@ nodeenv==1.8.0
     # via pyright
 nox==2023.4.22
 packaging==23.2
-    # via huggingface-hub
     # via nox
     # via pytest
 platformdirs==3.11.0
@@ -109,10 +99,6 @@ python-dateutil==2.8.2
     # via time-machine
 pytz==2023.3.post1
     # via dirty-equals
-pyyaml==6.0.2
-    # via huggingface-hub
-requests==2.32.3
-    # via huggingface-hub
 respx==0.20.2
 rich==13.7.1
 rsa==4.9
@@ -129,13 +115,9 @@ sniffio==1.3.0
     # via anyio
     # via httpx
 time-machine==2.9.0
-tokenizers==0.20.1
-    # via anthropic
 tomli==2.0.2
     # via mypy
     # via pytest
-tqdm==4.66.5
-    # via huggingface-hub
 types-awscrt==0.23.0
     # via botocore-stubs
 types-s3transfer==0.10.3
@@ -144,13 +126,11 @@ typing-extensions==4.12.2
     # via anthropic
     # via anyio
     # via boto3-stubs
-    # via huggingface-hub
     # via mypy
     # via pydantic
     # via pydantic-core
 urllib3==1.26.20
     # via botocore
-    # via requests
 virtualenv==20.24.5
     # via nox
 zipp==3.17.0

22 changes: 0 additions & 22 deletions requirements.lock
@@ -25,17 +25,10 @@ cachetools==5.5.0
 certifi==2023.7.22
     # via httpcore
     # via httpx
-    # via requests
-charset-normalizer==3.4.0
-    # via requests
 distro==1.8.0
     # via anthropic
 exceptiongroup==1.2.2
     # via anyio
-filelock==3.16.1
-    # via huggingface-hub
-fsspec==2024.10.0
-    # via huggingface-hub
 google-auth==2.35.0
     # via anthropic
 h11==0.14.0
@@ -44,19 +37,14 @@ httpcore==1.0.2
     # via httpx
 httpx==0.25.2
     # via anthropic
-huggingface-hub==0.26.1
-    # via tokenizers
 idna==3.4
     # via anyio
     # via httpx
-    # via requests
 jiter==0.6.1
     # via anthropic
 jmespath==1.0.1
     # via boto3
     # via botocore
-packaging==24.1
-    # via huggingface-hub
 pyasn1==0.6.1
     # via pyasn1-modules
     # via rsa
@@ -68,10 +56,6 @@ pydantic-core==2.23.4
     # via pydantic
 python-dateutil==2.9.0.post0
     # via botocore
-pyyaml==6.0.2
-    # via huggingface-hub
-requests==2.32.3
-    # via huggingface-hub
 rsa==4.9
     # via google-auth
 s3transfer==0.10.3
@@ -82,16 +66,10 @@ sniffio==1.3.0
     # via anthropic
     # via anyio
     # via httpx
-tokenizers==0.20.1
-    # via anthropic
-tqdm==4.66.5
-    # via huggingface-hub
 typing-extensions==4.12.2
     # via anthropic
     # via anyio
-    # via huggingface-hub
     # via pydantic
     # via pydantic-core
 urllib3==1.26.20
     # via botocore
-    # via requests

6 changes: 0 additions & 6 deletions scripts/test
@@ -57,9 +57,3 @@ rye run pytest "$@"

 echo "==> Running Pydantic v1 tests"
 rye run nox -s test-pydantic-v1 -- "$@"
-
-# this is a separate script instead of a pytest test as we can't rely on the
-# execution order, so a tokenizer test could be executed before this check which
-# invalidates everything
-echo "==> Verifying that \`tokenizers\` is lazily imported"
-rye run python -c 'import anthropic, sys; assert "tokenizers" not in sys.modules; print("true")'
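
For reference, the guarantee this removed check enforced (importing `anthropic` must not eagerly import `tokenizers`) could also be asserted from inside pytest by spawning a fresh interpreter, which sidesteps the execution-order problem the deleted comment describes. A minimal sketch, not part of this PR:

import subprocess
import sys

def test_tokenizers_not_imported_eagerly() -> None:
    # A fresh interpreter guarantees no earlier test has already
    # pulled `tokenizers` into sys.modules.
    code = 'import anthropic, sys; assert "tokenizers" not in sys.modules'
    subprocess.run([sys.executable, "-c", code], check=True)
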
41 changes: 0 additions & 41 deletions src/anthropic/_client.py
@@ -28,11 +28,6 @@
 from ._version import __version__
 from ._streaming import Stream as Stream, AsyncStream as AsyncStream
 from ._exceptions import APIStatusError
-from ._tokenizers import (
-    TokenizerType,  # type: ignore[import]
-    sync_get_tokenizer,
-    async_get_tokenizer,
-)
 from ._base_client import (
     DEFAULT_MAX_RETRIES,
     DEFAULT_CONNECTION_LIMITS,
@@ -267,24 +262,6 @@ def copy(
     # client.with_options(timeout=10).foo.create(...)
     with_options = copy

-    def count_tokens(
-        self,
-        text: str,
-    ) -> int:
-        """Count the number of tokens in a given string.
-
-        Note that this is only accurate for older models, e.g. `claude-2.1`. For newer
-        models this can only be used as a _very_ rough estimate, instead you should rely
-        on the `usage` property in the response for exact counts.
-        """
-        # Note: tokenizer is untyped
-        tokenizer = self.get_tokenizer()
-        encoded_text = tokenizer.encode(text)  # type: ignore
-        return len(encoded_text.ids)  # type: ignore
-
-    def get_tokenizer(self) -> TokenizerType:
-        return sync_get_tokenizer()
-
     @override
     def _make_status_error(
         self,
@@ -531,24 +508,6 @@ def copy(
     # client.with_options(timeout=10).foo.create(...)
     with_options = copy

-    async def count_tokens(
-        self,
-        text: str,
-    ) -> int:
-        """Count the number of tokens in a given string.
-
-        Note that this is only accurate for older models, e.g. `claude-2.1`. For newer
-        models this can only be used as a _very_ rough estimate, instead you should rely
-        on the `usage` property in the response for exact counts.
-        """
-        # Note: tokenizer is untyped
-        tokenizer = await self.get_tokenizer()
-        encoded_text = tokenizer.encode(text)  # type: ignore
-        return len(encoded_text.ids)  # type: ignore
-
-    async def get_tokenizer(self) -> TokenizerType:
-        return await async_get_tokenizer()
-
     @override
     def _make_status_error(
         self,
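
The removed docstrings point callers at the `usage` property on API responses for exact counts. A minimal sketch of that replacement pattern, assuming a standard Messages API call (the model name and prompt are illustrative, not part of this diff):

import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment

message = client.messages.create(
    model="claude-3-5-sonnet-20241022",  # illustrative model name
    max_tokens=64,
    messages=[{"role": "user", "content": "Hello, Claude"}],
)

# Exact, server-computed token counts; no local tokenizer required.
print(message.usage.input_tokens, message.usage.output_tokens)
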
50 changes: 0 additions & 50 deletions src/anthropic/_tokenizers.py

This file was deleted.

1 change: 0 additions & 1 deletion src/anthropic/tokenizer.json

This file was deleted.

27 changes: 0 additions & 27 deletions tests/api_resources/test_client.py

This file was deleted.

76 changes: 0 additions & 76 deletions tests/test_tokenizer.py

This file was deleted.