refactor(client)!: remove legacy client.count_tokens() method #726

Merged · 1 commit · Nov 4, 2024
1 change: 0 additions & 1 deletion pyproject.toml
@@ -15,7 +15,6 @@ dependencies = [
     "distro>=1.7.0, <2",
     "sniffio",
     "cached-property; python_version < '3.8'",
-    "tokenizers >= 0.13.0",
     "jiter>=0.4.0, <1",
 ]
 requires-python = ">= 3.7"

20 changes: 0 additions & 20 deletions requirements-dev.lock
@@ -30,9 +30,6 @@ cachetools==5.5.0
 certifi==2023.7.22
     # via httpcore
     # via httpx
-    # via requests
-charset-normalizer==3.4.0
-    # via requests
 colorlog==6.7.0
     # via nox
 dirty-equals==0.6.0
@@ -44,10 +41,7 @@ exceptiongroup==1.2.2
     # via anyio
     # via pytest
 filelock==3.12.4
-    # via huggingface-hub
     # via virtualenv
-fsspec==2024.10.0
-    # via huggingface-hub
 google-auth==2.35.0
     # via anthropic
 h11==0.14.0
@@ -57,12 +51,9 @@ httpcore==1.0.2
 httpx==0.25.2
     # via anthropic
     # via respx
-huggingface-hub==0.26.1
-    # via tokenizers
 idna==3.4
     # via anyio
     # via httpx
-    # via requests
 importlib-metadata==7.0.0
 iniconfig==2.0.0
     # via pytest
@@ -82,7 +73,6 @@ nodeenv==1.8.0
     # via pyright
 nox==2023.4.22
 packaging==23.2
-    # via huggingface-hub
     # via nox
     # via pytest
 platformdirs==3.11.0
@@ -109,10 +99,6 @@ python-dateutil==2.8.2
     # via time-machine
 pytz==2023.3.post1
     # via dirty-equals
-pyyaml==6.0.2
-    # via huggingface-hub
-requests==2.32.3
-    # via huggingface-hub
 respx==0.20.2
 rich==13.7.1
 rsa==4.9
@@ -129,13 +115,9 @@ sniffio==1.3.0
     # via anyio
     # via httpx
 time-machine==2.9.0
-tokenizers==0.20.1
-    # via anthropic
 tomli==2.0.2
     # via mypy
     # via pytest
-tqdm==4.66.5
-    # via huggingface-hub
 types-awscrt==0.23.0
     # via botocore-stubs
 types-s3transfer==0.10.3
@@ -144,13 +126,11 @@ typing-extensions==4.12.2
     # via anthropic
     # via anyio
     # via boto3-stubs
-    # via huggingface-hub
     # via mypy
     # via pydantic
     # via pydantic-core
 urllib3==1.26.20
     # via botocore
-    # via requests
 virtualenv==20.24.5
     # via nox
 zipp==3.17.0

22 changes: 0 additions & 22 deletions requirements.lock
@@ -25,17 +25,10 @@ cachetools==5.5.0
 certifi==2023.7.22
     # via httpcore
     # via httpx
-    # via requests
-charset-normalizer==3.4.0
-    # via requests
 distro==1.8.0
     # via anthropic
 exceptiongroup==1.2.2
     # via anyio
-filelock==3.16.1
-    # via huggingface-hub
-fsspec==2024.10.0
-    # via huggingface-hub
 google-auth==2.35.0
     # via anthropic
 h11==0.14.0
@@ -44,19 +37,14 @@ httpcore==1.0.2
     # via httpx
 httpx==0.25.2
     # via anthropic
-huggingface-hub==0.26.1
-    # via tokenizers
 idna==3.4
     # via anyio
     # via httpx
-    # via requests
 jiter==0.6.1
     # via anthropic
 jmespath==1.0.1
     # via boto3
     # via botocore
-packaging==24.1
-    # via huggingface-hub
 pyasn1==0.6.1
     # via pyasn1-modules
     # via rsa
@@ -68,10 +56,6 @@ pydantic-core==2.23.4
     # via pydantic
 python-dateutil==2.9.0.post0
     # via botocore
-pyyaml==6.0.2
-    # via huggingface-hub
-requests==2.32.3
-    # via huggingface-hub
 rsa==4.9
     # via google-auth
 s3transfer==0.10.3
@@ -82,16 +66,10 @@ sniffio==1.3.0
     # via anthropic
     # via anyio
     # via httpx
-tokenizers==0.20.1
-    # via anthropic
-tqdm==4.66.5
-    # via huggingface-hub
 typing-extensions==4.12.2
     # via anthropic
     # via anyio
-    # via huggingface-hub
     # via pydantic
     # via pydantic-core
 urllib3==1.26.20
     # via botocore
-    # via requests

6 changes: 0 additions & 6 deletions scripts/test
@@ -57,9 +57,3 @@ rye run pytest "$@"

 echo "==> Running Pydantic v1 tests"
 rye run nox -s test-pydantic-v1 -- "$@"
-
-# this is a separate script instead of a pytest test as we can't rely on the
-# execution order, so a tokenizer test could be executed before this check which
-# invalidates everything
-echo "==> Verifying that \`tokenizers\` is lazily imported"
-rye run python -c 'import anthropic, sys; assert "tokenizers" not in sys.modules; print("true")'
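
For reference, the guarantee this removed check enforced (importing `anthropic` must not eagerly import `tokenizers`) could also be asserted from inside pytest by spawning a fresh interpreter, which sidesteps the execution-order problem the deleted comment describes. A minimal sketch, not part of this PR:

import subprocess
import sys

def test_tokenizers_not_imported_eagerly() -> None:
    # A fresh interpreter guarantees no earlier test has already
    # pulled `tokenizers` into sys.modules.
    code = 'import anthropic, sys; assert "tokenizers" not in sys.modules'
    subprocess.run([sys.executable, "-c", code], check=True)
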
41 changes: 0 additions & 41 deletions src/anthropic/_client.py
@@ -28,11 +28,6 @@
 from ._version import __version__
 from ._streaming import Stream as Stream, AsyncStream as AsyncStream
 from ._exceptions import APIStatusError
-from ._tokenizers import (
-    TokenizerType,  # type: ignore[import]
-    sync_get_tokenizer,
-    async_get_tokenizer,
-)
 from ._base_client import (
     DEFAULT_MAX_RETRIES,
     DEFAULT_CONNECTION_LIMITS,
@@ -267,24 +262,6 @@ def copy(
     # client.with_options(timeout=10).foo.create(...)
     with_options = copy

-    def count_tokens(
-        self,
-        text: str,
-    ) -> int:
-        """Count the number of tokens in a given string.
-
-        Note that this is only accurate for older models, e.g. `claude-2.1`. For newer
-        models this can only be used as a _very_ rough estimate, instead you should rely
-        on the `usage` property in the response for exact counts.
-        """
-        # Note: tokenizer is untyped
-        tokenizer = self.get_tokenizer()
-        encoded_text = tokenizer.encode(text)  # type: ignore
-        return len(encoded_text.ids)  # type: ignore
-
-    def get_tokenizer(self) -> TokenizerType:
-        return sync_get_tokenizer()
-
     @override
     def _make_status_error(
         self,
@@ -531,24 +508,6 @@ def copy(
     # client.with_options(timeout=10).foo.create(...)
     with_options = copy

-    async def count_tokens(
-        self,
-        text: str,
-    ) -> int:
-        """Count the number of tokens in a given string.
-
-        Note that this is only accurate for older models, e.g. `claude-2.1`. For newer
-        models this can only be used as a _very_ rough estimate, instead you should rely
-        on the `usage` property in the response for exact counts.
-        """
-        # Note: tokenizer is untyped
-        tokenizer = await self.get_tokenizer()
-        encoded_text = tokenizer.encode(text)  # type: ignore
-        return len(encoded_text.ids)  # type: ignore
-
-    async def get_tokenizer(self) -> TokenizerType:
-        return await async_get_tokenizer()
-
     @override
     def _make_status_error(
         self,
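
The removed docstrings point callers at the `usage` property on API responses for exact counts. A minimal sketch of that replacement pattern, assuming a standard Messages API call (the model name and prompt are illustrative, not part of this diff):

import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment

message = client.messages.create(
    model="claude-3-5-sonnet-20241022",  # illustrative model name
    max_tokens=64,
    messages=[{"role": "user", "content": "Hello, Claude"}],
)

# Exact, server-computed token counts; no local tokenizer required.
print(message.usage.input_tokens, message.usage.output_tokens)
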
50 changes: 0 additions & 50 deletions src/anthropic/_tokenizers.py

This file was deleted.

1 change: 0 additions & 1 deletion src/anthropic/tokenizer.json

This file was deleted.

27 changes: 0 additions & 27 deletions tests/api_resources/test_client.py

This file was deleted.

76 changes: 0 additions & 76 deletions tests/test_tokenizer.py

This file was deleted.