diff --git a/qdrant_client/async_qdrant_client.py b/qdrant_client/async_qdrant_client.py index 98fd9404..1b1e0eea 100644 --- a/qdrant_client/async_qdrant_client.py +++ b/qdrant_client/async_qdrant_client.py @@ -14,7 +14,7 @@ from typing import Any, Awaitable, Callable, Iterable, Mapping, Optional, Sequence, Union from qdrant_client import grpc as grpc from qdrant_client.async_client_base import AsyncQdrantBase -from qdrant_client.common.deprecations import deprecation_warning_once +from qdrant_client.common.client_warnings import show_warning_once from qdrant_client.conversions import common_types as types from qdrant_client.embed.type_inspector import Inspector from qdrant_client.http import AsyncApiClient, AsyncApis @@ -1495,8 +1495,9 @@ async def upsert( and len(points) > 0 and isinstance(points[0], grpc.PointStruct) ): - deprecation_warning_once( - "\n Usage of `grpc.PointStruct` is deprecated. Please use `models.PointStruct` instead.\n ", + show_warning_once( + message="\n Usage of `grpc.PointStruct` is deprecated. Please use `models.PointStruct` instead.\n ", + category=DeprecationWarning, idx="grpc-input", ) requires_inference = self._inference_inspector.inspect(points) diff --git a/qdrant_client/common/client_warnings.py b/qdrant_client/common/client_warnings.py new file mode 100644 index 00000000..16fb7340 --- /dev/null +++ b/qdrant_client/common/client_warnings.py @@ -0,0 +1,21 @@ +import warnings +from typing import Optional + +SEEN_MESSAGES = set() + + +def show_warning(message: str, category: type[Warning] = UserWarning) -> None: + warnings.warn(message, category, stacklevel=4) + + +def show_warning_once( + message: str, category: type[Warning] = UserWarning, idx: Optional[str] = None +) -> None: + """ + Show a warning of the specified category only once per program run. + """ + key = idx if idx is not None else message + + if key not in SEEN_MESSAGES: + SEEN_MESSAGES.add(key) + show_warning(message, category) diff --git a/qdrant_client/common/deprecations.py b/qdrant_client/common/deprecations.py deleted file mode 100644 index e649e2ca..00000000 --- a/qdrant_client/common/deprecations.py +++ /dev/null @@ -1,23 +0,0 @@ -import warnings -from typing import Optional - -SEEN_DEPRECATIONS = set() - - -def deprecation_warning(message: str) -> None: - warnings.warn(message, DeprecationWarning, stacklevel=2) - - -def deprecation_warning_once(message: str, idx: Optional[str]) -> None: - """ - Same as deprecation_warning, but will be shown only once per program run. - """ - - key = idx if idx is not None else message - - if key not in SEEN_DEPRECATIONS: - SEEN_DEPRECATIONS.add(key) - else: - return - - warnings.warn(message, DeprecationWarning, stacklevel=2) diff --git a/qdrant_client/local/async_qdrant_local.py b/qdrant_client/local/async_qdrant_local.py index 5b8ec244..7a4aaa2f 100644 --- a/qdrant_client/local/async_qdrant_local.py +++ b/qdrant_client/local/async_qdrant_local.py @@ -21,6 +21,7 @@ from uuid import uuid4 import numpy as np import portalocker +from qdrant_client.common.client_warnings import show_warning_once from qdrant_client._pydantic_compat import to_dict from qdrant_client.async_client_base import AsyncQdrantBase from qdrant_client.conversions import common_types as types @@ -46,6 +47,8 @@ class AsyncQdrantLocal(AsyncQdrantBase): If you need more speed or size, use Qdrant server. """ + LARGE_DATA_THRESHOLD = 20000 + def __init__(self, location: str, force_disable_check_same_thread: bool = False) -> None: """ Initialize local Qdrant. @@ -98,11 +101,18 @@ def _load(self) -> None: for collection_name, config_json in meta["collections"].items(): config = rest_models.CreateCollection(**config_json) collection_path = self._collection_path(collection_name) - self.collections[collection_name] = LocalCollection( + collection = LocalCollection( config, collection_path, force_disable_check_same_thread=self.force_disable_check_same_thread, ) + self.collections[collection_name] = collection + if len(collection.ids) > self.LARGE_DATA_THRESHOLD: + show_warning_once( + f"Local mode is not recommended for collections with more than {self.LARGE_DATA_THRESHOLD:,} points. Collection <{collection_name}> contains {len(collection.ids)} points. Consider using Qdrant in Docker or Qdrant Cloud for better performance with large datasets.", + category=UserWarning, + idx="large-local-collection", + ) self.aliases = meta["aliases"] lock_file_path = os.path.join(self.location, ".lock") if not os.path.exists(lock_file_path): diff --git a/qdrant_client/local/local_collection.py b/qdrant_client/local/local_collection.py index 955143f1..ab679ad1 100644 --- a/qdrant_client/local/local_collection.py +++ b/qdrant_client/local/local_collection.py @@ -15,6 +15,7 @@ import numpy as np from qdrant_client import grpc as grpc +from qdrant_client.common.client_warnings import show_warning_once from qdrant_client._pydantic_compat import construct, to_jsonable_python as _to_jsonable_python from qdrant_client.conversions import common_types as types from qdrant_client.conversions.common_types import get_args_subscribed @@ -89,6 +90,8 @@ class LocalCollection: LocalCollection is a class that represents a collection of vectors in the local storage. """ + LARGE_DATA_THRESHOLD = 20_000 + def __init__( self, config: models.CreateCollection, @@ -2185,6 +2188,15 @@ def upsert(self, points: Union[Sequence[models.PointStruct], models.Batch]) -> N else: raise ValueError(f"Unsupported type: {type(points)}") + if len(self.ids) > self.LARGE_DATA_THRESHOLD: + show_warning_once( + f"Local mode is not recommended for collections with more than {self.LARGE_DATA_THRESHOLD:,} " + f"points. Current collection contains {len(self.ids)} points. " + "Consider using Qdrant in Docker or Qdrant Cloud for better performance with large datasets.", + category=UserWarning, + idx="large-local-collection", + ) + def _update_named_vectors( self, idx: int, vectors: dict[str, Union[list[float], SparseVector]] ) -> None: diff --git a/qdrant_client/local/qdrant_local.py b/qdrant_client/local/qdrant_local.py index e560c03a..15f69ca0 100644 --- a/qdrant_client/local/qdrant_local.py +++ b/qdrant_client/local/qdrant_local.py @@ -21,6 +21,7 @@ import numpy as np import portalocker +from qdrant_client.common.client_warnings import show_warning_once from qdrant_client._pydantic_compat import to_dict from qdrant_client.client_base import QdrantBase from qdrant_client.conversions import common_types as types @@ -46,6 +47,8 @@ class QdrantLocal(QdrantBase): If you need more speed or size, use Qdrant server. """ + LARGE_DATA_THRESHOLD = 20_000 + def __init__(self, location: str, force_disable_check_same_thread: bool = False) -> None: """ Initialize local Qdrant. @@ -101,11 +104,22 @@ def _load(self) -> None: for collection_name, config_json in meta["collections"].items(): config = rest_models.CreateCollection(**config_json) collection_path = self._collection_path(collection_name) - self.collections[collection_name] = LocalCollection( + collection = LocalCollection( config, collection_path, force_disable_check_same_thread=self.force_disable_check_same_thread, ) + self.collections[collection_name] = collection + if len(collection.ids) > self.LARGE_DATA_THRESHOLD: + show_warning_once( + f"Local mode is not recommended for collections with more than " + f"{self.LARGE_DATA_THRESHOLD:,} points. " + f"Collection <{collection_name}> contains {len(collection.ids)} points. " + "Consider using Qdrant in Docker or Qdrant Cloud for better performance " + "with large datasets.", + category=UserWarning, + idx="large-local-collection", + ) self.aliases = meta["aliases"] lock_file_path = os.path.join(self.location, ".lock") diff --git a/qdrant_client/qdrant_client.py b/qdrant_client/qdrant_client.py index 4d3dab4a..6b2b16c6 100644 --- a/qdrant_client/qdrant_client.py +++ b/qdrant_client/qdrant_client.py @@ -13,7 +13,7 @@ from qdrant_client import grpc as grpc from qdrant_client.client_base import QdrantBase -from qdrant_client.common.deprecations import deprecation_warning_once +from qdrant_client.common.client_warnings import show_warning_once from qdrant_client.conversions import common_types as types from qdrant_client.embed.type_inspector import Inspector from qdrant_client.http import ApiClient, SyncApis @@ -1578,10 +1578,11 @@ def upsert( and isinstance(points[0], grpc.PointStruct) ): # gRPC structures won't support local inference feature, so we deprecated it - deprecation_warning_once( - """ + show_warning_once( + message=""" Usage of `grpc.PointStruct` is deprecated. Please use `models.PointStruct` instead. """, + category=DeprecationWarning, idx="grpc-input", )