Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

User warnings large upload #852

Merged
merged 10 commits into from
Dec 5, 2024
7 changes: 4 additions & 3 deletions qdrant_client/async_qdrant_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from typing import Any, Awaitable, Callable, Iterable, Mapping, Optional, Sequence, Union
from qdrant_client import grpc as grpc
from qdrant_client.async_client_base import AsyncQdrantBase
from qdrant_client.common.deprecations import deprecation_warning_once
from qdrant_client.common.client_warnings import show_warning_once
from qdrant_client.conversions import common_types as types
from qdrant_client.embed.type_inspector import Inspector
from qdrant_client.http import AsyncApiClient, AsyncApis
Expand Down Expand Up @@ -1495,8 +1495,9 @@ async def upsert(
and len(points) > 0
and isinstance(points[0], grpc.PointStruct)
):
deprecation_warning_once(
"\n Usage of `grpc.PointStruct` is deprecated. Please use `models.PointStruct` instead.\n ",
show_warning_once(
message="\n Usage of `grpc.PointStruct` is deprecated. Please use `models.PointStruct` instead.\n ",
category=DeprecationWarning,
idx="grpc-input",
)
requires_inference = self._inference_inspector.inspect(points)
Expand Down
21 changes: 21 additions & 0 deletions qdrant_client/common/client_warnings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import warnings
from typing import Optional

# Keys of the warnings already emitted in this process: either the explicit
# ``idx`` passed to ``show_warning_once`` or, when none is given, the message
# text itself.
SEEN_MESSAGES: set[str] = set()


def show_warning(
    message: str, category: type[Warning] = UserWarning, stacklevel: int = 4
) -> None:
    """
    Emit ``message`` as a warning of the given ``category``.

    Args:
        message: Text of the warning.
        category: Warning class to emit; defaults to ``UserWarning``.
        stacklevel: Forwarded unchanged to ``warnings.warn``; it is counted
            from this function's own frame (1 = this function, 2 = its
            caller, ...). The default of 4 is the value that used to be
            hard-coded here - presumably chosen so the warning is attributed
            to the code calling a client method through
            ``show_warning_once``; confirm before relying on it for other
            call depths and pass an explicit value instead.
    """
    warnings.warn(message, category, stacklevel=stacklevel)


def show_warning_once(
    message: str,
    category: type[Warning] = UserWarning,
    idx: Optional[str] = None,
    stacklevel: int = 4,
) -> None:
    """
    Show a warning of the specified category only once per program run.

    Args:
        message: Text of the warning.
        category: Warning class to emit; defaults to ``UserWarning``.
        idx: Optional deduplication key. Calls sharing the same ``idx`` are
            treated as the same warning even when their messages differ.
            When omitted, the message text itself is used as the key.
        stacklevel: Passed through to ``show_warning`` unchanged (counted
            from ``show_warning``'s frame, not from this function's).
    """
    key = idx if idx is not None else message

    if key in SEEN_MESSAGES:
        return

    # The key is recorded before the warning is emitted, so a warning filter
    # that turns the warning into an exception still consumes the one shot.
    SEEN_MESSAGES.add(key)
    show_warning(message, category, stacklevel=stacklevel)
hh-space-invader marked this conversation as resolved.
Show resolved Hide resolved
23 changes: 0 additions & 23 deletions qdrant_client/common/deprecations.py

This file was deleted.

12 changes: 11 additions & 1 deletion qdrant_client/local/async_qdrant_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from uuid import uuid4
import numpy as np
import portalocker
from qdrant_client.common.client_warnings import show_warning_once
from qdrant_client._pydantic_compat import to_dict
from qdrant_client.async_client_base import AsyncQdrantBase
from qdrant_client.conversions import common_types as types
Expand All @@ -46,6 +47,8 @@ class AsyncQdrantLocal(AsyncQdrantBase):
If you need more speed or size, use Qdrant server.
"""

LARGE_DATA_THRESHOLD = 20000

def __init__(self, location: str, force_disable_check_same_thread: bool = False) -> None:
"""
Initialize local Qdrant.
Expand Down Expand Up @@ -98,11 +101,18 @@ def _load(self) -> None:
for collection_name, config_json in meta["collections"].items():
config = rest_models.CreateCollection(**config_json)
collection_path = self._collection_path(collection_name)
self.collections[collection_name] = LocalCollection(
collection = LocalCollection(
config,
collection_path,
force_disable_check_same_thread=self.force_disable_check_same_thread,
)
self.collections[collection_name] = collection
if len(collection.ids) > self.LARGE_DATA_THRESHOLD:
show_warning_once(
f"Local mode is not recommended for collections with more than {self.LARGE_DATA_THRESHOLD:,} points. Collection <{collection_name}> contains {len(collection.ids)} points. Consider using Qdrant in Docker or Qdrant Cloud for better performance with large datasets.",
category=UserWarning,
idx="large-local-collection",
)
self.aliases = meta["aliases"]
lock_file_path = os.path.join(self.location, ".lock")
if not os.path.exists(lock_file_path):
Expand Down
12 changes: 12 additions & 0 deletions qdrant_client/local/local_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import numpy as np

from qdrant_client import grpc as grpc
from qdrant_client.common.client_warnings import show_warning_once
from qdrant_client._pydantic_compat import construct, to_jsonable_python as _to_jsonable_python
from qdrant_client.conversions import common_types as types
from qdrant_client.conversions.common_types import get_args_subscribed
Expand Down Expand Up @@ -89,6 +90,8 @@ class LocalCollection:
LocalCollection is a class that represents a collection of vectors in the local storage.
"""

LARGE_DATA_THRESHOLD = 20_000

def __init__(
self,
config: models.CreateCollection,
Expand Down Expand Up @@ -2185,6 +2188,15 @@ def upsert(self, points: Union[Sequence[models.PointStruct], models.Batch]) -> N
else:
raise ValueError(f"Unsupported type: {type(points)}")

if len(self.ids) > self.LARGE_DATA_THRESHOLD:
show_warning_once(
f"Local mode is not recommended for collections with more than {self.LARGE_DATA_THRESHOLD:,} "
f"points. Current collection contains {len(self.ids)} points. "
"Consider using Qdrant in Docker or Qdrant Cloud for better performance with large datasets.",
category=UserWarning,
idx="large-local-collection",
)

def _update_named_vectors(
self, idx: int, vectors: dict[str, Union[list[float], SparseVector]]
) -> None:
Expand Down
16 changes: 15 additions & 1 deletion qdrant_client/local/qdrant_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import numpy as np
import portalocker

from qdrant_client.common.client_warnings import show_warning_once
from qdrant_client._pydantic_compat import to_dict
from qdrant_client.client_base import QdrantBase
from qdrant_client.conversions import common_types as types
Expand All @@ -46,6 +47,8 @@ class QdrantLocal(QdrantBase):
If you need more speed or size, use Qdrant server.
"""

LARGE_DATA_THRESHOLD = 20_000

def __init__(self, location: str, force_disable_check_same_thread: bool = False) -> None:
"""
Initialize local Qdrant.
Expand Down Expand Up @@ -101,11 +104,22 @@ def _load(self) -> None:
for collection_name, config_json in meta["collections"].items():
config = rest_models.CreateCollection(**config_json)
collection_path = self._collection_path(collection_name)
self.collections[collection_name] = LocalCollection(
collection = LocalCollection(
config,
collection_path,
force_disable_check_same_thread=self.force_disable_check_same_thread,
)
self.collections[collection_name] = collection
if len(collection.ids) > self.LARGE_DATA_THRESHOLD:
show_warning_once(
f"Local mode is not recommended for collections with more than "
f"{self.LARGE_DATA_THRESHOLD:,} points. "
f"Collection <{collection_name}> contains {len(collection.ids)} points. "
"Consider using Qdrant in Docker or Qdrant Cloud for better performance "
"with large datasets.",
category=UserWarning,
idx="large-local-collection",
)
self.aliases = meta["aliases"]

lock_file_path = os.path.join(self.location, ".lock")
Expand Down
7 changes: 4 additions & 3 deletions qdrant_client/qdrant_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from qdrant_client import grpc as grpc
from qdrant_client.client_base import QdrantBase
from qdrant_client.common.deprecations import deprecation_warning_once
from qdrant_client.common.client_warnings import show_warning_once
from qdrant_client.conversions import common_types as types
from qdrant_client.embed.type_inspector import Inspector
from qdrant_client.http import ApiClient, SyncApis
Expand Down Expand Up @@ -1578,10 +1578,11 @@ def upsert(
and isinstance(points[0], grpc.PointStruct)
):
# gRPC structures won't support local inference feature, so we deprecated it
deprecation_warning_once(
"""
show_warning_once(
message="""
Usage of `grpc.PointStruct` is deprecated. Please use `models.PointStruct` instead.
""",
category=DeprecationWarning,
idx="grpc-input",
)

Expand Down
Loading