Skip to content

Commit

Permalink
User warnings large upload (#852)
Browse files Browse the repository at this point in the history
* chore: Added warning messages when working with a large number of points in local mode

* chore: Added deprecation message

* chore: Add default to idx

* Fix: Updated local mode user warning for large volume of points

* chore: Added cloud

* chore: Updated async client

* improve: Improved how we handle warning messages

* chore: Changed warning stack level
* refactor: Refactored user warnings in local mode for large uploads

* fix: Fix ci

* refactor: update warning messages

---------

Co-authored-by: George Panchuk <[email protected]>
  • Loading branch information
hh-space-invader and joein authored Dec 5, 2024
1 parent ebc358c commit 88543cb
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 31 deletions.
7 changes: 4 additions & 3 deletions qdrant_client/async_qdrant_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from typing import Any, Awaitable, Callable, Iterable, Mapping, Optional, Sequence, Union
from qdrant_client import grpc as grpc
from qdrant_client.async_client_base import AsyncQdrantBase
from qdrant_client.common.deprecations import deprecation_warning_once
from qdrant_client.common.client_warnings import show_warning_once
from qdrant_client.conversions import common_types as types
from qdrant_client.embed.type_inspector import Inspector
from qdrant_client.http import AsyncApiClient, AsyncApis
Expand Down Expand Up @@ -1495,8 +1495,9 @@ async def upsert(
and len(points) > 0
and isinstance(points[0], grpc.PointStruct)
):
deprecation_warning_once(
"\n Usage of `grpc.PointStruct` is deprecated. Please use `models.PointStruct` instead.\n ",
show_warning_once(
message="\n Usage of `grpc.PointStruct` is deprecated. Please use `models.PointStruct` instead.\n ",
category=DeprecationWarning,
idx="grpc-input",
)
requires_inference = self._inference_inspector.inspect(points)
Expand Down
21 changes: 21 additions & 0 deletions qdrant_client/common/client_warnings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import warnings
from typing import Optional

# Deduplication keys of warnings already emitted through `show_warning_once`.
# Each key is either the explicit `idx` passed by the caller or, when no `idx`
# is given, the warning message text itself.
SEEN_MESSAGES: set[str] = set()


def show_warning(
    message: str, category: type[Warning] = UserWarning, stacklevel: int = 4
) -> None:
    """
    Emit `message` as a warning of the given `category`.

    Args:
        message: Text of the warning.
        category: Warning class to emit. Defaults to `UserWarning`.
        stacklevel: Forwarded unchanged to `warnings.warn`. It selects which
            frame the warning is attributed to, where 1 is this function
            itself. The default of 4 keeps the previously hard-coded value;
            callers located at a different call depth can override it so the
            warning points at the intended line.
    """
    warnings.warn(message, category, stacklevel=stacklevel)


def show_warning_once(
    message: str, category: type[Warning] = UserWarning, idx: Optional[str] = None
) -> None:
    """
    Emit a warning at most once per process for a given deduplication key.

    Args:
        message: Text of the warning.
        category: Warning class to emit. Defaults to `UserWarning`.
        idx: Optional deduplication key. When omitted, the message text itself
            is used as the key, so identical messages are shown only once.
    """
    dedup_key = message if idx is None else idx

    # Already reported under this key: nothing to do.
    if dedup_key in SEEN_MESSAGES:
        return

    # Record the key before emitting, then delegate directly to `show_warning`
    # so the call depth it relies on for `stacklevel` stays the same.
    SEEN_MESSAGES.add(dedup_key)
    show_warning(message, category)
23 changes: 0 additions & 23 deletions qdrant_client/common/deprecations.py

This file was deleted.

12 changes: 11 additions & 1 deletion qdrant_client/local/async_qdrant_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from uuid import uuid4
import numpy as np
import portalocker
from qdrant_client.common.client_warnings import show_warning_once
from qdrant_client._pydantic_compat import to_dict
from qdrant_client.async_client_base import AsyncQdrantBase
from qdrant_client.conversions import common_types as types
Expand All @@ -46,6 +47,8 @@ class AsyncQdrantLocal(AsyncQdrantBase):
If you need more speed or size, use Qdrant server.
"""

LARGE_DATA_THRESHOLD = 20000

def __init__(self, location: str, force_disable_check_same_thread: bool = False) -> None:
"""
Initialize local Qdrant.
Expand Down Expand Up @@ -98,11 +101,18 @@ def _load(self) -> None:
for collection_name, config_json in meta["collections"].items():
config = rest_models.CreateCollection(**config_json)
collection_path = self._collection_path(collection_name)
self.collections[collection_name] = LocalCollection(
collection = LocalCollection(
config,
collection_path,
force_disable_check_same_thread=self.force_disable_check_same_thread,
)
self.collections[collection_name] = collection
if len(collection.ids) > self.LARGE_DATA_THRESHOLD:
show_warning_once(
f"Local mode is not recommended for collections with more than {self.LARGE_DATA_THRESHOLD:,} points. Collection <{collection_name}> contains {len(collection.ids)} points. Consider using Qdrant in Docker or Qdrant Cloud for better performance with large datasets.",
category=UserWarning,
idx="large-local-collection",
)
self.aliases = meta["aliases"]
lock_file_path = os.path.join(self.location, ".lock")
if not os.path.exists(lock_file_path):
Expand Down
12 changes: 12 additions & 0 deletions qdrant_client/local/local_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import numpy as np

from qdrant_client import grpc as grpc
from qdrant_client.common.client_warnings import show_warning_once
from qdrant_client._pydantic_compat import construct, to_jsonable_python as _to_jsonable_python
from qdrant_client.conversions import common_types as types
from qdrant_client.conversions.common_types import get_args_subscribed
Expand Down Expand Up @@ -89,6 +90,8 @@ class LocalCollection:
LocalCollection is a class that represents a collection of vectors in the local storage.
"""

LARGE_DATA_THRESHOLD = 20_000

def __init__(
self,
config: models.CreateCollection,
Expand Down Expand Up @@ -2185,6 +2188,15 @@ def upsert(self, points: Union[Sequence[models.PointStruct], models.Batch]) -> N
else:
raise ValueError(f"Unsupported type: {type(points)}")

if len(self.ids) > self.LARGE_DATA_THRESHOLD:
show_warning_once(
f"Local mode is not recommended for collections with more than {self.LARGE_DATA_THRESHOLD:,} "
f"points. Current collection contains {len(self.ids)} points. "
"Consider using Qdrant in Docker or Qdrant Cloud for better performance with large datasets.",
category=UserWarning,
idx="large-local-collection",
)

def _update_named_vectors(
self, idx: int, vectors: dict[str, Union[list[float], SparseVector]]
) -> None:
Expand Down
16 changes: 15 additions & 1 deletion qdrant_client/local/qdrant_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import numpy as np
import portalocker

from qdrant_client.common.client_warnings import show_warning_once
from qdrant_client._pydantic_compat import to_dict
from qdrant_client.client_base import QdrantBase
from qdrant_client.conversions import common_types as types
Expand All @@ -46,6 +47,8 @@ class QdrantLocal(QdrantBase):
If you need more speed or size, use Qdrant server.
"""

LARGE_DATA_THRESHOLD = 20_000

def __init__(self, location: str, force_disable_check_same_thread: bool = False) -> None:
"""
Initialize local Qdrant.
Expand Down Expand Up @@ -101,11 +104,22 @@ def _load(self) -> None:
for collection_name, config_json in meta["collections"].items():
config = rest_models.CreateCollection(**config_json)
collection_path = self._collection_path(collection_name)
self.collections[collection_name] = LocalCollection(
collection = LocalCollection(
config,
collection_path,
force_disable_check_same_thread=self.force_disable_check_same_thread,
)
self.collections[collection_name] = collection
if len(collection.ids) > self.LARGE_DATA_THRESHOLD:
show_warning_once(
f"Local mode is not recommended for collections with more than "
f"{self.LARGE_DATA_THRESHOLD:,} points. "
f"Collection <{collection_name}> contains {len(collection.ids)} points. "
"Consider using Qdrant in Docker or Qdrant Cloud for better performance "
"with large datasets.",
category=UserWarning,
idx="large-local-collection",
)
self.aliases = meta["aliases"]

lock_file_path = os.path.join(self.location, ".lock")
Expand Down
7 changes: 4 additions & 3 deletions qdrant_client/qdrant_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from qdrant_client import grpc as grpc
from qdrant_client.client_base import QdrantBase
from qdrant_client.common.deprecations import deprecation_warning_once
from qdrant_client.common.client_warnings import show_warning_once
from qdrant_client.conversions import common_types as types
from qdrant_client.embed.type_inspector import Inspector
from qdrant_client.http import ApiClient, SyncApis
Expand Down Expand Up @@ -1578,10 +1578,11 @@ def upsert(
and isinstance(points[0], grpc.PointStruct)
):
# gRPC structures won't support local inference feature, so we deprecated it
deprecation_warning_once(
"""
show_warning_once(
message="""
Usage of `grpc.PointStruct` is deprecated. Please use `models.PointStruct` instead.
""",
category=DeprecationWarning,
idx="grpc-input",
)

Expand Down

0 comments on commit 88543cb

Please sign in to comment.