Replace threading.Lock with asyncio.Lock when batching to avoid deadlocks #1270
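The failure mode behind this PR: the batching layer shares state between sync worker threads and a background asyncio event loop, and a `threading.Lock` taken inside coroutine code blocks the whole loop thread while it waits — so the coroutine currently holding the lock can never be resumed to release it. A minimal self-contained sketch of the deadlock and the cooperative fix (illustrative code, not the client's internals):

    import asyncio
    import threading

    tlock = threading.Lock()
    alock = asyncio.Lock()  # Python 3.10+: no event loop bound at construction

    async def holder() -> None:
        tlock.acquire()
        await asyncio.sleep(1)  # suspends while still holding the lock
        tlock.release()

    async def waiter() -> None:
        # threading.Lock blocks the whole event-loop thread while waiting, so
        # holder() can never be rescheduled to release it -> deadlock.
        tlock.acquire()
        tlock.release()

    async def safe_waiter() -> None:
        # asyncio.Lock suspends only this coroutine; the loop keeps running
        # holder() until it releases, so progress is always possible.
        async with alock:
            ...

    # asyncio.run() over holder() + waiter() hangs forever;
    # holder() + safe_waiter() on a shared asyncio.Lock completes.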

Open · wants to merge 19 commits into base: main

Commits (19 total; diff below shows changes from 2 commits)
f16f366
Fix deadlocks in the batching algo by replacing `threading.Lock` with…
tsmith023 Aug 29, 2024
4e8863f
Remove lower bound requirement on fixed size batching
tsmith023 Aug 29, 2024
057b3dc
change time.sleep to asyncio.sleep in async func
tsmith023 Aug 29, 2024
3150959
Ensure `asyncio.Lock`s are opened in the event loop thread to handle …
tsmith023 Aug 29, 2024
d45621f
Remove stifling locks in _add_objects
tsmith023 Aug 29, 2024
336f8ee
Merge branch 'fix-deadlocks-in-batching' of https://github.com/weavia…
tsmith023 Aug 29, 2024
7cdcfc0
Move objs and refs inits back to __init__
tsmith023 Aug 29, 2024
d4a3a9f
Add missing props in collection/base.pyi stubs
tsmith023 Aug 29, 2024
c7bc642
Fix formatting
tsmith023 Aug 29, 2024
0539e98
Release `asyncio.Lock`s in the event-loop's context
tsmith023 Aug 30, 2024
c1daea8
Fix linter
tsmith023 Aug 30, 2024
86f9431
Log errors in __send_batch
tsmith023 Sep 2, 2024
8866503
Change `ErrorX` classes to refer to `BatchX` instead of internal `_Ba…
tsmith023 Sep 16, 2024
176984e
Merge branch 'main' of https://github.com/weaviate/weaviate-python-cl…
tsmith023 Sep 16, 2024
da9603a
Fix wrong <3.10 syntax
tsmith023 Sep 16, 2024
3eafa77
Fix missing default of retry_count in BatchObject
tsmith023 Sep 16, 2024
7a98f55
Fix parsing of `XReference`
tsmith023 Sep 16, 2024
ee17a4e
Merge branch 'main' of https://github.com/weaviate/weaviate-python-cl…
tsmith023 Nov 1, 2024
feb148a
Move futures inside executor to avoid races
tsmith023 Nov 1, 2024
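Several commits above — "Ensure `asyncio.Lock`s are opened in the event loop thread…", "Release `asyncio.Lock`s in the event-loop's context" — revolve around one constraint: asyncio primitives must be created and awaited on the event-loop thread, so the sync batching threads have to submit coroutines to the client's background loop rather than call them directly. A minimal sketch of that pattern, assuming a dedicated loop thread; `EventLoopThread` and its `run_until_complete(fn, *args)` signature are illustrative stand-ins for the client's internal wrapper, not its real API:

    import asyncio
    import threading
    from typing import Any, Awaitable, Callable, TypeVar

    T = TypeVar("T")

    class EventLoopThread:
        """Owns one event loop on a daemon thread; sync code submits coroutines to it."""

        def __init__(self) -> None:
            self.loop = asyncio.new_event_loop()
            self._thread = threading.Thread(target=self.loop.run_forever, daemon=True)
            self._thread.start()

        def run_until_complete(
            self, f: Callable[..., Awaitable[T]], *args: Any, **kwargs: Any
        ) -> T:
            # Build the coroutine here but run it on the loop thread; the calling
            # sync thread blocks only on the Future, never inside the loop itself.
            future = asyncio.run_coroutine_threadsafe(f(*args, **kwargs), self.loop)
            return future.result()

    # e.g. from a sync worker thread:
    #   loop_thread.run_until_complete(batch_objects.add, obj)  # function + args, uncalled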
10 changes: 7 additions & 3 deletions integration/test_batch_v4.py
@@ -687,9 +687,13 @@ def test_batching_error_logs(
         for obj in [{"name": i} for i in range(100)]:
             batch.add_object(properties=obj, collection=name)
     assert (
-        "Failed to send 100 objects in a batch of 100. Please inspect client.batch.failed_objects or collection.batch.failed_objects for the failed objects."
-        in caplog.text
-    )
+        ("Failed to send" in caplog.text)
+        and ("objects in a batch of" in caplog.text)
+        and (
+            "Please inspect client.batch.failed_objects or collection.batch.failed_objects for the failed objects."
+            in caplog.text
+        )
+    )  # number of objects sent per batch is not fixed for less than 100 objects


 def test_references_with_to_uuids(client_factory: ClientFactory) -> None:
59 changes: 34 additions & 25 deletions weaviate/collections/batch/base.py
@@ -1,3 +1,4 @@
+import asyncio
 import math
 import threading
 import time
@@ -60,39 +61,39 @@ class BatchRequest(ABC, Generic[TBatchInput, TBatchReturn]):

     def __init__(self) -> None:
         self._items: List[TBatchInput] = []
-        self._lock = threading.Lock()
+        self._lock = asyncio.Lock()

     def __len__(self) -> int:
         return len(self._items)

-    def add(self, item: TBatchInput) -> None:
+    async def add(self, item: TBatchInput) -> None:
         """Add an item to the BatchRequest."""
-        self._lock.acquire()
+        await self._lock.acquire()
         self._items.append(item)
         self._lock.release()

-    def prepend(self, item: List[TBatchInput]) -> None:
+    async def prepend(self, item: List[TBatchInput]) -> None:
         """Add items to the front of the BatchRequest.

         This is intended to be used when objects should be retried, e.g. after a temporary error.
         """
-        self._lock.acquire()
+        await self._lock.acquire()
         self._items = item + self._items
         self._lock.release()


 class ReferencesBatchRequest(BatchRequest[_BatchReference, BatchReferenceReturn]):
     """Collect Weaviate-object references to add them in one request to Weaviate."""

-    def pop_items(self, pop_amount: int, uuid_lookup: Set[str]) -> List[_BatchReference]:
+    async def pop_items(self, pop_amount: int, uuid_lookup: Set[str]) -> List[_BatchReference]:
         """Pop the given number of items from the BatchRequest queue.

         Returns
             `List[_BatchReference]` items from the BatchRequest.
         """
         ret: List[_BatchReference] = []
         i = 0
-        self._lock.acquire()
+        await self._lock.acquire()
         while len(ret) < pop_amount and len(self._items) > 0 and i < len(self._items):
             if self._items[i].from_uuid not in uuid_lookup and (
                 self._items[i].to_uuid is None or self._items[i].to_uuid not in uuid_lookup
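A design note on the methods above: the explicit `await self._lock.acquire()` / `self._lock.release()` pairs are not exception-safe — if the body between them raises, the lock stays held. A minimal sketch of the more idiomatic equivalent for `add` (same happy-path behavior):

    async def add(self, item: TBatchInput) -> None:
        """Add an item to the BatchRequest."""
        async with self._lock:  # released even if append raises
            self._items.append(item)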
@@ -107,13 +108,13 @@ def pop_items(self, pop_amount: int, uuid_lookup: Set[str]) -> List[_BatchReference]:
 class ObjectsBatchRequest(BatchRequest[_BatchObject, BatchObjectReturn]):
     """Collect objects for one batch request to weaviate."""

-    def pop_items(self, pop_amount: int) -> List[_BatchObject]:
+    async def pop_items(self, pop_amount: int) -> List[_BatchObject]:
         """Pop the given number of items from the BatchRequest queue.

         Returns
             `List[_BatchObject]` items from the BatchRequest.
         """
-        self._lock.acquire()
+        await self._lock.acquire()
         if pop_amount >= len(self._items):
             ret = copy(self._items)
             self._items.clear()
@@ -174,14 +175,14 @@ def __init__(
         self.__batch_rest = _BatchREST(connection, self.__consistency_level)

         # lookup table for objects that are currently being processed - used to avoid sending references from objects that have not been added yet
-        self.__uuid_lookup_lock = threading.Lock()
+        self.__uuid_lookup_lock = asyncio.Lock()
         self.__uuid_lookup: Set[str] = set()

         # we do not want users to access the results directly as they are not thread-safe
         self.__results_for_wrapper_backup = results
         self.__results_for_wrapper = _BatchDataWrapper()

-        self.__results_lock = threading.Lock()
+        self.__results_lock = asyncio.Lock()

         self.__cluster = _ClusterBatch(self.__connection)
@@ -221,7 +222,7 @@ def __init__(
         self.__recommended_num_refs: int = 50

         self.__active_requests = 0
-        self.__active_requests_lock = threading.Lock()
+        self.__active_requests_lock = asyncio.Lock()

         # dynamic batching
         self.__time_last_scale_up: float = 0
@@ -292,14 +293,18 @@ def __batch_send(self) -> None:
             self.__time_stamp_last_request = time.time()

         self._batch_send = True
-        self.__active_requests_lock.acquire()
+        self.__loop.run_until_complete(self.__active_requests_lock.acquire)
         self.__active_requests += 1
         self.__active_requests_lock.release()

-        objs = self.__batch_objects.pop_items(self.__recommended_num_objects)
-        self.__uuid_lookup_lock.acquire()
-        refs = self.__batch_references.pop_items(
-            self.__recommended_num_refs, uuid_lookup=self.__uuid_lookup
+        objs = self.__loop.run_until_complete(
+            self.__batch_objects.pop_items, self.__recommended_num_objects
         )
+        self.__loop.run_until_complete(self.__uuid_lookup_lock.acquire)
+        refs = self.__loop.run_until_complete(
+            self.__batch_references.pop_items,
+            self.__recommended_num_refs,
+            uuid_lookup=self.__uuid_lookup,
+        )
         self.__uuid_lookup_lock.release()
         # do not block the thread - the results are written to a central (locked) list and we want to have multiple concurrent batch-requests
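Note the calling convention in this hunk: `__loop.run_until_complete` receives the coroutine function and its arguments separately (`pop_items` and the locks' `acquire` are passed uncalled). That is consistent with a `run_coroutine_threadsafe`-style wrapper, as sketched after the commit list above, and not with `asyncio.AbstractEventLoop.run_until_complete`, which expects an already-constructed awaitable.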
@@ -513,7 +518,7 @@ async def __send_batch(
             ]
             readded_uuids = {obj.uuid for obj in readd_objects}

-            self.__batch_objects.prepend(readd_objects)
+            await self.__batch_objects.prepend(readd_objects)

             new_errors = {
                 i: err for i, err in response_obj.errors.items() if i not in readded_objects
@@ -542,7 +547,7 @@ async def __send_batch(
             else:
                 # sleep a bit to recover from the rate limit in other cases
                 time.sleep(2**highest_retry_count)
-            self.__uuid_lookup_lock.acquire()
+            await self.__uuid_lookup_lock.acquire()
             self.__uuid_lookup.difference_update(
                 obj.uuid for obj in objs if obj.uuid not in readded_uuids
             )
@@ -561,7 +566,7 @@ async def __send_batch(
                         "message": "There have been more than 30 failed object batches. Further errors will not be logged.",
                     }
                 )
-            self.__results_lock.acquire()
+            await self.__results_lock.acquire()
             self.__results_for_wrapper.results.objs += response_obj
             self.__results_for_wrapper.failed_objects.extend(response_obj.errors.values())
             self.__results_lock.release()
@@ -595,12 +600,12 @@ async def __send_batch(
                         "message": "There have been more than 30 failed reference batches. Further errors will not be logged.",
                     }
                 )
-            self.__results_lock.acquire()
+            await self.__results_lock.acquire()
             self.__results_for_wrapper.results.refs += response_ref
             self.__results_for_wrapper.failed_references.extend(response_ref.errors.values())
             self.__results_lock.release()

-        self.__active_requests_lock.acquire()
+        await self.__active_requests_lock.acquire()
         self.__active_requests -= 1
         self.__active_requests_lock.release()
@@ -636,15 +641,17 @@ def _add_object(
                 index=self.__objs_count,
             )
             self.__objs_count += 1
+            self.__loop.run_until_complete(self.__results_lock.acquire)
             self.__results_for_wrapper.imported_shards.add(
                 Shard(collection=collection, tenant=tenant)
             )
+            self.__results_lock.release()
         except ValidationError as e:
             raise WeaviateBatchValidationError(repr(e))
-        self.__uuid_lookup_lock.acquire()
+        self.__loop.run_until_complete(self.__uuid_lookup_lock.acquire)
         self.__uuid_lookup.add(str(batch_object.uuid))
         self.__uuid_lookup_lock.release()
-        self.__batch_objects.add(batch_object._to_internal())
+        self.__loop.run_until_complete(self.__batch_objects.add, batch_object._to_internal())

         # block if queue gets too long or weaviate is overloaded - reading files is faster than sending them so we do
         # not need a long queue
@@ -688,7 +695,9 @@ def _add_reference(
             )
         except ValidationError as e:
             raise WeaviateBatchValidationError(repr(e))
-        self.__batch_references.add(batch_reference._to_internal())
+        self.__loop.run_until_complete(
+            self.__batch_references.add, batch_reference._to_internal()
+        )

         # block if queue gets too long or weaviate is overloaded
         while self.__recommended_num_objects == 0:
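For context on the surface being exercised: the deadlocks showed up underneath the client's batching context managers, where user threads call `add_object`/`add_reference` while background threads flush batches. A rough usage sketch (the collection name and properties are illustrative; the API shape follows the v4 client's batching interface):

    import weaviate

    client = weaviate.connect_to_local()
    try:
        with client.batch.dynamic() as batch:
            for i in range(1000):
                batch.add_object(collection="Article", properties={"title": f"article-{i}"})
        # failures are collected rather than raised mid-batch, as the test above asserts
        if client.batch.failed_objects:
            print(f"{len(client.batch.failed_objects)} objects failed")
    finally:
        client.close()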