Skip to content

Commit

Permalink
Merge branch '24-integrate-vector-search' into 'dev'
Browse files Browse the repository at this point in the history
Resolve "Integrate Vector Search" objectbox#24

Closes objectbox#24

See merge request objectbox/objectbox-python!17
  • Loading branch information
loryruta committed Apr 12, 2024
2 parents 1395fa9 + 4ae7a46 commit 083cbba
Show file tree
Hide file tree
Showing 16 changed files with 1,181 additions and 400 deletions.
17 changes: 13 additions & 4 deletions objectbox/box.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,16 @@ def remove_all(self) -> int:
count = ctypes.c_uint64()
obx_box_remove_all(self._c_box, ctypes.byref(count))
return int(count.value)

def query(self, condition: QueryCondition) -> QueryBuilder:
    """ Creates a QueryBuilder for this Box and its entity, initialized with the given condition. """
    return QueryBuilder(self._ob, self, self._entity, condition)

def query(self, condition: Optional[QueryCondition] = None) -> QueryBuilder:
    """ Creates a QueryBuilder for the Entity that is managed by the Box.

    :param condition:
        Optional high-level condition. When given, it is applied to the freshly created
        QueryBuilder before that builder is returned, which allows a compact call style:
        ``box.query(name_property.equals("Johnny")).build()``
    :return: The new QueryBuilder (with the condition already applied if one was passed).
    """
    builder = QueryBuilder(self._ob, self)
    if condition is None:
        return builder
    condition.apply(builder)
    return builder
321 changes: 257 additions & 64 deletions objectbox/c.py

Large diffs are not rendered by default.

234 changes: 146 additions & 88 deletions objectbox/condition.py
Original file line number Diff line number Diff line change
@@ -1,95 +1,153 @@
from enum import Enum
from typing import *
import numpy as np

class _ConditionOp(Enum):
    """ Operator identifiers; QueryCondition.apply() compares against these to pick the builder call. """
    # Equality
    eq = 1
    notEq = 2
    # String matching
    contains = 3
    startsWith = 4
    endsWith = 5
    # Ordering
    gt = 6
    greaterOrEq = 7
    lt = 8
    lessOrEq = 9
    # Range (takes two values)
    between = 10

class _QueryConditionOp(Enum):
    """ Operator identifiers; QueryCondition._get_op_map() maps each member to its ``_apply_*`` handler. """
    # Equality
    EQ = 1
    NOT_EQ = 2
    # String matching
    CONTAINS = 3
    STARTS_WITH = 4
    ENDS_WITH = 5
    # Ordering
    GT = 6
    GTE = 7
    LT = 8
    LTE = 9
    # Range (takes two bounds)
    BETWEEN = 10
    # Vector search; handled by QueryCondition._apply_nearest_neighbor
    NEAREST_NEIGHBOR = 11


class QueryCondition:
def __init__(self, property_id: int, op: _ConditionOp, value, value_b = None, case_sensitive: bool = True):
def __init__(self, property_id: int, op: _QueryConditionOp, args: Dict[str, Any]):
if op not in self._get_op_map():
raise Exception(f"Invalid query condition op with ID: {op}")

self._property_id = property_id
self._op = op
self._value = value
self._value_b = value_b
self._case_sensitive = case_sensitive

def apply(self, builder: 'QueryBuilder'):
    """ Adds this condition to the given QueryBuilder.

    The operator selects the family of builder methods; the Python type of the stored
    value (str or int) selects the concrete method. String variants also receive the
    case-sensitivity flag. An operator that is not listed below is ignored.

    :param builder: The QueryBuilder receiving the condition.
    :raises Exception: If the stored value's type is not supported by the operator.
    """
    # (operator, error prefix, str-variant method, int-variant method, int variant takes 2nd value)
    table = (
        (_ConditionOp.eq, "Unsupported type for 'eq': ", 'equals_string', 'equals_int', False),
        (_ConditionOp.notEq, "Unsupported type for 'notEq': ", 'not_equals_string', 'not_equals_int', False),
        (_ConditionOp.contains, "Unsupported type for 'contains': ", 'contains_string', None, False),
        (_ConditionOp.startsWith, "Unsupported type for 'startsWith': ", 'starts_with_string', None, False),
        (_ConditionOp.endsWith, "Unsupported type for 'endsWith': ", 'ends_with_string', None, False),
        (_ConditionOp.gt, "Unsupported type for 'gt': ", 'greater_than_string', 'greater_than_int', False),
        (_ConditionOp.greaterOrEq, "Unsupported type for 'greaterOrEq': ",
         'greater_or_equal_string', 'greater_or_equal_int', False),
        (_ConditionOp.lt, "Unsupported type for 'lt': ", 'less_than_string', 'less_than_int', False),
        (_ConditionOp.lessOrEq, "Unsupported type for 'lessOrEq': ",
         'less_or_equal_string', 'less_or_equal_int', False),
        (_ConditionOp.between, "Unsupported type for 'between': ", None, 'between_2ints', True),
    )

    operand = self._value
    for op, error_prefix, str_method, int_method, takes_second in table:
        if not (self._op == op):
            continue
        if str_method is not None and isinstance(operand, str):
            getattr(builder, str_method)(self._property_id, operand, self._case_sensitive)
        elif int_method is not None and isinstance(operand, int):
            if takes_second:
                getattr(builder, int_method)(self._property_id, operand, self._value_b)
            else:
                getattr(builder, int_method)(self._property_id, operand)
        else:
            raise Exception(error_prefix + str(type(operand)))
        return
self._args = args

def _get_op_map(self):
    """ Returns the dispatch table that maps each supported _QueryConditionOp to its bound handler. """
    ops = _QueryConditionOp
    handlers = (
        (ops.EQ, self._apply_eq),
        (ops.NOT_EQ, self._apply_not_eq),
        (ops.CONTAINS, self._apply_contains),
        (ops.STARTS_WITH, self._apply_starts_with),
        (ops.ENDS_WITH, self._apply_ends_with),
        (ops.GT, self._apply_gt),
        (ops.GTE, self._apply_gte),
        (ops.LT, self._apply_lt),
        (ops.LTE, self._apply_lte),
        (ops.BETWEEN, self._apply_between),
        (ops.NEAREST_NEIGHBOR, self._apply_nearest_neighbor),
        # ... register the handler of a new query condition here ...
    )
    return dict(handlers)

def _apply_eq(self, qb: 'QueryBuilder'):
    """ Adds an equality condition: string variant for str values, integer variant for int values.

    :raises Exception: If the value is neither a str nor an int.
    """
    operand = self._args['value']
    match_case = self._args['case_sensitive']
    if isinstance(operand, str):
        qb.equals_string(self._property_id, operand, match_case)
        return
    if isinstance(operand, int):
        qb.equals_int(self._property_id, operand)
        return
    raise Exception(f"Unsupported type for 'EQ': {type(operand)}")

def _apply_not_eq(self, qb: 'QueryBuilder'):
    """ Adds an inequality condition: string variant for str values, integer variant for int values.

    :raises Exception: If the value is neither a str nor an int.
    """
    operand = self._args['value']
    match_case = self._args['case_sensitive']
    if isinstance(operand, str):
        qb.not_equals_string(self._property_id, operand, match_case)
        return
    if isinstance(operand, int):
        qb.not_equals_int(self._property_id, operand)
        return
    raise Exception(f"Unsupported type for 'NOT_EQ': {type(operand)}")

def _apply_contains(self, qb: 'QueryBuilder'):
    """ Adds a substring ("contains") condition; only str values are supported.

    :raises Exception: If the value is not a str.
    """
    needle = self._args['value']
    match_case = self._args['case_sensitive']
    if not isinstance(needle, str):
        raise Exception(f"Unsupported type for 'CONTAINS': {type(needle)}")
    qb.contains_string(self._property_id, needle, match_case)

def _apply_starts_with(self, qb: 'QueryBuilder'):
    """ Adds a prefix ("starts with") condition; only str values are supported.

    :raises Exception: If the value is not a str.
    """
    prefix = self._args['value']
    match_case = self._args['case_sensitive']
    if not isinstance(prefix, str):
        raise Exception(f"Unsupported type for 'STARTS_WITH': {type(prefix)}")
    qb.starts_with_string(self._property_id, prefix, match_case)

def _apply_ends_with(self, qb: 'QueryBuilder'):
    """ Adds a suffix ("ends with") condition; only str values are supported.

    :raises Exception: If the value is not a str.
    """
    suffix = self._args['value']
    match_case = self._args['case_sensitive']
    if not isinstance(suffix, str):
        raise Exception(f"Unsupported type for 'ENDS_WITH': {type(suffix)}")
    qb.ends_with_string(self._property_id, suffix, match_case)

def _apply_gt(self, qb: 'QueryBuilder'):
    """ Adds a "greater than" condition: string variant for str values, integer variant for int values.

    :raises Exception: If the value is neither a str nor an int.
    """
    operand = self._args['value']
    match_case = self._args['case_sensitive']
    if isinstance(operand, str):
        qb.greater_than_string(self._property_id, operand, match_case)
        return
    if isinstance(operand, int):
        qb.greater_than_int(self._property_id, operand)
        return
    raise Exception(f"Unsupported type for 'GT': {type(operand)}")

def _apply_gte(self, qb: 'QueryBuilder'):
    """ Adds a "greater or equal" condition: string variant for str values, integer variant for int values.

    :raises Exception: If the value is neither a str nor an int.
    """
    operand = self._args['value']
    match_case = self._args['case_sensitive']
    if isinstance(operand, str):
        qb.greater_or_equal_string(self._property_id, operand, match_case)
        return
    if isinstance(operand, int):
        qb.greater_or_equal_int(self._property_id, operand)
        return
    raise Exception(f"Unsupported type for 'GTE': {type(operand)}")

def _apply_lt(self, qb: 'QueryBuilder'):
    """ Adds a "less than" condition: string variant for str values, integer variant for int values.

    :param qb: The QueryBuilder receiving the condition.
    :raises Exception: If the value is neither a str nor an int.
    """
    value = self._args['value']
    case_sensitive = self._args['case_sensitive']
    if isinstance(value, str):
        qb.less_than_string(self._property_id, value, case_sensitive)
    elif isinstance(value, int):
        qb.less_than_int(self._property_id, value)
    else:
        # Same message as before, formatted like the sibling handlers.
        raise Exception(f"Unsupported type for 'LT': {type(value)}")

def _apply_lte(self, qb: 'QueryBuilder'):
    """ Adds a "less or equal" condition: string variant for str values, integer variant for int values.

    :raises Exception: If the value is neither a str nor an int.
    """
    operand = self._args['value']
    match_case = self._args['case_sensitive']
    if isinstance(operand, str):
        qb.less_or_equal_string(self._property_id, operand, match_case)
        return
    if isinstance(operand, int):
        qb.less_or_equal_int(self._property_id, operand)
        return
    raise Exception(f"Unsupported type for 'LTE': {type(operand)}")

def _apply_between(self, qb: 'QueryBuilder'):
    """ Adds a range condition between the integer bounds ``a`` and ``b``.

    :param qb: The QueryBuilder receiving the condition.
    :raises Exception: If either bound is not an int.
    """
    a = self._args['a']
    b = self._args['b']
    # Validate both bounds up front so a bad upper bound is reported the same way as a
    # bad lower bound instead of surfacing later inside the builder call.
    for bound in (a, b):
        if not isinstance(bound, int):
            raise Exception(f"Unsupported type for 'BETWEEN': {type(bound)}")
    qb.between_2ints(self._property_id, a, b)

def _apply_nearest_neighbor(self, qb: 'QueryBuilder'):
    """ Adds a nearest-neighbor condition for a float query vector.

    Accepted query vectors are a NumPy array with dtype float32, or a list whose first
    element is a float (float subclasses included).

    :param qb: The QueryBuilder receiving the condition.
    :raises Exception: If the query vector is empty or of an unsupported type.
    """
    query_vector = self._args['query_vector']
    element_count = self._args['element_count']

    if len(query_vector) == 0:
        raise Exception("query_vector can't be empty")

    if isinstance(query_vector, np.ndarray):
        is_float_vector = query_vector.dtype == np.float32
    elif isinstance(query_vector, list):
        # isinstance (rather than an exact type comparison) also accepts float subclasses.
        is_float_vector = isinstance(query_vector[0], float)
    else:
        is_float_vector = False

    if not is_float_vector:
        raise Exception(f"Unsupported type for 'NEAREST_NEIGHBOR': {type(query_vector)}")
    qb.nearest_neighbors_f32(self._property_id, query_vector, element_count)

def apply(self, qb: 'QueryBuilder'):
    """ Applies this condition to the given QueryBuilder using the handler registered for its operator. """
    handler = self._get_op_map()[self._op]
    handler(qb)
18 changes: 18 additions & 0 deletions objectbox/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import sys
import logging

logger = logging.getLogger("objectbox")


def setup_stdout_logger():
    """ Attaches a DEBUG-level stdout handler to the module-level ``logger`` and returns that logger. """
    # Output format example:
    # 2024-04-04 10:16:46,272 [objectbox-py] [DEBUG] Creating property "id" (ID=1, UID=1001)
    line_format = logging.Formatter('%(asctime)s [objectbox-py] [%(levelname)-5s] %(message)s')
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(logging.DEBUG)
    stdout_handler.setFormatter(line_format)
    logger.addHandler(stdout_handler)
    return logger

# No need to hook stdout, as pytest will do the job. Use --log-cli-level= to set the log level.
# setup_stdout_logger()
42 changes: 31 additions & 11 deletions objectbox/model/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,14 @@ def __init__(self, cls, id: int, uid: int):
self.id_property = None
self.fill_properties()

def __call__(self, *args):
    """ Instantiates the wrapped user class, forwarding all positional arguments to it. """
    instance = self.cls(*args)
    return instance
def __call__(self, **properties):
    """ The constructor of the user Entity class.

    Creates an instance of the wrapped class without arguments, then assigns every given
    keyword as an attribute on it.

    :raises Exception: If a keyword does not name an existing attribute of the instance.
    """
    instance = self.cls()
    for key, val in properties.items():
        if hasattr(instance, key):
            setattr(instance, key, val)
            continue
        raise Exception(f"Entity {self.name} has no property \"{key}\"")
    return instance

def fill_properties(self):
# TODO allow subclassing and support entities with __slots__ defined
Expand Down Expand Up @@ -90,6 +96,24 @@ def fill_properties(self):
elif self.id_property._ob_type != OBXPropertyType_Long:
raise Exception("ID property must be an int")

def get_property(self, name: str):
    """ Gets the property having the given name.

    :raises Exception: If no property of this entity has that name.
    """
    found = next((candidate for candidate in self.properties if candidate._name == name), None)
    if found is None:
        raise Exception(f"Property \"{name}\" not found in Entity: \"{self.name}\"")
    return found

def get_property_id(self, prop: Union[int, str, Property]) -> int:
    """ Resolves a property ID from an ID, a property name or a Property object.

    :raises Exception: If ``prop`` is none of the supported types.
    """
    if isinstance(prop, int):
        # Already an ID; nothing to resolve.
        return prop
    if isinstance(prop, str):
        return self.get_property(prop)._id
    if isinstance(prop, Property):
        return prop._id
    raise Exception(f"Unsupported Property type: {type(prop)}")

def get_value(self, object, prop: Property):
# in case value is not overwritten on the object, it's the Property object itself (= as defined in the Class)
val = getattr(object, prop._name)
Expand Down Expand Up @@ -228,12 +252,8 @@ def unmarshal(self, data: bytes):
return obj


# entity decorator - wrap _Entity to allow @Entity(id=, uid=), i.e. no class argument
def Entity(cls=None, id: int = 0, uid: int = 0):
    """ Entity decorator usable both bare (``@Entity``) and with arguments (``@Entity(id=, uid=)``). """
    def wrapper(cls):
        return _Entity(cls, id, uid)

    # With a class given, wrap it right away; otherwise hand back the decorator.
    return wrapper(cls) if cls else wrapper
def Entity(id: int = 0, uid: int = 0) -> Callable[[Type], _Entity]:
    """ Entity decorator factory: ``@Entity(id=, uid=)`` wraps the decorated class in an _Entity.

    The class itself is not an argument here; it is received by the returned decorator.
    """
    def wrapper(class_):
        wrapped = _Entity(class_, id, uid)
        return wrapped

    return wrapper
Loading

0 comments on commit 083cbba

Please sign in to comment.