Skip to content

Commit

Permalink
Add a TextRangeIndex index
Browse files Browse the repository at this point in the history
It allows usage of these filter suffixes:
- None
- eq
- lt
- lte
- gt
- gte
- startswith

It uses the ZRANGEBYLEX command from redis, so it will raise if that command
is not supported by the current redis version (redis >= 2.8.9 is required)
  • Loading branch information
twidi committed Jan 26, 2018
1 parent bd405af commit d62bb2e
Show file tree
Hide file tree
Showing 5 changed files with 583 additions and 6 deletions.
14 changes: 14 additions & 0 deletions limpyd/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,20 @@ def support_scripting(self):
self._support_scripting = False
return self._support_scripting

def support_zrangebylex(self):
    """
    Returns True if zrangebylex is available. Checks are done in the client
    library (redis-py) AND the redis server. Result is cached, so done only
    one time.

    Any regular error raised while checking (reading ``redis_version`` or
    ``connection``) is treated as "not supported" and cached as ``False``.
    """
    if not hasattr(self, '_support_zrangebylex'):
        try:
            self._support_zrangebylex = (
                self.redis_version >= (2, 8, 9)
                and hasattr(self.connection, 'zrangebylex')
            )
        except Exception:
            # Only swallow regular errors: a bare ``except`` would also trap
            # KeyboardInterrupt/SystemExit and cache a wrong ``False``.
            self._support_zrangebylex = False
    return self._support_zrangebylex


class Lock(redis.client.Lock):
"""
Expand Down
296 changes: 295 additions & 1 deletion limpyd/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

from logging import getLogger

from limpyd.exceptions import ImplementationError, UniquenessError
from limpyd.exceptions import ImplementationError, LimpydException, UniquenessError
from limpyd.utils import unique_key

logger = getLogger(__name__)

Expand Down Expand Up @@ -400,3 +401,296 @@ def remove(self, *args):
logger.debug("removing %s from index %s" % (pk, key))
self.connection.srem(key, pk)
self._deindexed_values.add(tuple(args))


class TextRangeIndex(BaseIndex):
    """Index allowing to filter on something greater/less than a value

    Every indexed value is stored, together with the pk of its instance, as a
    member of a single redis sorted-set, always with a score of 0. Filtering is
    then done with the redis ``ZRANGEBYLEX`` command.

    See Also
    ---------
    https://redis.io/topics/indexes#lexicographical-indexes

    """

    handled_suffixes = {None, 'eq', 'gt', 'gte', 'lt', 'lte', 'startswith'}
    handle_uniqueness = True
    index_key_name = 'text-range'

    # Text inserted between the value and the pk in each sorted-set member.
    separator = u':%s-SEPARATOR:' % index_key_name.upper()

    # Python-side comparison applied to the values extracted from the members
    # returned by redis, for the range suffixes. Members are compared by redis
    # as whole "value + separator + pk" strings, so a lexicographical range can
    # return members whose value alone does not satisfy the filter (for example
    # ``foo1`` for ``lte 'foo'``, because ``1`` sorts before the ``:`` that
    # starts the separator).
    _range_post_filters = {
        'lt': lambda stored, wanted: stored < wanted,
        'lte': lambda stored, wanted: stored <= wanted,
        'gt': lambda stored, wanted: stored > wanted,
        'gte': lambda stored, wanted: stored >= wanted,
    }

    def __init__(self, field):
        """Create the index and check that the database supports ZRANGEBYLEX

        Parameters
        ----------
        field:
            The field to index, passed as is to the parent constructor.

        Raises
        ------
        LimpydException
            If the index is already tied to a model and the database of this
            model reports that ``zrangebylex`` is not supported.

        """
        super(TextRangeIndex, self).__init__(field)

        try:
            model = self.model
        except AttributeError:
            # index not yet tied to a field that is itself tied to a model:
            # the support check cannot be done now
            return

        database = model.database
        if not database.support_zrangebylex():
            raise LimpydException(
                'Your redis version %s does not seems to support ZRANGEBYLEX '
                'so range indexes are not usable' % (
                    '.'.join(str(part) for part in database.redis_version)
                )
            )

    def get_storage_key(self, *args):
        """Return the redis key where to store the index for the given "value" (`args`)

        For this index, we store all PKs having a value for a field in the
        same sorted-set.
        Key has this form:
            model-name:field-name:sub-field-name:text-range
        The ':sub-field-name' part is repeated for each entry in *args that is
        not the final value

        Parameters
        -----------
        args: tuple
            All the "values" to take into account to get the storage key. The
            last entry, the final value, is not used.

        Returns
        -------
        str
            The redis key to use

        """
        # the final value is not needed for the storage key
        sub_fields = list(args)[:-1]

        parts = [
            self.model._name,
            self.field.name,
        ] + sub_fields + [
            self.index_key_name,
        ]

        return self.field.make_key(*parts)

    def check_uniqueness(self, *args, **kwargs):
        """Check if the given "value" (via `args`) is unique or not.

        Nothing is done if the field is not marked as unique.

        For the parameters, see ``BaseIndex.check_uniqueness``

        """
        if not self.field.unique:
            return

        try:
            pk = self.instance.pk.get()
        except AttributeError:
            # no instance (or no pk) available: no pk to exclude from the check
            pk = None

        key = self.get_storage_key(*args)
        value = list(args)[-1]
        pks = self.get_pks_for_filter(key, 'eq', self.normalize_value(value))

        self.assert_pks_uniqueness(pks, pk, value)

    def _prepare_value_for_storage(self, value, pk):
        """Prepare the value to be stored in the zset: value and pk separated

        Parameters
        ----------
        value: any
            The value, to normalize, to use
        pk: any
            The pk, that will be stringified

        Returns
        -------
        str
            The string ready to use as member of the sorted set.

        """
        normalized_value = self.normalize_value(value)
        return self.separator.join([normalized_value, str(pk)])

    def _extract_value_from_storage(self, string):
        """Taking a string that was a member of the zset, extract the value and pk

        The split is done on the last occurrence of the separator, so a value
        that itself contains the separator is returned untouched.

        Parameters
        ----------
        string: str
            The member extracted from the sorted set

        Returns
        -------
        tuple
            Tuple with the value and the pk, extracted from the string

        """
        value, _, pk = string.rpartition(self.separator)
        return value, pk

    def add(self, *args, **kwargs):
        """Add the instance tied to the field for the given "value" (via `args`) to the index

        For the parameters, see ``BaseIndex.add``

        """
        check_uniqueness = kwargs.get('check_uniqueness', True)

        if self.field.unique and check_uniqueness:
            self.check_uniqueness(*args)

        key = self.get_storage_key(*args)

        args = list(args)
        value = args[-1]

        # We add a string "value + separator + pk" to the storage sorted-set,
        # with a score of 0. When filtering, redis returns the members in
        # lexicographical order, and we are then able to extract the pk from
        # each returned member.

        pk = self.instance.pk.get()
        logger.debug("adding %s to index %s", pk, key)
        self.connection.zadd(key, 0, self._prepare_value_for_storage(value, pk))
        self._indexed_values.add(tuple(args))

    def remove(self, *args):
        """Remove the instance tied to the field for the given "value" (via `args`) from the index

        For the parameters, see ``BaseIndex.remove``

        """
        key = self.get_storage_key(*args)

        args = list(args)
        value = args[-1]

        pk = self.instance.pk.get()
        logger.debug("removing %s from index %s", pk, key)
        self.connection.zrem(key, self._prepare_value_for_storage(value, pk))
        self._deindexed_values.add(tuple(args))

    def get_lex_boundaries(self, filter_type, value):
        """Compute the boundaries to pass to zrangebylex depending of the filter type

        Parameters
        ----------
        filter_type: str
            One of the filter suffixes in ``self.handled_suffixes``
        value: str
            The normalized value for which we want the boundaries

        Returns
        -------
        tuple
            A tuple with two entries, the begin and the end of the boundaries
            to pass to zrangebylex. A boundary ending with ``\\xff`` is
            returned as utf-8 encoded bytes, the other ones as text.

        Notes
        -----
        For zrangebylex:
        - `(` means "not included"
        - `[` means "included"
        - `\\xff` is the last char, it allows to say "starting with"

        """
        assert filter_type in self.handled_suffixes

        start = '-'  # from the very start
        end = '+'  # to the very end

        if filter_type in (None, 'eq'):
            # we include the separator to only get the members with the exact value
            start = u'[%s%s' % (value, self.separator)
            end = start.encode('utf-8') + b'\xff'

        elif filter_type == 'gt':
            # starting at the value, excluded
            start = u'(%s' % value

        elif filter_type == 'gte':
            # starting at the value, included
            start = u'[%s' % value

        elif filter_type == 'lt':
            # ending with the value, excluded
            end = u'(%s' % value

        elif filter_type == 'lte':
            # ending with the value, included (but not starting with, hence the separator)
            end = u'[%s%s' % (value, self.separator)
            end = end.encode('utf-8') + b'\xff'

        elif filter_type == 'startswith':
            # using `\xff` to simulate "startswith"
            start = u'[%s' % value
            end = start.encode('utf-8') + b'\xff'

        return start, end

    def get_pks_for_filter(self, key, filter_type, value):
        """Extract the pks from the zset key for the given type and value

        It is used by the uniqueness check, and by ``get_filtered_key``, to
        extract the pks for the given value

        Parameters
        ----------
        key: str
            The key of the redis sorted-set to use
        filter_type: str
            One of ``self.handled_suffixes``
        value:
            The normalized value for which we want the pks

        Returns
        -------
        list
            The list of instances PKs extracted from the sorted set

        """
        start, end = self.get_lex_boundaries(filter_type, value)
        members = self.connection.zrangebylex(key, start, end)
        extracted = [self._extract_value_from_storage(member) for member in members]

        matches = self._range_post_filters.get(filter_type)
        if matches is None:
            # exact match or "startswith": the boundaries are enough
            return [member_pk for _, member_pk in extracted]

        # Range filters: the boundaries apply to the whole stored member, not
        # to the value alone, so the exact value cannot be excluded directly
        # (`gt`) and some members with a non-matching value may be returned
        # (`lte`, `gt`, `gte`). We check each extracted value again. This can
        # only remove wrongly returned members, not add missing ones.
        return [
            member_pk
            for member_value, member_pk in extracted
            if matches(member_value, value)
        ]

    def get_filtered_key(self, suffix, *args, **kwargs):
        """Returns the index key for the given args "value" (`args`)

        For the parameters, see ``BaseIndex.get_filtered_key``

        For now, the values are retrieved from redis then put back in a redis
        set/zset stored at a new key. This should be done via redis scripting
        if possible.

        Returns
        -------
        tuple
            The key holding the matching pks, its type (``'set'`` or
            ``'zset'``) and ``True`` (presumably to flag the key as temporary,
            to be confirmed against ``BaseIndex.get_filtered_key``).

        Raises
        ------
        ImplementationError
            If ``accepted_key_types`` is given and contains neither ``'set'``
            nor ``'zset'``.

        """
        accepted_key_types = kwargs.get('accepted_key_types', None)

        if accepted_key_types\
                and 'set' not in accepted_key_types and 'zset' not in accepted_key_types:
            raise ImplementationError(
                '%s can only return keys of type "set" or "zset"' % self.__class__.__name__
            )

        key = self.get_storage_key(*args)
        pks = self.get_pks_for_filter(key, suffix, self.normalize_value(list(args)[-1]))

        tmp_key = unique_key(self.connection)
        if not accepted_key_types or 'set' in accepted_key_types:
            if pks:
                self.connection.sadd(tmp_key, *pks)
            key_type = 'set'
        elif 'zset' in accepted_key_types:
            if pks:
                # the position in the list is used as score, to keep the order
                self.connection.zadd(tmp_key, **{pk: idx for idx, pk in enumerate(pks)})
            key_type = 'zset'

        return tmp_key, key_type, True
9 changes: 8 additions & 1 deletion tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys
import unittest

from redis import VERSION as redispy_version
from redis import VERSION as redispy_version, Redis

from limpyd.database import (RedisDatabase, DEFAULT_CONNECTION_SETTINGS)

Expand Down Expand Up @@ -100,3 +100,10 @@ def __exit__(self, exc_type, exc_value, traceback):
executed, self.num
)
)


# Condition and reason, ready to be unpacked as arguments of
# ``unittest.skipIf``: true when the installed redis-py client does not
# expose a ``zrangebylex`` method.
skip_if_no_zrangebylex = (
    not hasattr(Redis, 'zrangebylex'),
    'Redis-py %s does not support zrangebylex' % (
        '.'.join(str(part) for part in redispy_version),
    ),
)

22 changes: 21 additions & 1 deletion tests/contrib/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
from limpyd.contrib.collection import ExtendedCollectionManager, SORTED_SCORE, DEFAULT_STORE_TTL
from limpyd.utils import unique_key
from limpyd.exceptions import *
from tests.indexes import TextRangeIndexTestModel

from ..base import LimpydBaseTest, test_database
from ..base import LimpydBaseTest, test_database, skip_if_no_zrangebylex
from ..model import TestRedisModel, Boat as BaseBoat


Expand Down Expand Up @@ -84,6 +85,25 @@ def test_extended_collection_should_work_as_simple_one(self):
self.assertEqual(len(active_names), 2)
self.assertEqual(active_names, ['bar', 'foo'])

@unittest.skipIf(*skip_if_no_zrangebylex)
def test_range_index_should_work(self):
    """A text-range filter can be combined with another filter in an extended collection"""

    class TextRangeIndexTestModelExtended(TextRangeIndexTestModel):
        collection_manager = ExtendedCollectionManager

    model = TextRangeIndexTestModelExtended

    # instances are created in a fixed order; only two of them are expected
    # to match both filters
    foo_cat1 = model(name='foo', category='cat1')
    foo_cat1_pk = foo_cat1.pk.get()
    model(name='bar')
    foobar_cat1 = model(name='foobar', category='cat1')
    foobar_cat1_pk = foobar_cat1.pk.get()
    model(name='foobar', category='cat2')
    model(name='qux')

    expected = {
        foo_cat1_pk,  # foo and cat1
        foobar_cat1_pk,  # foobar and cat1
    }
    collection = model.collection(name__gte='foo', category='cat1')
    data = set(collection)
    self.assertEqual(data, expected)


class FieldOrModelAsValueForSortAndFilterTest(BaseTest):

Expand Down
Loading

0 comments on commit d62bb2e

Please sign in to comment.