Add a TextRangeIndex index

It allows the use of these filter suffixes: None, eq, lt, lte, gt, gte and startswith. It uses the ZRANGEBYLEX command from redis, so it will raise if this command is not supported by the current redis version (redis >= 2.8.9 is required).
limpyd · Jan 26, 2018 · d62bb2e · d62bb2e
1 parent bd405af
commit d62bb2e
Show file tree

Hide file tree

Showing 5 changed files with 583 additions and 6 deletions.
diff --git a/limpyd/database.py b/limpyd/database.py
@@ -154,6 +154,20 @@ def support_scripting(self):
                 self._support_scripting = False
         return self._support_scripting
 
+    def support_zrangebylex(self):
+        """
+        Returns True if zrangebylex is available. Checks are done in the client
+        library (redis-py) AND the redis server. Result is cached, so done only
+        one time.
+        """
+        if not hasattr(self, '_support_zrangebylex'):
+            try:
+                self._support_zrangebylex = self.redis_version >= (2, 8, 9) \
+                    and hasattr(self.connection, 'zrangebylex')
+            except:
+                self._support_zrangebylex = False
+        return self._support_zrangebylex
+
 
 class Lock(redis.client.Lock):
     """

diff --git a/limpyd/indexes.py b/limpyd/indexes.py
@@ -4,7 +4,8 @@
 
 from logging import getLogger
 
-from limpyd.exceptions import ImplementationError, UniquenessError
+from limpyd.exceptions import ImplementationError, LimpydException, UniquenessError
+from limpyd.utils import unique_key
 
 logger = getLogger(__name__)
 
@@ -400,3 +401,296 @@ def remove(self, *args):
         logger.debug("removing %s from index %s" % (pk, key))
         self.connection.srem(key, pk)
         self._deindexed_values.add(tuple(args))
+
+
+class TextRangeIndex(BaseIndex):
+    """Index allowing to filter on something greater/less than a value
+
+    See Also
+    ---------
+    https://redis.io/topics/indexes#lexicographical-indexes
+
+    """
+
+    handled_suffixes = {None, 'eq', 'gt', 'gte', 'lt', 'lte', 'startswith'}
+    handle_uniqueness = True
+    index_key_name = 'text-range'
+
+    separator = u':%s-SEPARATOR:' % index_key_name.upper()
+
+    def __init__(self, field):
+        super(TextRangeIndex, self).__init__(field)
+
+        try:
+            model = self.model
+        except AttributeError:
+            # index not yet tied to an field tied to a model
+            pass
+        else:
+            if not self.model.database.support_zrangebylex():
+                raise LimpydException(
+                    'Your redis version %s does not seems to support ZRANGEBYLEX '
+                    'so range indexes are not usable' % (
+                        '.'.join(str(part) for part in self.model.database.redis_version)
+                    )
+                )
+
+    def get_storage_key(self, *args):
+        """Return the redis key where to store the index for the given "value" (`args`)
+
+        For this index, we store all PKs having for a field in the same sorted-set.
+        Key has this form:
+        model-name:field-name:sub-field-name:text-range
+        The ':sub-field-name part' is repeated for each entry in *args that is not the final value
+
+        Parameters
+        -----------
+        args: tuple
+            All the "values" to take into account to get the storage key. The last entry,
+            the final value, is not used.
+
+        Returns
+        -------
+        str
+            The redis key to use
+
+        """
+
+        args = list(args)
+        args.pop()  # final value, not needed for the storage key
+
+        parts = [
+            self.model._name,
+            self.field.name,
+        ] + args + [
+            self.index_key_name,
+        ]
+
+        return self.field.make_key(*parts)
+
+    def check_uniqueness(self, *args, **kwargs):
+        """Check if the given "value" (via `args`) is unique or not.
+
+        For the parameters, see ``BaseIndex.check_uniqueness``
+
+        """
+
+        if not self.field.unique:
+            return
+
+        try:
+            pk = self.instance.pk.get()
+        except AttributeError:
+            pk = None
+
+        key = self.get_storage_key(*args)
+        value = list(args)[-1]
+        pks = self.get_pks_for_filter(key, 'eq', self.normalize_value(value))
+
+        self.assert_pks_uniqueness(pks, pk, value)
+
+    def _prepare_value_for_storage(self, value, pk):
+        """Prepare the value to be stored in the zset: value and pk separated
+
+        Parameters
+        ----------
+        value: any
+            The value, to normalize, to use
+        pk: any
+            The pk, that will be stringified
+
+        Returns
+        -------
+        str
+            The string ready to use as member of the sorted set.
+
+        """
+        normalized_value = self.normalize_value(value)
+        return self.separator.join([normalized_value, str(pk)])
+
+    def _extract_value_from_storage(self, string):
+        """Taking a string that was a member of the zset, extract the value and pk
+
+        Parameters
+        ----------
+        string: str
+            The member extracted from the sorted set
+
+        Returns
+        -------
+        tuple
+            Tuple with the value and the pk, extracted from the string
+
+        """
+        parts = string.split(self.separator)
+        pk = parts.pop()
+        return self.separator.join(parts), pk
+
+    def add(self, *args, **kwargs):
+        """Add the instance tied to the field for the given "value" (via `args`) to the index
+
+        For the parameters, see ``BaseIndex.add``
+
+        """
+
+        check_uniqueness = kwargs.get('check_uniqueness', True)
+
+        if self.field.unique and check_uniqueness:
+            self.check_uniqueness(*args)
+
+        key = self.get_storage_key(*args)
+
+        args = list(args)
+        value = args[-1]
+
+        # We add a string "value:pk" to the storage sorted-set, with a score of 0.
+        # Then when filtering will get then lexicographical ordered
+        # And we'll later be able to extract the pk for each returned values
+
+        pk = self.instance.pk.get()
+        logger.debug("adding %s to index %s" % (pk, key))
+        self.connection.zadd(key, 0, self._prepare_value_for_storage(value, pk))
+        self._indexed_values.add(tuple(args))
+
+    def remove(self, *args):
+        """Remove the instance tied to the field for the given "value" (via `args`) from the index
+
+        For the parameters, see ``BaseIndex.remove``
+
+        """
+
+        key = self.get_storage_key(*args)
+
+        args = list(args)
+        value = args[-1]
+
+        pk = self.instance.pk.get()
+        logger.debug("removing %s from index %s" % (pk, key))
+        self.connection.zrem(key, self._prepare_value_for_storage(value, pk))
+        self._deindexed_values.add(tuple(args))
+
+    def get_lex_boundaries(self, filter_type, value):
+        """Compute the boundaries to pass to zrangebylex depending of the filter type
+
+        Parameters
+        ----------
+        filter_type: str
+            One of the filter suffixes in ``self.handled_suffixes``
+        value: str
+            The normalized value for which we want the boundaries
+
+        Returns
+        -------
+        tuple
+            A tuple with two entries, the begin and the end of the boundaries to pass
+            to zrangebylex
+
+        Notes
+        -----
+        For zrangebylex:
+        - `(` means "not included"
+        - `[` means "included"
+        - `\xff` is the last char, it allows to say "starting with"
+
+        """
+
+        assert filter_type in self.handled_suffixes
+
+        start = '-'  # from the very start
+        end = '+'  # to the very end
+
+        if filter_type in (None, 'eq'):
+            # we include the separator to only get the members with the exact value
+            start = u'[%s%s' % (value, self.separator)
+            end = start.encode('utf-8') + b'\xff'
+
+        elif filter_type == 'gt':
+            # starting at the value, excluded
+            start = u'(%s' % value
+
+        elif filter_type == 'gte':
+            # starting at the value, included
+            start = u'[%s' % value
+
+        if filter_type == 'lt':
+            # ending with the value, excluded
+            end = u'(%s' % value
+
+        elif filter_type == 'lte':
+            # ending with the value, included (but not starting with, hence the separator)
+            end = u'[%s%s' % (value, self.separator)
+            end = end.encode('utf-8') + b'\xff'
+
+        elif filter_type == 'startswith':
+            # using `\xff` to simulate "startswith"
+            start = u'[%s' % value
+            end = start.encode('utf-8') + b'\xff'
+
+        return start, end
+
+    def get_pks_for_filter(self, key, filter_type, value):
+        """Extract the pks from the zset key for the given type and value
+
+        It is used by the uniqueness check to extract the pks for the given value
+
+        Parameters
+        ----------
+        key: str
+            The key of the redis sorted-set to use
+        filter_type: str
+            One of ``self.handled_suffixes``
+        value:
+            The normalized value for which we want the pks
+
+        Returns
+        -------
+        list
+            The list of instances PKs extracted from the sorted set
+
+        """
+        start, end = self.get_lex_boundaries(filter_type, value)
+        members = self.connection.zrangebylex(key, start, end)
+        if filter_type in ('lt', 'gt'):
+            # special case where we don't want the exact given value, but we cannot
+            # exclude it from the sorted set directly
+            return [
+                member_pk
+                for member_value, member_pk in
+                [self._extract_value_from_storage(member) for member in members]
+                if member_value != value
+            ]
+        else:
+            return [self._extract_value_from_storage(member)[-1] for member in members]
+
+    def get_filtered_key(self, suffix, *args, **kwargs):
+        """Returns the index key for the given args "value" (`args`)
+
+        For the parameters, see ``BaseIndex.get_filtered_key``
+
+        For now, the values are retrieved from redis then put back in a redis set/zset.
+        This should be done via redis scripting if possible.
+
+        """
+
+        accepted_key_types = kwargs.get('accepted_key_types', None)
+
+        if accepted_key_types\
+                and 'set' not in accepted_key_types and 'zset' not in accepted_key_types:
+            raise ImplementationError(
+                '%s can only return keys of type "set" or "zset"' % self.__class__.__name__
+            )
+
+        key = self.get_storage_key(*args)
+        pks = self.get_pks_for_filter(key, suffix, self.normalize_value(list(args)[-1]))
+
+        tmp_key = unique_key(self.connection)
+        if not accepted_key_types or 'set' in accepted_key_types:
+            if pks:
+                self.connection.sadd(tmp_key, *pks)
+            key_type = 'set'
+        elif 'zset' in accepted_key_types:
+            if pks:
+                self.connection.zadd(tmp_key, **{pk: idx for idx, pk in enumerate(pks)})
+            key_type = 'zset'
+
+        return tmp_key, key_type, True
diff --git a/tests/base.py b/tests/base.py
@@ -5,7 +5,7 @@
 import sys
 import unittest
 
-from redis import VERSION as redispy_version
+from redis import VERSION as redispy_version, Redis
 
 from limpyd.database import (RedisDatabase, DEFAULT_CONNECTION_SETTINGS)
 
@@ -100,3 +100,10 @@ def __exit__(self, exc_type, exc_value, traceback):
                 executed, self.num
             )
         )
+
+
+skip_if_no_zrangebylex = (
+    not hasattr(Redis, 'zrangebylex'),
+    'Redis-py %s does not support zrangebylex' % '.'.join(map(str, redispy_version))
+)
+
diff --git a/tests/contrib/collection.py b/tests/contrib/collection.py
@@ -8,8 +8,9 @@
 from limpyd.contrib.collection import ExtendedCollectionManager, SORTED_SCORE, DEFAULT_STORE_TTL
 from limpyd.utils import unique_key
 from limpyd.exceptions import *
+from tests.indexes import TextRangeIndexTestModel
 
-from ..base import LimpydBaseTest, test_database
+from ..base import LimpydBaseTest, test_database, skip_if_no_zrangebylex
 from ..model import TestRedisModel, Boat as BaseBoat
 
 
@@ -84,6 +85,25 @@ def test_extended_collection_should_work_as_simple_one(self):
         self.assertEqual(len(active_names), 2)
         self.assertEqual(active_names, ['bar', 'foo'])
 
+    @unittest.skipIf(*skip_if_no_zrangebylex)
+    def test_range_index_should_work(self):
+        class TextRangeIndexTestModelExtended(TextRangeIndexTestModel):
+            collection_manager = ExtendedCollectionManager
+
+        obj1 = TextRangeIndexTestModelExtended(name='foo', category='cat1')
+        pk1 = obj1.pk.get()
+        TextRangeIndexTestModelExtended(name='bar')
+        obj3 = TextRangeIndexTestModelExtended(name='foobar', category='cat1')
+        pk3 = obj3.pk.get()
+        TextRangeIndexTestModelExtended(name='foobar', category='cat2')
+        TextRangeIndexTestModelExtended(name='qux')
+
+        data = set(TextRangeIndexTestModelExtended.collection(name__gte='foo', category='cat1'))
+        self.assertEqual(data, {
+            pk1,  # foo and cat1
+            pk3,  # foobar and cat1
+        })
+
 
 class FieldOrModelAsValueForSortAndFilterTest(BaseTest):