From 9d3346023b2c79d53a571158218256923490cc96 Mon Sep 17 00:00:00 2001
From: "Stephane Angel (Twidi)"
Date: Tue, 14 Feb 2017 15:55:29 +0100
Subject: [PATCH] Add a way to clean/rebuild indexes

Fixes #99
---
 doc/collections.rst       |  39 ++++
 limpyd/contrib/indexes.py |  12 ++
 limpyd/database.py        |  23 ++
 limpyd/fields.py          | 168 ++++++++++++++-
 limpyd/indexes.py         | 168 +++++++++++++++
 tests/contrib/indexes.py  |  42 ++++
 tests/indexes.py          | 426 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 872 insertions(+), 6 deletions(-)

diff --git a/doc/collections.rst b/doc/collections.rst
index d534cc0..18f2414 100644
--- a/doc/collections.rst
+++ b/doc/collections.rst
@@ -236,6 +236,9 @@ And, of course, you can use fields with different indexes in the same query:
     >>> Person.collection(birth_year__gte=1960, lastname='Doe', nickname__startswith='S').instances()
     [<[4] Susan "Sue" Doe (1960)>]
 
+Configuration
+-------------
+
 If you want to use an index with a different behavior, you can use the `configure` class method of the index. Note that you can also create a new class by yourself but we provide this ability.
 
 It accepts one or many arguments (`prefix`, `transform` and `handle_uniqueness`) and returns a new index class to be passed to the `indexes` argument of the field.
@@ -290,6 +293,42 @@ It will simply override the default value set on the index class. Useful if your
 
 Note that if your field is marked as `unique`, you'll need to have at least one index capable of handling uniqueness.
 
+Clean and rebuild
+-----------------
+
+Before removing an index from the field declaration, you have to clear it, otherwise its data will stay in Redis.
+
+For this, use the `clear_indexes` method of the field.
+
+.. code:: python
+
+    >>> MyModel.get_field('myfield').clear_indexes()
+
+
+You can also rebuild the indexes:
+
+.. code:: python
+
+    >>> MyModel.get_field('myfield').rebuild_indexes()
+
+
+You can pass the named argument `index_class` to limit the clean/rebuild to indexes of this class only.
+
+Say you defined your own index:
+
+.. code:: python
+
+    >>> MyIndex = EqualIndex.configure(key='yolo', transform=lambda value: 'yolo' + value)
+    >>> class MyModel(RedisModel):
+    ...     myfield = model.StringField(indexable=True, indexes=[TextRangeIndex, MyIndex])
+
+You can clear/rebuild only your own index this way:
+
+.. code:: python
+
+    >>> MyModel.get_field('myfield').clear_indexes(index_class=MyIndex)
+
+
 Laziness
 ========
 
diff --git a/limpyd/contrib/indexes.py b/limpyd/contrib/indexes.py
index 73a9646..158d543 100644
--- a/limpyd/contrib/indexes.py
+++ b/limpyd/contrib/indexes.py
@@ -209,6 +209,18 @@ def get_filtered_keys(self, suffix, *args, **kwargs):
             if index.can_handle_suffix(suffix):
                 return index.get_filtered_keys(suffix, *args, **kwargs)
 
+    def get_all_storage_keys(self):
+        """Returns the keys to be removed by `clear` in aggressive mode
+
+        For the parameters, see BaseIndex.get_all_storage_keys
+        """
+
+        keys = set()
+        for index in self._indexes:
+            keys.update(index.get_all_storage_keys())
+
+        return keys
+
 
 # This is a multi-indexes managing the different parts of a date in the format YYYY-MM-DD
 DateIndexParts = MultiIndexes.compose([
diff --git a/limpyd/database.py b/limpyd/database.py
index 7d66fac..d5381f0 100644
--- a/limpyd/database.py
+++ b/limpyd/database.py
@@ -199,6 +199,29 @@ def call_script(self, script_dict, keys=None, args=None):
             script_dict['script_object'] = self.connection.register_script(script_dict['lua'])
         return script_dict['script_object'](keys=keys, args=args, client=self.connection)
 
+    def scan_keys(self, pattern):
+        """Take a pattern expected by the redis `scan` command and return all matching keys
+
+        Parameters
+        ----------
+        pattern: str
+            The pattern of keys to look for
+
+        Returns
+        -------
+        set
+            Set of all the keys found with this pattern
+
+        """
+        cursor = 0
+        all_keys = set()
+        while True:
+            cursor, keys = self.connection.scan(cursor, pattern)
+            all_keys.update(keys)
+            if not cursor or cursor == '0':  # string for redis.py < 2.10
+                break
+        return all_keys
+
 
 class Lock(redis.client.Lock):
     """
diff --git a/limpyd/fields.py b/limpyd/fields.py
index d7e350a..f350cea 100644
--- a/limpyd/fields.py
+++ b/limpyd/fields.py
@@ -5,6 +5,7 @@
 from future.builtins import zip
 from future.utils import with_metaclass
 
+from inspect import isclass
 from logging import getLogger
 from copy import copy
 
@@ -384,6 +385,27 @@ def _indexes(self):
 
         return [index_class(field=self) for index_class in self.index_classes]
 
+    def has_index(self, index):
+        """Tells if the field has an index matching the given one
+
+        Parameters
+        ----------
+        index: type or BaseIndex
+            It could be an index instance, or an index class
+
+        Returns
+        -------
+        bool
+            Will be ``True`` if the current field has an index that is an instance
+            of the given index or of the class of the given index
+
+        """
+        klass = index if isclass(index) else index.__class__
+        for one_index in self._indexes:
+            if isinstance(one_index, klass):
+                return True
+        return False
+
     def _attach_to_model(self, model):
         """
         Attach the current field to a model. Can be overriden to do something
@@ -399,6 +421,20 @@ def _attach_to_instance(self, instance):
         self._instance = instance
         self.lockable = self.lockable and instance.lockable
 
+    @property
+    def attached_to_model(self):
+        """Tells if the current field is the one attached to the model, not to an instance"""
+        try:
+            if not bool(self._model):
+                return False
+        except AttributeError:
+            return False
+        else:
+            try:
+                return not bool(self._instance)
+            except AttributeError:
+                return True
+
     def _call_command(self, name, *args, **kwargs):
         """
         Add lock management and call parent.
@@ -434,34 +470,125 @@ def _reset_indexes_caches(self):
         for index in self._indexes:
             index._reset_cache()
 
-    def index(self, value=None):
+    def index(self, value=None, only_index=None):
         """
         Handle field index process.
         """
         assert self.indexable, "Field not indexable"
+        assert not only_index or self.has_index(only_index), "Invalid index"
+        if only_index:
+            only_index = only_index if isclass(only_index) else only_index.__class__
         if value is None:
             value = self.proxy_get()
+
         if value is not None:
             needs_to_check_uniqueness = bool(self.unique)
+
             for index in self._indexes:
+                if only_index and not isinstance(index, only_index):
+                    continue
+
                 index.add(value, check_uniqueness=needs_to_check_uniqueness and index.handle_uniqueness)
+
                 if needs_to_check_uniqueness and index.handle_uniqueness:
                     # uniqueness check is done for this value
                     needs_to_check_uniqueness = False
 
-    def deindex(self, value=None):
+    def deindex(self, value=None, only_index=None):
         """
         Run process of deindexing field value(s).
         """
         assert self.indexable, "Field not indexable"
+        assert not only_index or self.has_index(only_index), "Invalid index"
+        if only_index:
+            only_index = only_index if isclass(only_index) else only_index.__class__
         if value is None:
             value = self.proxy_get()
+
         if value is not None:
             for index in self._indexes:
+                if only_index and not isinstance(index, only_index):
+                    continue
                 index.remove(value)
 
+    def clear_indexes(self, chunk_size=1000, aggressive=False, index_class=None):
+        """Clear all indexes tied to this field
+
+        Parameters
+        ----------
+        chunk_size: int
+            Defaults to 1000; the number of instances to load at once if not in aggressive mode.
+        aggressive: bool
+            Defaults to ``False``. When ``False``, the actual collection of instances will
+            be run through to deindex all the values.
+            But when ``True``, the database keys will be scanned to find keys that match the
+            pattern of the keys used by the indexes. This is a lot faster and may find forgotten keys.
+            But it may also find keys not related to the index.
+            Should be set to ``True`` if you are not sure about the already indexed values.
+        index_class: type
+            Allows clearing only the index(es) of this index class instead of all indexes.
+
+        Raises
+        ------
+        AssertionError
+            If called from an instance field. It must be called from the model field.
+            Also raised if the field is not indexable.
+
+        Examples
+        --------
+
+        >>> MyModel.get_field('myfield').clear_indexes()
+        >>> MyModel.get_field('myfield').clear_indexes(index_class=MyIndex)
+
+        """
+        assert self.indexable, "Field not indexable"
+        assert self.attached_to_model, \
+            '`clear_indexes` can only be called on a field attached to the model'
+
+        for index in self._indexes:
+            if index_class and not isinstance(index, index_class):
+                continue
+            index.clear(chunk_size=chunk_size, aggressive=aggressive)
+
+    def rebuild_indexes(self, chunk_size=1000, aggressive_clear=False, index_class=None):
+        """Rebuild all indexes tied to this field
+
+        Parameters
+        ----------
+        chunk_size: int
+            Defaults to 1000; the number of instances to load at once.
+        aggressive_clear: bool
+            Will be passed to the `aggressive` argument of the `clear_indexes` method.
+            If `False`, all values will be deindexed normally. If `True`, the work
+            will be done at a low level, scanning for keys that may match the ones used by the indexes.
+        index_class: type
+            Allows rebuilding only the index(es) of this index class instead of all indexes.
+
+        Raises
+        ------
+        AssertionError
+            If called from an instance field. It must be called from the model field.
+            Also raised if the field is not indexable.
+
+        Examples
+        --------
+
+        >>> MyModel.get_field('myfield').rebuild_indexes()
+        >>> MyModel.get_field('myfield').rebuild_indexes(index_class=MyIndex)
+
+        """
+        assert self.indexable, "Field not indexable"
+        assert self.attached_to_model, \
+            '`rebuild_indexes` can only be called on a field attached to the model'
+
+        for index in self._indexes:
+            if index_class and not isinstance(index, index_class):
+                continue
+            index.rebuild(chunk_size=chunk_size, aggressive_clear=aggressive_clear)
+
     def get_unique_index(self):
         assert self.unique, "Field not unique"
 
@@ -613,34 +740,49 @@ def _pop(self, command, *args, **kwargs):
             self.deindex([result])
         return result
 
-    def index(self, values=None):
+    def index(self, values=None, only_index=None):
         """
         Index all values stored in the field, or only given ones if any.
         """
         assert self.indexable, "Field not indexable"
+        assert not only_index or self.has_index(only_index), "Invalid index"
+        if only_index:
+            only_index = only_index if isclass(only_index) else only_index.__class__
         if values is None:
            values = self.proxy_get()
+
        for value in values:
            if value is not None:
                needs_to_check_uniqueness = bool(self.unique)
+
                for index in self._indexes:
+                    if only_index and not isinstance(index, only_index):
+                        continue
+
                    index.add(value, check_uniqueness=needs_to_check_uniqueness and index.handle_uniqueness)
+
                    if needs_to_check_uniqueness and index.handle_uniqueness:
                        # uniqueness check is done for this value
                        needs_to_check_uniqueness = False
 
-    def deindex(self, values=None):
+    def deindex(self, values=None, only_index=None):
         """
         Deindex all values stored in the field, or only given ones if any.
         """
         assert self.indexable, "Field not indexable"
+        assert not only_index or self.has_index(only_index), "Invalid index"
+        if only_index:
+            only_index = only_index if isclass(only_index) else only_index.__class__
         if not values:
            values = self.proxy_get()
+
        for value in values:
            if value is not None:
                for index in self._indexes:
+                    if only_index and not isinstance(index, only_index):
+                        continue
                    index.remove(value)
 
     def check_uniqueness(self, values):
@@ -898,34 +1040,48 @@ def _call_hmget(self, command, *args):
         # redispy needs a list, not args
         return self._traverse_command(command, args)
 
-    def index(self, values=None):
+    def index(self, values=None, only_index=None):
         """
         Deal with dicts and field names.
         """
         assert self.indexable, "Field not indexable"
+        assert not only_index or self.has_index(only_index), "Invalid index"
+        if only_index:
+            only_index = only_index if isclass(only_index) else only_index.__class__
         if values is None:
            values = self.proxy_get()
+
        for field_name, value in iteritems(values):
            if value is not None:
                needs_to_check_uniqueness = bool(self.unique)
+
                for index in self._indexes:
+                    if only_index and not isinstance(index, only_index):
+                        continue
+
                    index.add(field_name, value, check_uniqueness=needs_to_check_uniqueness and index.handle_uniqueness)
+
                    if needs_to_check_uniqueness and index.handle_uniqueness:
                        # uniqueness check is done for this value
                        needs_to_check_uniqueness = False
 
-    def deindex(self, values=None):
+    def deindex(self, values=None, only_index=None):
         """
         Deal with dicts and field names.
         """
         assert self.indexable, "Field not indexable"
+        assert not only_index or self.has_index(only_index), "Invalid index"
+        if only_index:
+            only_index = only_index if isclass(only_index) else only_index.__class__
         if values is None:
            values = self.proxy_get()
        for field_name, value in iteritems(values):
            if value is not None:
                for index in self._indexes:
+                    if only_index and not isinstance(index, only_index):
+                        continue
                    index.remove(field_name, value)
 
     def hexists(self, key):
diff --git a/limpyd/indexes.py b/limpyd/indexes.py
index 9d200bb..66e2b51 100644
--- a/limpyd/indexes.py
+++ b/limpyd/indexes.py
@@ -214,6 +214,20 @@ def model(self):
         """Shortcut to get the model tied to the field tied to this index"""
         return self.field._model
 
+    @property
+    def attached_to_model(self):
+        """Tells if the current index is the one attached to the model field, not to an instance field"""
+        try:
+            if not bool(self.model):
+                return False
+        except AttributeError:
+            return False
+        else:
+            try:
+                return not bool(self.instance)
+            except AttributeError:
+                return True
+
     @property
     def instance(self):
         """Shortcut to get the instance tied to the field tied to this index"""
@@ -375,6 +389,105 @@ def remove(self, *args):
         """
         raise NotImplementedError
 
+    def get_all_storage_keys(self):
+        """Returns the keys to be removed by `clear` in aggressive mode
+
+        Returns
+        -------
+        set
+            The set of all keys that match the keys used by this index.
+
+        """
+        raise NotImplementedError
+
+    def clear(self, chunk_size=1000, aggressive=False):
+        """Will deindex all the values for the current field
+
+        Parameters
+        ----------
+        chunk_size: int
+            Defaults to 1000; the number of instances to load at once if not in aggressive mode.
+        aggressive: bool
+            Defaults to ``False``. When ``False``, the actual collection of instances will
+            be run through to deindex all the values.
+            But when ``True``, the database keys will be scanned to find keys that match the
+            pattern of the keys used by the index. This is a lot faster and may find forgotten keys.
+            But it may also find keys not related to the index.
+            Should be set to ``True`` if you are not sure about the already indexed values.
+
+        Raises
+        ------
+        AssertionError
+            If called from an index tied to an instance field. It must be called from the model field.
+
+        Examples
+        --------
+
+        >>> MyModel.get_field('myfield')._indexes[0].clear()
+
+        """
+        assert self.attached_to_model, \
+            '`clear` can only be called on an index attached to the model field'
+
+        if aggressive:
+            keys = self.get_all_storage_keys()
+            with self.model.database.pipeline(transaction=False) as pipe:
+                for key in keys:
+                    pipe.delete(key)
+                pipe.execute()
+
+        else:
+            start = 0
+            while True:
+                instances = self.model.collection().sort().instances(skip_exist_test=True)[start:start + chunk_size]
+                for instance in instances:
+                    field = instance.get_instance_field(self.field.name)
+                    value = field.proxy_get()
+                    if value is not None:
+                        field.deindex(value, only_index=self)
+
+                if len(instances) < chunk_size:  # not enough data, it means we are done
+                    break
+
+                start += chunk_size
+
+    def rebuild(self, chunk_size=1000, aggressive_clear=False):
+        """Rebuild the whole index for this field.
+
+        Parameters
+        ----------
+        chunk_size: int
+            Defaults to 1000; the number of instances to load at once.
+        aggressive_clear: bool
+            Will be passed to the `aggressive` argument of the `clear` method.
+            If `False`, all values will be deindexed normally.
If `True`, the work + will be done at low level, scanning for keys that may match the ones used by the index + + Examples + -------- + + >>> MyModel.get_field('myfield')._indexes[0].rebuild() + + """ + assert self.attached_to_model, \ + '`rebuild` can only be called on an index attached to the model field' + + self.clear(chunk_size=chunk_size, aggressive=aggressive_clear) + + start = 0 + while True: + instances = self.model.collection().sort().instances(skip_exist_test=True)[start:start + chunk_size] + for instance in instances: + field = instance.get_instance_field(self.field.name) + value = field.proxy_get() + if value is not None: + field.index(value, only_index=self) + + if len(instances) < chunk_size: # not enough data, it means we are done + break + + start += chunk_size + class EqualIndex(BaseIndex): """Default simple equal index.""" @@ -443,6 +556,35 @@ def get_storage_key(self, *args, **kwargs): return self.field.make_key(*parts) + def get_all_storage_keys(self): + """Returns the keys to be removed by `clear` in aggressive mode + + For the parameters, see BaseIndex.get_all_storage_keys + + """ + + parts1 = [ + self.model._name, + self.field.name, + ] + + parts2 = parts1 + ['*'] # for indexes taking args, like for hashfields + + if self.prefix: + parts1.append(self.prefix) + parts2.append(self.prefix) + + if self.key: + parts1.append(self.key) + parts2.append(self.key) + + parts1.append('*') + parts2.append('*') + + return self.model.database.scan_keys(self.field.make_key(*parts1)).union( + self.model.database.scan_keys(self.field.make_key(*parts2)) + ) + def check_uniqueness(self, *args, **kwargs): """Check if the given "value" (via `args`) is unique or not. @@ -548,6 +690,32 @@ def get_storage_key(self, *args): return self.field.make_key(*parts) + def get_all_storage_keys(self): + """Returns the keys to be removed by `clear` in aggressive mode + + For the parameters, see BaseIndex.get_all_storage_keys + + """ + + parts1 = [ + self.model._name, + self.field.name, + ] + + parts2 = parts1 + ['*'] # for indexes taking args, like for hashfields + + if self.prefix: + parts1.append(self.prefix) + parts2.append(self.prefix) + + if self.key: + parts1.append(self.key) + parts2.append(self.key) + + return self.model.database.scan_keys(self.field.make_key(*parts1)).union( + self.model.database.scan_keys(self.field.make_key(*parts2)) + ) + def prepare_value_for_storage(self, value, pk): """Prepare the value to be stored in the zset diff --git a/tests/contrib/indexes.py b/tests/contrib/indexes.py index 616bf41..e848ab1 100644 --- a/tests/contrib/indexes.py +++ b/tests/contrib/indexes.py @@ -169,6 +169,48 @@ class MultiIndexTestModel(TestRedisModel): {pk2} ) + def test_cleaning(self): + + index_class = MultiIndexes.compose([ + EqualIndex.configure( + prefix='first_letter', + transform=lambda v: v[0] if v else '', + handle_uniqueness=False + ), + EqualIndex + ]) + + class MultiIndexTestModel2(TestRedisModel): + name = fields.StringField(indexable=True, indexes=[index_class], unique=True) + + pk1 = MultiIndexTestModel2(name="foo").pk.get() + pk2 = MultiIndexTestModel2(name="bar").pk.get() + + index = MultiIndexTestModel2.get_field('name')._indexes[0] + + # check the keys, we should have the ones from both included index + keys = index.get_all_storage_keys() + self.assertSetEqual(keys, { + 'tests:multiindextestmodel2:name:foo', + 'tests:multiindextestmodel2:name:bar', + 'tests:multiindextestmodel2:name:first_letter:b', + 'tests:multiindextestmodel2:name:first_letter:f', + }) + + # clear the index 
+ index.clear() + + # we should have nothing indexed + self.assertSetEqual(set(MultiIndexTestModel2.collection(name='foo')), set()) + self.assertSetEqual(set(MultiIndexTestModel2.collection(name__first_letter='b')), set()) + + # rebuild it + index.rebuild() + + # everything should be indexed + self.assertSetEqual(set(MultiIndexTestModel2.collection(name='foo')), {pk1}) + self.assertSetEqual(set(MultiIndexTestModel2.collection(name__first_letter='b')), {pk2}) + class DateTimeModelTest(TestRedisModel): date = fields.InstanceHashField(indexable=True, indexes=[DateIndex]) diff --git a/tests/indexes.py b/tests/indexes.py index 777bb0b..8786d91 100644 --- a/tests/indexes.py +++ b/tests/indexes.py @@ -675,3 +675,429 @@ def test_many_filters(self): self.assertEqual(data, { self.pk1, # -15, and cat1, and foo lte fooa }) + + +class CleanTestCase(LimpydBaseTest): + + def test_equal_index(self): + + class CleanModel1(TestRedisModel): + field = fields.StringField(indexable=True) + other_field = fields.StringField(indexable=True) + key_field = fields.StringField(indexable=True, indexes=[EqualIndex.configure(key='foo')]) + prefix_field = fields.StringField(indexable=True, indexes=[EqualIndex.configure(prefix='bar')]) + key_prefix_field = fields.StringField(indexable=True, indexes=[EqualIndex.configure(key='baz', prefix='qux')]) + hash_field = fields.HashField(indexable=True) + two_indexes_field = fields.StringField(indexable=True, indexes=[ + EqualIndex.configure(prefix='one'), + EqualIndex.configure(prefix='two', transform=lambda value: value[::-1]), + ]) + + pk1 = CleanModel1( + field='a', other_field='aa', key_field='aaa', prefix_field='aaaa', key_prefix_field='aaaaa', + hash_field={'aaaaaa1': 'AAAAAA1', 'aaaaaa2': 'AAAAAA2'}, + two_indexes_field='aaaaaaX', + ).pk.get() + pk2 = CleanModel1( + field='b', other_field='bb', key_field='bbb', prefix_field='bbbb', key_prefix_field='bbbbb', + hash_field={'bbbbbb1': 'BBBBBB1', 'bbbbbb2': 'BBBBBB2'}, + two_indexes_field='bbbbbbX', + ).pk.get() + + ### check simple index + index = CleanModel1.get_field('field')._indexes[0] + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel1:field:a', + 'tests:cleanmodel1:field:b', + }) + + # check that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel1.collection(field='a')), set()) + + # but index for other fields are still present + self.assertSetEqual(set(CleanModel1.collection(other_field='aa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(key_field='aaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(prefix_field__bar='aaaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(key_prefix_field__qux='aaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa1='AAAAAA1')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa2='AAAAAA2')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + + # check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel1.collection(field='a')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(field='b')), {pk2}) + + # now for index with key + index = CleanModel1.get_field('key_field')._indexes[0] + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel1:key_field:foo:aaa', + 'tests:cleanmodel1:key_field:foo:bbb', + }) + + # check 
that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel1.collection(key_field='aaa')), set()) + + # but index for other fields are still present + self.assertSetEqual(set(CleanModel1.collection(other_field='aa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(field='a')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(prefix_field__bar='aaaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(key_prefix_field__qux='aaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa1='AAAAAA1')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa2='AAAAAA2')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + + # check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel1.collection(key_field='aaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(key_field='bbb')), {pk2}) + + ### now for index with prefix + index = CleanModel1.get_field('prefix_field')._indexes[0] + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel1:prefix_field:bar:aaaa', + 'tests:cleanmodel1:prefix_field:bar:bbbb', + }) + + # check that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel1.collection(prefix_field__bar='aaaa')), set()) + + # but index for other fields are still present + self.assertSetEqual(set(CleanModel1.collection(other_field='aa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(field='a')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(key_field='aaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(key_prefix_field__qux='aaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa1='AAAAAA1')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa2='AAAAAA2')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + + # check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel1.collection(prefix_field__bar='aaaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(prefix_field__bar='bbbb')), {pk2}) + + ### now for index with key and prefix + index = CleanModel1.get_field('key_prefix_field')._indexes[0] + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel1:key_prefix_field:qux:baz:aaaaa', + 'tests:cleanmodel1:key_prefix_field:qux:baz:bbbbb', + }) + + # check that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel1.collection(key_prefix_field__qux='aaaaa')), set()) + + # but index for other fields are still present + self.assertSetEqual(set(CleanModel1.collection(other_field='aa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(field='a')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(key_field='aaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(prefix_field__bar='aaaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa1='AAAAAA1')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa2='AAAAAA2')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + + 
# check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel1.collection(key_prefix_field__qux='aaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(key_prefix_field__qux='bbbbb')), {pk2}) + + ### now for index for hashfield + index = CleanModel1.get_field('hash_field')._indexes[0] + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel1:hash_field:aaaaaa1:AAAAAA1', + 'tests:cleanmodel1:hash_field:aaaaaa2:AAAAAA2', + 'tests:cleanmodel1:hash_field:bbbbbb1:BBBBBB1', + 'tests:cleanmodel1:hash_field:bbbbbb2:BBBBBB2', + }) + + # check that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa1='AAAAAA1')), set()) + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa2='AAAAAA2')), set()) + self.assertSetEqual(set(CleanModel1.collection(hash_field__bbbbbb1='BBBBBB1')), set()) + self.assertSetEqual(set(CleanModel1.collection(hash_field__bbbbbb2='BBBBBB2')), set()) + + # but index for other fields are still present + self.assertSetEqual(set(CleanModel1.collection(other_field='aa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(field='a')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(key_field='aaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(prefix_field__bar='aaaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(key_prefix_field__qux='aaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + + # check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa1='AAAAAA1')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(hash_field__aaaaaa2='AAAAAA2')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(hash_field__bbbbbb1='BBBBBB1')), {pk2}) + self.assertSetEqual(set(CleanModel1.collection(hash_field__bbbbbb2='BBBBBB2')), {pk2}) + + ### now for multi-indexes + index = CleanModel1.get_field('two_indexes_field')._indexes[1] # the reverse one + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel1:two_indexes_field:two:Xaaaaaa', + 'tests:cleanmodel1:two_indexes_field:two:Xbbbbbb', + }) + + # check that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__two='Xaaaaaa')), set()) + + # but other index still present + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + + # check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + + # and other index still present + self.assertSetEqual(set(CleanModel1.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + + # both methods cannot be called from instance index + with self.assertRaises(AssertionError): + CleanModel1().get_field('field')._indexes[0].clear() + with self.assertRaises(AssertionError): + CleanModel1().get_field('field')._indexes[0].rebuild() + + @unittest.skipIf(*skip_if_no_zrangebylex) + def test_range_index(self): + + class CleanModel2(TestRedisModel): + field = fields.StringField(indexable=True, indexes=[TextRangeIndex]) + other_field = fields.StringField(indexable=True, indexes=[TextRangeIndex]) + key_field = fields.StringField(indexable=True, 
indexes=[TextRangeIndex.configure(key='foo')]) + prefix_field = fields.StringField(indexable=True, indexes=[TextRangeIndex.configure(prefix='bar')]) + key_prefix_field = fields.StringField(indexable=True, indexes=[TextRangeIndex.configure(key='baz', prefix='qux')]) + hash_field = fields.HashField(indexable=True, indexes=[TextRangeIndex]) + two_indexes_field = fields.StringField(indexable=True, indexes=[ + TextRangeIndex.configure(prefix='one'), + TextRangeIndex.configure(prefix='two', transform=lambda value: value[::-1]), + ]) + + pk1 = CleanModel2( + field='a', other_field='aa', key_field='aaa', prefix_field='aaaa', key_prefix_field='aaaaa', + hash_field={'aaaaaa1': 'AAAAAA1', 'aaaaaa2': 'AAAAAA2'}, + two_indexes_field='aaaaaaX', + ).pk.get() + pk2 = CleanModel2( + field='b', other_field='bb', key_field='bbb', prefix_field='bbbb', key_prefix_field='bbbbb', + hash_field={'bbbbbb1': 'BBBBBB1', 'bbbbbb2': 'BBBBBB2'}, + two_indexes_field='bbbbbbX', + ).pk.get() + + ### check simple index + index = CleanModel2.get_field('field')._indexes[0] + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel2:field:text-range', + }) + + # check that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel2.collection(field='a')), set()) + + # but index for other fields are still present + self.assertSetEqual(set(CleanModel2.collection(other_field='aa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(key_field='aaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(prefix_field__bar='aaaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(key_prefix_field__qux='aaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa1='AAAAAA1')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa2='AAAAAA2')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + + # check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel2.collection(field='a')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(field='b')), {pk2}) + + # now for index with key + index = CleanModel2.get_field('key_field')._indexes[0] + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel2:key_field:foo', + }) + + # check that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel2.collection(key_field='aaa')), set()) + + # but index for other fields are still present + self.assertSetEqual(set(CleanModel2.collection(other_field='aa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(field='a')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(prefix_field__bar='aaaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(key_prefix_field__qux='aaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa1='AAAAAA1')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa2='AAAAAA2')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + + # check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel2.collection(key_field='aaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(key_field='bbb')), {pk2}) + + ### now for index with prefix + index = 
CleanModel2.get_field('prefix_field')._indexes[0] + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel2:prefix_field:bar:text-range', + }) + + # check that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel2.collection(prefix_field__bar='aaaa')), set()) + + # but index for other fields are still present + self.assertSetEqual(set(CleanModel2.collection(other_field='aa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(field='a')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(key_field='aaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(key_prefix_field__qux='aaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa1='AAAAAA1')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa2='AAAAAA2')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + + # check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel2.collection(prefix_field__bar='aaaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(prefix_field__bar='bbbb')), {pk2}) + + ### now for index with key and prefix + index = CleanModel2.get_field('key_prefix_field')._indexes[0] + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel2:key_prefix_field:qux:baz', + }) + + # check that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel2.collection(key_prefix_field__qux='aaaaa')), set()) + + # but index for other fields are still present + self.assertSetEqual(set(CleanModel2.collection(other_field='aa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(field='a')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(key_field='aaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(prefix_field__bar='aaaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa1='AAAAAA1')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa2='AAAAAA2')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + + # check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel2.collection(key_prefix_field__qux='aaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(key_prefix_field__qux='bbbbb')), {pk2}) + + ### now for index for hashfield + index = CleanModel2.get_field('hash_field')._indexes[0] + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel2:hash_field:aaaaaa1:text-range', + 'tests:cleanmodel2:hash_field:aaaaaa2:text-range', + 'tests:cleanmodel2:hash_field:bbbbbb1:text-range', + 'tests:cleanmodel2:hash_field:bbbbbb2:text-range', + }) + + # check that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa1='AAAAAA1')), set()) + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa2='AAAAAA2')), set()) + self.assertSetEqual(set(CleanModel2.collection(hash_field__bbbbbb1='BBBBBB1')), set()) + self.assertSetEqual(set(CleanModel2.collection(hash_field__bbbbbb2='BBBBBB2')), set()) + + # but index for other fields are still present + self.assertSetEqual(set(CleanModel2.collection(other_field='aa')), {pk1}) + 
self.assertSetEqual(set(CleanModel2.collection(field='a')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(key_field='aaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(prefix_field__bar='aaaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(key_prefix_field__qux='aaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + + # check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa1='AAAAAA1')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(hash_field__aaaaaa2='AAAAAA2')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(hash_field__bbbbbb1='BBBBBB1')), {pk2}) + self.assertSetEqual(set(CleanModel2.collection(hash_field__bbbbbb2='BBBBBB2')), {pk2}) + + ### now for multi-indexes + index = CleanModel2.get_field('two_indexes_field')._indexes[1] # the reverse one + + # check we have the keys + self.assertSetEqual(index.get_all_storage_keys(), { + 'tests:cleanmodel2:two_indexes_field:two:text-range', + }) + + # check that they are deleted + index.clear() + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__two='Xaaaaaa')), set()) + + # but other index still present + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + + # check the index is rebuilt + index.rebuild() + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__two='Xaaaaaa')), {pk1}) + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + + # and other index still present + self.assertSetEqual(set(CleanModel2.collection(two_indexes_field__one='aaaaaaX')), {pk1}) + + def test_from_field(self): + + class CleanModel3(TestRedisModel): + two_indexes_field = fields.StringField(indexable=True, indexes=[ + EqualIndex.configure(prefix='one'), + EqualIndex.configure(prefix='two', transform=lambda value: value[::-1]), + ]) + + pk1 = CleanModel3(two_indexes_field='aX').pk.get() + pk2 = CleanModel3(two_indexes_field='bX').pk.get() + + # we clear all indexes + CleanModel3.get_field('two_indexes_field').clear_indexes() + self.assertSetEqual(set(CleanModel3.collection(two_indexes_field__one='aX')), set()) + self.assertSetEqual(set(CleanModel3.collection(two_indexes_field__two='Xa')), set()) + self.assertSetEqual(set(CleanModel3.collection(two_indexes_field__one='bX')), set()) + self.assertSetEqual(set(CleanModel3.collection(two_indexes_field__two='Xb')), set()) + + # and rebuild them + CleanModel3.get_field('two_indexes_field').rebuild_indexes() + self.assertSetEqual(set(CleanModel3.collection(two_indexes_field__one='aX')), {pk1}) + self.assertSetEqual(set(CleanModel3.collection(two_indexes_field__two='Xa')), {pk1}) + self.assertSetEqual(set(CleanModel3.collection(two_indexes_field__one='bX')), {pk2}) + self.assertSetEqual(set(CleanModel3.collection(two_indexes_field__two='Xb')), {pk2}) + + # this doesn't work from an instance + with self.assertRaises(AssertionError): + CleanModel3().two_indexes_field.clear_indexes() + with self.assertRaises(AssertionError): + CleanModel3().two_indexes_field.rebuild_indexes()
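
A minimal end-to-end sketch of the clean/rebuild API introduced by this patch, assembled from the documentation and tests above. The ``Article`` model, its ``title`` field, the ``FirstLetterIndex`` index, the ``example`` namespace and the default ``RedisDatabase()`` connection are illustrative assumptions, not part of the patch.

.. code:: python

    from limpyd import model, fields
    from limpyd.indexes import EqualIndex

    # A custom index class built with `configure`, mirroring tests/contrib/indexes.py
    FirstLetterIndex = EqualIndex.configure(
        prefix='first_letter',
        transform=lambda value: value[0] if value else '',
    )

    class Article(model.RedisModel):                # hypothetical model
        database = model.RedisDatabase()            # assumes a reachable local Redis server
        namespace = 'example'
        title = fields.StringField(indexable=True, indexes=[EqualIndex, FirstLetterIndex])

    Article(title='limpyd')
    Article(title='redis')

    field = Article.get_field('title')              # the model field, not an instance field

    # Deindex every stored value for this field, then re-index all instances
    field.clear_indexes()
    field.rebuild_indexes()

    # Limit the clean/rebuild to a single index class
    field.clear_indexes(index_class=FirstLetterIndex)
    field.rebuild_indexes(index_class=FirstLetterIndex)

    # Index-level API: aggressive mode scans Redis for matching keys
    # (via Database.scan_keys) instead of iterating over the collection
    index = field._indexes[0]
    index.clear(aggressive=True)
    index.rebuild()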