Skip to content

Commit

Permalink
Add a way to clean/rebuild indexes
Browse files Browse the repository at this point in the history
Fixes #99
  • Loading branch information
twidi committed Jan 29, 2018
1 parent 0d69492 commit 9d33460
Show file tree
Hide file tree
Showing 7 changed files with 872 additions and 6 deletions.
39 changes: 39 additions & 0 deletions doc/collections.rst
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,9 @@ And, of course, you can use fields with different indexes in the same query:
>>> Person.collection(birth_year__gte=1960, lastname='Doe', nickname__startswith='S').instances()
[<[4] Susan "Sue" Doe (1960)>]
Configuration
-------------
If you want to use an index with a different behavior, you can use the `configure` class method of the index. Note that you could also create a new index class yourself, but `configure` is provided as a shortcut.
It accepts one or many arguments (`prefix`, `transform` and `handle_uniqueness`) and returns a new index class to be passed to the `indexes` argument of the field.
Expand Down Expand Up @@ -290,6 +293,42 @@ It will simply override the default value set on the index class. Useful if your
Note that if your field is marked as `unique`, you'll need to have at least one index capable of handling uniqueness.
Clean and rebuild
-----------------
Before removing an index from the field declaration, you have to clean it, else the data will stay in redis.
For this, use the `clear_indexes` method of the field.
.. code:: python
>>> MyModel.get_field('myfield').clear_indexes()
You can also rebuild them:
.. code:: python
>>> MyModel.get_field('myfield').rebuild_indexes()
You can pass the named argument `index_class` to limit the clean/rebuild to only indexes of this class.
Say you defined your own index:
.. code:: python
>>> MyIndex = EqualIndex.configure(key='yolo', transform=lambda value: 'yolo' + value)
>>> class MyModel(RedisModel):
...     myfield = model.StringField(indexable=True, indexes=[TextRangeIndex, MyIndex])
You can clear/rebuild only your own index this way:
.. code:: python
>>> MyModel.get_field('myfield').clear_indexes(index_class=MyIndex)
Laziness
========
Expand Down
12 changes: 12 additions & 0 deletions limpyd/contrib/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,18 @@ def get_filtered_keys(self, suffix, *args, **kwargs):
if index.can_handle_suffix(suffix):
return index.get_filtered_keys(suffix, *args, **kwargs)

def get_all_storage_keys(self):
    """Returns the keys to be removed by `clear` in aggressive mode

    The keys reported by every index listed in ``self._indexes`` are merged
    into a single set, so a key shared by several sub-indexes appears once.

    For the parameters, see BaseIndex.get_all_storage_keys

    Returns
    -------
    set
        Union of the storage keys of all the sub-indexes

    """
    return set().union(*(
        sub_index.get_all_storage_keys() for sub_index in self._indexes
    ))


# This is a multi-index managing the different parts of a date in the format YYYY-MM-DD
DateIndexParts = MultiIndexes.compose([
Expand Down
23 changes: 23 additions & 0 deletions limpyd/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,29 @@ def call_script(self, script_dict, keys=None, args=None):
script_dict['script_object'] = self.connection.register_script(script_dict['lua'])
return script_dict['script_object'](keys=keys, args=args, client=self.connection)

def scan_keys(self, pattern):
    """Take a pattern expected by the redis `scan` command and return all matching keys

    The `scan` command of the connection is called repeatedly, each call
    starting from the cursor returned by the previous one, until the returned
    cursor tells that the iteration is over.

    Parameters
    ----------
    pattern: str
        The pattern of keys to look for

    Returns
    -------
    set
        Set of all the keys found with this pattern

    """
    found_keys = set()
    position = 0
    finished = False
    while not finished:
        position, batch = self.connection.scan(position, pattern)
        found_keys.update(batch)
        # A falsy cursor ends the iteration; the '0' string is what
        # redis.py < 2.10 returns instead of the integer
        finished = not position or position == '0'
    return found_keys


class Lock(redis.client.Lock):
"""
Expand Down
168 changes: 162 additions & 6 deletions limpyd/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from future.builtins import zip
from future.utils import with_metaclass

from inspect import isclass
from logging import getLogger
from copy import copy

Expand Down Expand Up @@ -384,6 +385,27 @@ def _indexes(self):

return [index_class(field=self) for index_class in self.index_classes]

def has_index(self, index):
    """Tells if the field has an index matching the given one

    Parameters
    -----------
    index: type or BaseIndex
        It could be an index instance, or an index class

    Returns
    -------
    bool
        Will be ``True`` if the current field has an index that is an instance
        of the given index or of the class of the given index

    """
    # Always compare against a class: an instance is replaced by its class
    if isclass(index):
        wanted_class = index
    else:
        wanted_class = index.__class__
    return any(isinstance(candidate, wanted_class) for candidate in self._indexes)

def _attach_to_model(self, model):
"""
Attach the current field to a model. Can be overriden to do something
Expand All @@ -399,6 +421,20 @@ def _attach_to_instance(self, instance):
self._instance = instance
self.lockable = self.lockable and instance.lockable

@property
def attached_to_model(self):
    """Tells if the current field is the one attached to the model, not instance

    Returns
    -------
    bool
        ``True`` when ``_model`` is set to a truthy value and ``_instance`` is
        missing or falsy, ``False`` in all other cases.

    """
    try:
        has_model = bool(self._model)
    except AttributeError:
        # no `_model` attribute at all
        return False
    if not has_model:
        return False

    try:
        has_instance = bool(self._instance)
    except AttributeError:
        # a model but no `_instance` attribute: this is the model field
        return True
    return not has_instance

def _call_command(self, name, *args, **kwargs):
"""
Add lock management and call parent.
Expand Down Expand Up @@ -434,34 +470,125 @@ def _reset_indexes_caches(self):
for index in self._indexes:
index._reset_cache()

def index(self, value=None):
def index(self, value=None, only_index=None):
    """
    Handle field index process.

    Parameters
    ----------
    value:
        The value to index. If ``None``, the value returned by ``proxy_get``
        is used; if that one is ``None`` too, nothing is indexed.
    only_index: type or BaseIndex
        If set, only the indexes of the field that are instances of this
        class (or of the class of this instance) are used.

    Raises
    ------
    AssertionError
        If the field is not indexable, or if ``only_index`` does not match
        any index of the field.

    """
    assert self.indexable, "Field not indexable"
    assert not only_index or self.has_index(only_index), "Invalid index"

    # Normalize `only_index` to a class, to be usable with `isinstance`
    if only_index:
        if not isclass(only_index):
            only_index = only_index.__class__

    if value is None:
        value = self.proxy_get()
    if value is None:
        return

    # Uniqueness is checked at most once, by the first used index able to do it
    uniqueness_pending = bool(self.unique)

    for one_index in self._indexes:
        if only_index and not isinstance(one_index, only_index):
            continue

        check_now = uniqueness_pending and one_index.handle_uniqueness
        one_index.add(value, check_uniqueness=check_now)

        if check_now:
            # uniqueness check is done for this value
            uniqueness_pending = False

def deindex(self, value=None):
def deindex(self, value=None, only_index=None):
    """
    Run process of deindexing field value(s).

    Parameters
    ----------
    value:
        The value to remove from the indexes. If ``None``, the value returned
        by ``proxy_get`` is used; if that one is ``None`` too, nothing is done.
    only_index: type or BaseIndex
        If set, only the indexes of the field that are instances of this
        class (or of the class of this instance) are used.

    Raises
    ------
    AssertionError
        If the field is not indexable, or if ``only_index`` does not match
        any index of the field.

    """
    assert self.indexable, "Field not indexable"
    assert not only_index or self.has_index(only_index), "Invalid index"

    # Normalize `only_index` to a class, to be usable with `isinstance`
    if only_index and not isclass(only_index):
        only_index = only_index.__class__

    to_remove = self.proxy_get() if value is None else value
    if to_remove is None:
        return

    for one_index in self._indexes:
        if not only_index or isinstance(one_index, only_index):
            one_index.remove(to_remove)

def clear_indexes(self, chunk_size=1000, aggressive=False, index_class=None):
    """Clear all indexes tied to this field

    Parameters
    ----------
    chunk_size: int
        Default to 1000, it's the number of instances to load at once if not in aggressive mode.
    aggressive: bool
        Default to ``False``. When ``False``, the actual collection of instances will
        be ran through to deindex all the values.
        But when ``True``, the database keys will be scanned to find keys that matches the
        pattern of the keys used by the indexes. This is a lot faster and may find forgotten keys.
        But may also find keys not related to the index.
        Should be set to ``True`` if you are not sure about the already indexed values.
    index_class: type
        Allow to clear only index(es) for this index class instead of all indexes.

    Raises
    ------
    AssertionError
        If called from an instance field. It must be called from the model field
        Also raised if the field is not indexable

    Examples
    --------

    >>> MyModel.get_field('myfield').clear_indexes()
    >>> MyModel.get_field('myfield').clear_indexes(index_class=MyIndex)

    """
    assert self.indexable, "Field not indexable"
    # The message must name this method, not `rebuild_indexes`
    assert self.attached_to_model, \
        '`clear_indexes` can only be called on a field attached to the model'

    for index in self._indexes:
        # When `index_class` is given, leave the other indexes untouched
        if index_class and not isinstance(index, index_class):
            continue
        index.clear(chunk_size=chunk_size, aggressive=aggressive)

def rebuild_indexes(self, chunk_size=1000, aggressive_clear=False, index_class=None):
    """Rebuild all indexes tied to this field

    Parameters
    ----------
    chunk_size: int
        Default to 1000, it's the number of instances to load at once.
    aggressive_clear: bool
        Will be passed to the `aggressive` argument of the `clear_indexes` method.
        If `False`, all values will be normally deindexed. If `True`, the work
        will be done at low level, scanning for keys that may match the ones used by the indexes
    index_class: type
        Allow to build only index(es) for this index class instead of all indexes.

    Raises
    ------
    AssertionError
        If called from an instance field. It must be called from the model field
        Also raised if the field is not indexable

    Examples
    --------

    >>> MyModel.get_field('myfield').rebuild_indexes()
    >>> MyModel.get_field('myfield').rebuild_indexes(index_class=MyIndex)

    """
    assert self.indexable, "Field not indexable"
    assert self.attached_to_model, \
        '`rebuild_indexes` can only be called on a field attached to the model'

    for index in self._indexes:
        # When `index_class` is given, leave the other indexes untouched
        if index_class and not isinstance(index, index_class):
            continue
        index.rebuild(chunk_size=chunk_size, aggressive_clear=aggressive_clear)

def get_unique_index(self):
assert self.unique, "Field not unique"

Expand Down Expand Up @@ -613,34 +740,49 @@ def _pop(self, command, *args, **kwargs):
self.deindex([result])
return result

def index(self, values=None):
def index(self, values=None, only_index=None):
    """
    Index all values stored in the field, or only given ones if any.

    Parameters
    ----------
    values: iterable
        The values to index. If ``None``, the values returned by
        ``proxy_get`` are used. ``None`` entries are ignored.
    only_index: type or BaseIndex
        If set, only the indexes of the field that are instances of this
        class (or of the class of this instance) are used.

    Raises
    ------
    AssertionError
        If the field is not indexable, or if ``only_index`` does not match
        any index of the field.

    """
    assert self.indexable, "Field not indexable"
    assert not only_index or self.has_index(only_index), "Invalid index"

    # Normalize `only_index` to a class, to be usable with `isinstance`
    if only_index and not isclass(only_index):
        only_index = only_index.__class__

    if values is None:
        values = self.proxy_get()

    for one_value in values:
        if one_value is None:
            continue

        # Uniqueness is checked at most once per value, by the first used
        # index able to do it
        uniqueness_pending = bool(self.unique)

        for one_index in self._indexes:
            if only_index and not isinstance(one_index, only_index):
                continue

            check_now = uniqueness_pending and one_index.handle_uniqueness
            one_index.add(one_value, check_uniqueness=check_now)

            if check_now:
                # uniqueness check is done for this value
                uniqueness_pending = False

def deindex(self, values=None):
def deindex(self, values=None, only_index=None):
    """
    Deindex all values stored in the field, or only given ones if any.

    Parameters
    ----------
    values: iterable
        The values to remove from the indexes. If falsy (``None`` but also an
        empty iterable), the values returned by ``proxy_get`` are used.
        ``None`` entries are ignored.
    only_index: type or BaseIndex
        If set, only the indexes of the field that are instances of this
        class (or of the class of this instance) are used.

    Raises
    ------
    AssertionError
        If the field is not indexable, or if ``only_index`` does not match
        any index of the field.

    """
    assert self.indexable, "Field not indexable"
    assert not only_index or self.has_index(only_index), "Invalid index"

    # Normalize `only_index` to a class, to be usable with `isinstance`
    if only_index and not isclass(only_index):
        only_index = only_index.__class__

    # NOTE(review): unlike `index`, the fallback is triggered by any falsy
    # value, not only `None`, so an empty list deindexes the stored values.
    # Kept as is - confirm whether this is intended.
    to_remove = values if values else self.proxy_get()

    for one_value in to_remove:
        if one_value is None:
            continue
        for one_index in self._indexes:
            if not only_index or isinstance(one_index, only_index):
                one_index.remove(one_value)

def check_uniqueness(self, values):
Expand Down Expand Up @@ -898,34 +1040,48 @@ def _call_hmget(self, command, *args):
# redispy needs a list, not args
return self._traverse_command(command, args)

def index(self, values=None):
def index(self, values=None, only_index=None):
    """
    Deal with dicts and field names.

    Parameters
    ----------
    values: dict
        Mapping of field names to values to index. If ``None``, the mapping
        returned by ``proxy_get`` is used. Entries with a ``None`` value are
        ignored.
    only_index: type or BaseIndex
        If set, only the indexes of the field that are instances of this
        class (or of the class of this instance) are used.

    Raises
    ------
    AssertionError
        If the field is not indexable, or if ``only_index`` does not match
        any index of the field.

    """
    assert self.indexable, "Field not indexable"
    assert not only_index or self.has_index(only_index), "Invalid index"

    # Normalize `only_index` to a class, to be usable with `isinstance`
    if only_index and not isclass(only_index):
        only_index = only_index.__class__

    if values is None:
        values = self.proxy_get()

    for field_name, one_value in iteritems(values):
        if one_value is None:
            continue

        # Uniqueness is checked at most once per entry, by the first used
        # index able to do it
        uniqueness_pending = bool(self.unique)

        for one_index in self._indexes:
            if only_index and not isinstance(one_index, only_index):
                continue

            check_now = uniqueness_pending and one_index.handle_uniqueness
            one_index.add(field_name, one_value, check_uniqueness=check_now)

            if check_now:
                # uniqueness check is done for this value
                uniqueness_pending = False

def deindex(self, values=None):
def deindex(self, values=None, only_index=None):
    """
    Deal with dicts and field names.

    Parameters
    ----------
    values: dict
        Mapping of field names to values to remove from the indexes. If
        ``None``, the mapping returned by ``proxy_get`` is used. Entries with
        a ``None`` value are ignored.
    only_index: type or BaseIndex
        If set, only the indexes of the field that are instances of this
        class (or of the class of this instance) are used.

    Raises
    ------
    AssertionError
        If the field is not indexable, or if ``only_index`` does not match
        any index of the field.

    """
    assert self.indexable, "Field not indexable"
    assert not only_index or self.has_index(only_index), "Invalid index"

    # Normalize `only_index` to a class, to be usable with `isinstance`
    if only_index and not isclass(only_index):
        only_index = only_index.__class__

    if values is None:
        values = self.proxy_get()

    for field_name, one_value in iteritems(values):
        if one_value is None:
            continue
        for one_index in self._indexes:
            if not only_index or isinstance(one_index, only_index):
                one_index.remove(field_name, one_value)

def hexists(self, key):
Expand Down
Loading

0 comments on commit 9d33460

Please sign in to comment.