Skip to content

Commit

Permalink
Merge branch 'feature/twidi/correct-and-enhance-slicing' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
twidi committed Jan 29, 2018
2 parents 1c0482c + e21a27d commit 64d478f
Show file tree
Hide file tree
Showing 11 changed files with 444 additions and 208 deletions.
172 changes: 134 additions & 38 deletions limpyd/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

ParsedFilter = namedtuple('ParsedFilter', ['index', 'suffix', 'extra_field_parts', 'value'])

NONE_SLICE = slice(None, None, None)


class CollectionManager(object):
"""
Expand Down Expand Up @@ -41,7 +43,7 @@ def __init__(self, cls):
self._instances_skip_exist_test = False # If True will return instances
# without testing if pk exist
self._sort = None # Will store sorting parameters
self._slice = None # Will store slice parameters (start and num)
self._sort_limits = None # Will store slice parameters (start and num)
self._len = None # Store the result of the final collection, to avoid
# having to compute the whole thing twice when doing
# `list(Model.collection)` (a `list` will call
Expand All @@ -51,53 +53,142 @@ def __init__(self, cls):
# True by default to manage the __iter__ + __len__
# case, specifically set to False in other cases

self.scripted_slicing = True

def __iter__(self):
old_slice_and_len_mode = None if self._slice is None else self._slice.copy(), self._len_mode
old_sort_limits_and_len_mode = None if self._sort_limits is None else self._sort_limits.copy(), self._len_mode
try:
self._len_mode = False
return self._collection.__iter__()
finally:
self._slice, self._len_mode = old_slice_and_len_mode
self._sort_limits, self._len_mode = old_sort_limits_and_len_mode

def _optimize_slice(self, the_slice, can_reverse):
"""
Parameters
----------
the_slice: slice
The python slice to optimize
can_reverse: bool
If we can ask redis to reverse the data before slicing.
Returns
-------
tuple
Four elements:
- optimized: bool
If the slice is optimized
- start: int or None
start item to pass to the redis command
If None, we want from the very start
- count: int or None
number of items to ask the redi command
If None, we want everything from the start
- rev: bool
If we should ask redis to work the reverse way
- python_slice
The slice to be applied on the python side
"""
step = 1 if the_slice.step in (1, None) else the_slice.step
start = (the_slice.start or 0) if step > 0 else the_slice.start
stop = the_slice.stop

if start is not None and stop is not None:

# cases where we know the result is always empty
if step > 0 and (0 <= stop <= start or stop <= start < 0):
return True, None, None, False, NONE_SLICE

if step < 0 and (0 <= start <= stop or start <= stop < 0):
return True, None, None, False, NONE_SLICE

# if start and stop are on the same side, we can optimize

# simplest case: start and stop are positive
if start >= 0 and stop > 0:
if step < 0:
start, stop = stop + 1, start + 1
return True, start, stop - start, False, slice(None, None, step if step != 1 else None)

# more complicated case: start and stop are negative
if start < 0 and stop < 0 and can_reverse:
if step < 0:
start, stop = stop + 1, start + 1
step = -step
return True, - stop, stop - start, True, slice(None, None, step if step != 1 else None)

# for positive step, if we have a positive start, and no end or negative one, we have to retrieve all from
# the start and slice here
if step > 0 and (start is not None and start >= 0) and (stop is None or stop < 0):
return True, start, None, False, slice(None, stop, step)

# the reverse for negative step
if step < 0 and (start is None or start < 0) and (stop is not None and stop >= 0):
return True, stop + 1, None, False, slice(start, None, step)

# in all other cases we recover the whole collection and slice it in python
return False, None, None, False, slice(start, stop, step)

def __getitem__(self, arg):
old_slice_and_len_mode = None if self._slice is None else self._slice.copy(), self._len_mode
old_sort_limits_and_len_mode = None if self._sort_limits is None else self._sort_limits.copy(), self._len_mode
old_sort = None if self._sort is None else self._sort.copy()
try:
self._len_mode = False
self._slice = {}
self._sort_limits = {}
self._optimized_slicing = False # just to store the fact that the call was optimized
if isinstance(arg, slice):
# A slice has been requested
# so add it to the sort parameters (via slice)
# and return the collection (a scliced collection is no more
# chainable, so we do not return `self`)
start = arg.start or 0
if start < 0:
# in case of a negative start, we can't use redis sort so
# we fetch all the collection before returning the wanted slice
return self._collection[arg]
self._slice['start'] = start
stop = arg.stop
# Redis expects a number of elements
# not a python style stop value
if stop is None:
# negative value for the count return all
self._slice['num'] = -1
else:
self._slice['num'] = stop - start
return self._collection
# We try to reduce the data to get back from redis
# when it's possible depending of the slice arguments.
# We use `self._sort_limits` to tell redis how to slice what
# it'll got from the `sort` command. We may also reverse
# `_sort['desc']`(the set will be read in reverse way as we wanted
# by redis).
# At the end we return the collection (a sliced collection
# is no more chainable, so we do not return `self`

optimized, start, count, rev, python_slice = self._optimize_slice(
arg,
can_reverse=True if not self._sort else self._sort.get('by', '') != 'nosort'
)

self._optimized_slicing = optimized

if optimized and start is None and count is None and python_slice == NONE_SLICE:
return []

if start is not None or count is not None:
self._sort_limits['start'] = start or 0
self._sort_limits['num'] = -1 if count is None else count

if rev:
if self._sort is None: self._sort = {}
self._sort['desc'] = not self._sort.get('desc', False)

return self._collection[python_slice]

else:
# A single item has been requested
# Nevertheless, use the redis pagination, to minimize
# data transfert and use the fast redis offset system
# data transfer and use the fast redis offset system
start = arg
self._sort_limits['num'] = 1 # one element
self._optimized_slicing = True
if start >= 0:
self._slice['start'] = start
self._slice['num'] = 1 # one element
self._sort_limits['start'] = start
return self._collection[0]
else:
# negative index, we have to fetch the whole collection first
return self._collection[start]
# we sort the result in the reverse way, mark the final result as
# reversed to re-reverse it at the end
if self._sort is None: self._sort = {}
self._sort['desc'] = not self._sort.get('desc', False)
self._sort_limits['start'] = - start - 1
return self._collection[0]
finally:
self._slice, self._len_mode = old_slice_and_len_mode
self._sort_limits, self._len_mode = old_sort_limits_and_len_mode
self._sort = old_sort

def _get_pk(self):
"""
Expand All @@ -115,13 +206,17 @@ def _get_pk(self):
def _prepare_sort_options(self, has_pk):
"""
Prepare "sort" options to use when calling the collection, depending
on "_sort" and "_slice" attributes
on "_sort" and "_sort_limits" attributes
"""
sort_options = {}
if self._sort is not None and not has_pk:
sort_options.update(self._sort)
if self._slice is not None:
sort_options.update(self._slice)
if self._sort_limits is not None:
if 'start' in self._sort_limits and 'num' not in self._sort_limits:
self._sort_limits['num'] = -1
elif 'num' in self._sort_limits and 'start' not in self._sort_limits:
self._sort_limits['start'] = 0
sort_options.update(self._sort_limits)
if not sort_options and self._sort is None:
sort_options = None
return sort_options
Expand All @@ -131,8 +226,8 @@ def _collection(self):
"""
Effectively retrieve data according to lazy_collection.
"""
old_slice_and_len_mode = None if self._slice is None else self._slice.copy(), self._len_mode
try: # try block to always reset the _slice in the "finally" part
old_sort_limits_and_len_mode = None if self._sort_limits is None else self._sort_limits.copy(), self._len_mode
try: # try block to always reset the _sort_limits in the "finally" part

conn = self.cls.get_connection()
self._len = 0
Expand Down Expand Up @@ -177,7 +272,7 @@ def _collection(self):
if pk and not self._lazy_collection['sets']:
# we have a pk without other sets, and no
# needs to get values so we can simply return the pk
collection = set([pk])
collection = {pk}
else:
# we have nothing
collection = {}
Expand All @@ -190,13 +285,14 @@ def _collection(self):
# Format return values if needed
return self._prepare_results(collection)
finally:
self._slice, self._len_mode = old_slice_and_len_mode
self._sort_limits, self._len_mode = old_sort_limits_and_len_mode

def _final_redis_call(self, final_set, sort_options):
"""
The final redis call to obtain the values to return from the "final_set"
with some sort options.
"""

conn = self.cls.get_connection()
if sort_options is not None:
# a sort, or values, call the SORT command on the set
Expand Down Expand Up @@ -412,12 +508,12 @@ def __len__(self):
return self._len

def __repr__(self):
old_slice_and_len_mode = None if self._slice is None else self._slice.copy(), self._len_mode
old_sort_limits_and_len_mode = None if self._sort_limits is None else self._sort_limits.copy(), self._len_mode
try:
self._len_mode = False
return self._collection.__repr__()
finally:
self._slice, self._len_mode = old_slice_and_len_mode
self._sort_limits, self._len_mode = old_sort_limits_and_len_mode

def instances(self, skip_exist_test=False):
"""
Expand Down
30 changes: 19 additions & 11 deletions limpyd/contrib/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,26 @@ def _collection(self):
Effectively retrieve data according to lazy_collection.
If we have a stored collection, without any result, return an empty list
"""
old_slice_and_len_mode = None if self._slice is None else self._slice.copy(), self._len_mode
old_sort_limits_and_len_mode = None if self._sort_limits is None else self._sort_limits.copy(), self._len_mode
old_sorts = None if self._sort is None else self._sort.copy(),\
None if self._sort_by_sortedset is None else self._sort_by_sortedset.copy()
try:
if self.stored_key and not self._stored_len:
if self._len_mode:
self._len = 0
self._len_mode = False
self._slice = {}
self._sort_limits = {}
return []

# Manage sort desc added by original `__getitem__` when we sort by score
if self._sort_by_sortedset and self._sort and self._sort.get('desc'):
self._sort = None
self._sort_by_sortedset['desc'] = not self._sort_by_sortedset.get('desc', False)

return super(ExtendedCollectionManager, self)._collection
finally:
self._slice, self._len_mode = old_slice_and_len_mode
self._sort_limits, self._len_mode = old_sort_limits_and_len_mode
self._sort, self._sort_by_sortedset = old_sorts

def _prepare_sets(self, sets):
"""
Expand Down Expand Up @@ -252,16 +261,15 @@ def _final_redis_call(self, final_set, sort_options):

# we have a sorted set without need to sort, use zrange
if self._has_sortedsets and sort_options is None:
# TODO: we may handle slicing here

return conn.zrange(final_set, 0, -1)

# we have a stored collection, without other filter, and no need to
# sort, use lrange
elif self.stored_key and not self._lazy_collection['sets']\
if self.stored_key and not self._lazy_collection['sets']\
and len(self._lazy_collection['intersects']) == 1\
and (sort_options is None or sort_options == {'by': 'nosort'}):

# TODO: we may handle slicing here
return conn.lrange(final_set, 0, -1)

# normal call
Expand Down Expand Up @@ -466,7 +474,7 @@ def _sort_by_sortedset_before(self):
Return True if we have to sort by set and do the stuff *before* asking
redis for the sort
"""
return self._sort_by_sortedset and self._slice and (not self._lazy_collection['pks']
return self._sort_by_sortedset and self._sort_limits and (not self._lazy_collection['pks']
or self._want_score_value)

@property
Expand All @@ -475,7 +483,7 @@ def _sort_by_sortedset_after(self):
Return True if we have to sort by set and do the stuff *after* asking
redis for the sort
"""
return self._sort_by_sortedset and not self._slice and (not self._lazy_collection['pks']
return self._sort_by_sortedset and not self._sort_limits and (not self._lazy_collection['pks']
or self._want_score_value)

@property
Expand Down Expand Up @@ -668,7 +676,7 @@ def store(self, key=None, ttl=DEFAULT_STORE_TTL):
DEFAULT_STORE_TTL, which is 60 secondes. You can pass None if you don't
want expiration.
"""
old_slice_and_len_mode = None if self._slice is None else self._slice.copy(), self._len_mode
old_sort_limits_and_len_mode = None if self._sort_limits is None else self._sort_limits.copy(), self._len_mode
try:
self._store = True

Expand Down Expand Up @@ -717,7 +725,7 @@ def store(self, key=None, ttl=DEFAULT_STORE_TTL):
return stored_collection

finally:
self._slice, self._len_mode = old_slice_and_len_mode
self._sort_limits, self._len_mode = old_sort_limits_and_len_mode

def from_stored(self, key):
"""
Expand All @@ -731,7 +739,7 @@ def from_stored(self, key):
# prepare the collection
self.stored_key = key
self.intersect(_StoredCollection(self.cls.get_connection(), key))
self.sort(by='nosort')
self.sort(by='nosort') # keep stored order

# count the number of results to manage empty result (to not behave like
# expired key)
Expand Down
Loading

0 comments on commit 64d478f

Please sign in to comment.