Skip to content

Commit

Permalink
Closes Bears-R-Us#3177 Index.sort_values
Browse files Browse the repository at this point in the history
  • Loading branch information
ajpotts committed May 29, 2024
1 parent 8cc410c commit 6d4af0f
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 15 deletions.
80 changes: 79 additions & 1 deletion arkouda/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,82 @@ def equals(self, other: Index) -> bool:
else:
return akall(self == other)

def _reindex(self, perm):
    """
    Build a new index holding this index's entries reordered by ``perm``.

    Parameters
    ----------
    perm
        Positions to select, in output order. For a list-backed index this
        must be an iterable of integer positions; otherwise it is used
        directly as an indexer on ``self`` / ``self.values``.

    Returns
    -------
    Index or MultiIndex
        A new object of the same flavour as ``self`` carrying the same
        ``name`` (and ``names`` for a MultiIndex).
    """
    if isinstance(self, MultiIndex):
        return MultiIndex(self[perm].levels, name=self.name, names=self.names)

    if isinstance(self.values, list):
        # A Python list cannot be indexed with a sequence of positions
        # (``values[perm]`` raises TypeError), so pick the entries one by one.
        reordered = [self.values[pos] for pos in perm]
        return Index(reordered, name=self.name, allow_list=True)

    return Index(self.values[perm], name=self.name)

@typechecked
def sort_values(self, ascending: bool = True) -> Index:
    """
    Return a sorted copy of the index.

    Parameters
    ----------
    ascending : bool, default True
        Sort the index values in ascending order when True and in
        descending order when False.

    Returns
    -------
    Index
        A new index with the same entries arranged in sorted order. The
        original index is not modified.

    Notes
    -----
    Unlike ``pandas.Index.sort_values``, this method accepts only
    ``ascending``; there are no ``return_indexer``, ``na_position`` or
    ``key`` arguments, and only the sorted index is returned.
    """
    if isinstance(self, MultiIndex):
        # Sort rows by all levels together.
        perm = coargsort(self.levels, ascending=ascending)
    elif isinstance(self.values, list):
        # List-backed values are sorted locally with NumPy.
        from numpy import argsort as np_argsort

        order = np_argsort(self.values)
        if not ascending:
            # Descending order is the ascending permutation reversed.
            order = order[::-1]
        perm = order.tolist()
    else:
        perm = argsort(self.values, ascending=ascending)

    return self._reindex(perm)

def memory_usage(self, unit="B"):
"""
Return the memory usage of the Index values.
Expand Down Expand Up @@ -1047,13 +1123,15 @@ class MultiIndex(Index):

def __init__(
self,
levels: Union[list, pdarray, Strings, Categorical],
levels: Union[list, tuple, pdarray, Strings, Categorical],
name: Optional[str] = None,
names: Optional[list[str]] = None,
):
self.registered_name: Optional[str] = None
if not (isinstance(levels, list) or isinstance(levels, tuple)):
raise TypeError("MultiIndex should be an iterable")
elif isinstance(levels, tuple):
levels = list(levels)
self.levels = levels
first = True
self._names = names
Expand Down
19 changes: 8 additions & 11 deletions arkouda/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,8 +685,8 @@ def sort_index(self, ascending: bool = True) -> Series:
A new Series sorted.
"""

idx = self.index.argsort(ascending=ascending)
return self._reindex(idx)
perm = self.index.argsort(ascending=ascending)
return self._reindex(perm)

@typechecked
def sort_values(self, ascending: bool = True) -> Series:
Expand All @@ -701,21 +701,18 @@ def sort_values(self, ascending: bool = True) -> Series:
-------
A new Series sorted smallest to largest
"""

from arkouda.util import is_numeric
if not ascending:
if isinstance(self.values, pdarray) and self.values.dtype in (
int64,
float64,
):
if isinstance(self.values, pdarray) and is_numeric(self.values):
# For numeric values, negation reverses sort order
idx = argsort(-self.values)
perm = argsort(-self.values)
else:
# For non-numeric values, need the descending arange because reverse slicing
# is not supported
idx = argsort(self.values)[arange(self.values.size - 1, -1, -1)]
perm = argsort(self.values)[arange(self.values.size - 1, -1, -1)]
else:
idx = argsort(self.values)
return self._reindex(idx)
perm = argsort(self.values)
return self._reindex(perm)

@typechecked
def tail(self, n: int = 10) -> Series:
Expand Down
20 changes: 17 additions & 3 deletions arkouda/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

def argsort(
pda: Union[pdarray, Strings, "Categorical"], # type: ignore # noqa
ascending: bool = True,
algorithm: SortingAlgorithm = SortingAlgorithm.RadixSortLSD,
axis: int_scalars = 0,
) -> pdarray:
Expand Down Expand Up @@ -80,13 +81,20 @@ def argsort(
"axis": axis,
},
)
return create_pdarray(cast(str, repMsg))
sorted_array = create_pdarray(cast(str, repMsg))
if ascending is True:
return sorted_array
else:
from arkouda import arange

return sorted_array[arange(sorted_array.size - 1, -1, -1)]


def coargsort(
arrays: Sequence[Union[Strings, pdarray, "Categorical"]], # type: ignore # noqa
algorithm: SortingAlgorithm = SortingAlgorithm.RadixSortLSD,
) -> pdarray:
ascending=True,
) -> pdarray: # type: ignore
"""
Return the permutation that groups the rows (left-to-right), if the
input arrays are treated as columns. The permutation sorts numeric
Expand Down Expand Up @@ -182,7 +190,13 @@ def coargsort(
"arr_types": atypes,
},
)
return create_pdarray(cast(str, repMsg))
sorted_array = create_pdarray(cast(str, repMsg))
if ascending is True:
return sorted_array
else:
from arkouda import arange

return sorted_array[arange(sorted_array.size - 1, -1, -1)]


@typechecked
Expand Down

0 comments on commit 6d4af0f

Please sign in to comment.