Skip to content

Commit

Permalink
Closes #3177 Index.sort_values
Browse files Browse the repository at this point in the history
  • Loading branch information
ajpotts committed May 23, 2024
1 parent 38f3041 commit 6f8dab4
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 22 deletions.
73 changes: 71 additions & 2 deletions arkouda/index.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import json
from typing import TYPE_CHECKING, List, Optional, Union, Tuple
from typing import TYPE_CHECKING, List, Optional, Tuple, Union

import pandas as pd # type: ignore
from numpy import array as ndarray
Expand Down Expand Up @@ -276,6 +276,73 @@ def equals(self, other: Union[Index, pdarray, Strings, Categorical, list]) -> bo

return akall(self == other)

def _reindex(self, perm):
    """
    Build a new index whose entries are this index's entries reordered by ``perm``.

    Parameters
    ----------
    perm
        Indexer applied to this index (``self[perm]`` for a MultiIndex,
        ``self.values[perm]`` otherwise).
        NOTE(review): presumably a permutation pdarray such as the result
        of ``argsort``/``coargsort`` -- confirm against callers.

    Returns
    -------
    Index or MultiIndex
        A new object of the same flavor as ``self``; ``name`` (and, for a
        MultiIndex, ``names``) are carried over from ``self``.
    """
    # Plain Index: reorder the underlying values directly.
    if not isinstance(self, MultiIndex):
        return Index(self.values[perm], name=self.name)

    # MultiIndex: index the whole object, then rebuild from the permuted levels.
    permuted = self[perm]
    return MultiIndex(permuted.levels, name=self.name, names=self.names)

@typechecked
def sort_values(self, ascending: bool = True) -> Index:
    """
    Return a sorted copy of the index.

    Parameters
    ----------
    ascending : bool, default True
        If True, sort the index values in ascending order; if False,
        sort them in descending order.

    Returns
    -------
    Index
        Sorted copy of the index. For a MultiIndex the result is a
        MultiIndex; ``name`` (and ``names``) are preserved.

    See Also
    --------
    Series.sort_values : Sort the values of a Series.

    Notes
    -----
    Unlike ``pandas.Index.sort_values``, this method accepts only
    ``ascending``; it does not take ``return_indexer``, ``na_position``
    or ``key`` and never returns the sorting indexer.

    A MultiIndex is ordered with ``coargsort`` over its levels; any other
    index is ordered with ``argsort`` over its values.

    Examples
    --------
    NOTE(review): illustrative only -- assumes the package is imported as
    ``ak``; confirm constructor usage before enabling as a doctest.

    >>> idx = ak.Index(ak.array([10, 100, 1, 1000]))
    >>> idx.sort_values()                 # values ordered 1, 10, 100, 1000
    >>> idx.sort_values(ascending=False)  # values ordered 1000, 100, 10, 1
    """
    # Compute the permutation that orders the index ...
    if isinstance(self, MultiIndex):
        # ... jointly across all levels for a MultiIndex,
        perm = coargsort(self.levels, ascending=ascending)
    else:
        # ... or over the single values array otherwise.
        perm = argsort(self.values, ascending=ascending)

    # Apply the permutation, keeping the index type and name(s).
    return self._reindex(perm)

def memory_usage(self, unit="B"):
"""
Return the memory usage of the Index values.
Expand Down Expand Up @@ -977,13 +1044,15 @@ class MultiIndex(Index):

def __init__(
self,
levels: Union[list, pdarray, Strings, Categorical],
levels: Union[list, tuple, pdarray, Strings, Categorical],
name: Optional[str] = None,
names: Optional[list[str]] = None,
):
self.registered_name: Optional[str] = None
if not (isinstance(levels, list) or isinstance(levels, tuple)):
raise TypeError("MultiIndex should be an iterable")
elif isinstance(levels, tuple):
levels = list(levels)
self.levels = levels
first = True
self.names = names
Expand Down
19 changes: 8 additions & 11 deletions arkouda/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,8 +685,8 @@ def sort_index(self, ascending: bool = True) -> Series:
A new Series sorted.
"""

idx = self.index.argsort(ascending=ascending)
return self._reindex(idx)
perm = self.index.argsort(ascending=ascending)
return self._reindex(perm)

@typechecked
def sort_values(self, ascending: bool = True) -> Series:
Expand All @@ -701,21 +701,18 @@ def sort_values(self, ascending: bool = True) -> Series:
-------
A new Series sorted smallest to largest
"""

from arkouda.util import is_numeric
if not ascending:
if isinstance(self.values, pdarray) and self.values.dtype in (
int64,
float64,
):
if isinstance(self.values, pdarray) and is_numeric(self.values):
# For numeric values, negation reverses sort order
idx = argsort(-self.values)
perm = argsort(-self.values)
else:
# For non-numeric values, need the descending arange because reverse slicing
# is not supported
idx = argsort(self.values)[arange(self.values.size - 1, -1, -1)]
perm = argsort(self.values)[arange(self.values.size - 1, -1, -1)]
else:
idx = argsort(self.values)
return self._reindex(idx)
perm = argsort(self.values)
return self._reindex(perm)

@typechecked
def tail(self, n: int = 10) -> Series:
Expand Down
27 changes: 18 additions & 9 deletions arkouda/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

def argsort(
pda: Union[pdarray, Strings, "Categorical"], # type: ignore # noqa
ascending: bool = True,
algorithm: SortingAlgorithm = SortingAlgorithm.RadixSortLSD,
axis: int_scalars = 0,
) -> pdarray: # type: ignore
Expand Down Expand Up @@ -80,12 +81,19 @@ def argsort(
"axis": axis,
},
)
return create_pdarray(cast(str, repMsg))
sorted_array = create_pdarray(cast(str, repMsg))
if ascending is True:
return sorted_array
else:
from arkouda import arange

return sorted_array[arange(sorted_array.size - 1, -1, -1)]


def coargsort(
arrays: Sequence[Union[Strings, pdarray, "Categorical"]], # type: ignore # noqa
algorithm: SortingAlgorithm = SortingAlgorithm.RadixSortLSD,
ascending=True,
) -> pdarray: # type: ignore
"""
Return the permutation that groups the rows (left-to-right), if the
Expand Down Expand Up @@ -182,15 +190,17 @@ def coargsort(
"arr_types": atypes,
},
)
return create_pdarray(cast(str, repMsg))
sorted_array = create_pdarray(cast(str, repMsg))
if ascending is True:
return sorted_array
else:
from arkouda import arange

return sorted_array[arange(sorted_array.size - 1, -1, -1)]


@typechecked
def sort(
pda: pdarray,
algorithm: SortingAlgorithm = SortingAlgorithm.RadixSortLSD,
axis=-1
) -> pdarray:
def sort(pda: pdarray, algorithm: SortingAlgorithm = SortingAlgorithm.RadixSortLSD, axis=-1) -> pdarray:
"""
Return a sorted copy of the array. Only sorts numeric arrays;
for Strings, use argsort.
Expand Down Expand Up @@ -236,7 +246,6 @@ def sort(
if pda.size == 0:
return zeros(0, dtype=pda.dtype)
repMsg = generic_msg(
cmd=f"sort{pda.ndim}D",
args={"alg": algorithm.name, "array": pda, "axis": axis}
cmd=f"sort{pda.ndim}D", args={"alg": algorithm.name, "array": pda, "axis": axis}
)
return create_pdarray(cast(str, repMsg))

0 comments on commit 6f8dab4

Please sign in to comment.