def _reindex(self, perm):
    """
    Return a new index with its entries reordered by ``perm``.

    Parameters
    ----------
    perm
        Positions to gather, in output order. For list-backed values this
        must be an iterable of integer positions; otherwise it is used
        directly to index ``self`` (MultiIndex) or ``self.values``.

    Returns
    -------
    Index
        A ``MultiIndex`` when ``self`` is a ``MultiIndex``, otherwise an
        ``Index``. ``name`` (and ``names`` for a ``MultiIndex``) is carried
        over from ``self``.
    """
    if isinstance(self, MultiIndex):
        return MultiIndex(self[perm].levels, name=self.name, names=self.names)
    if isinstance(self.values, list):
        # A Python list only accepts an int or a slice as a subscript, so
        # ``self.values[perm]`` raises TypeError when ``perm`` is a sequence
        # of positions. Gather the elements one position at a time instead.
        reordered = [self.values[i] for i in perm]
        return Index(reordered, name=self.name, allow_list=True)
    return Index(self.values[perm], name=self.name)


@typechecked
def sort_values(self, ascending: bool = True) -> Index:
    """
    Return a sorted copy of the index.

    Parameters
    ----------
    ascending : bool, default True
        Sort in ascending order when True, in descending order when False.

    Returns
    -------
    Index
        A new index holding the sorted values; ``self`` is not modified.

    Notes
    -----
    The sorting permutation is computed with ``coargsort`` over the levels
    for a ``MultiIndex``, with ``numpy.argsort`` when the values are a
    Python list, and with ``argsort`` otherwise. For list-backed values the
    descending order is the exact reverse of the ascending permutation.
    """
    if isinstance(self, MultiIndex):
        perm = coargsort(self.levels, ascending=ascending)
    elif isinstance(self.values, list):
        from numpy import argsort as np_argsort

        order = np_argsort(self.values)
        if not ascending:
            order = order[::-1]
        # Convert to a plain list of ints for the list-backed gather in _reindex.
        perm = order.tolist()
    else:
        perm = argsort(self.values, ascending=ascending)
    return self._reindex(perm)
""" - idx = self.index.argsort(ascending=ascending) - return self._reindex(idx) + perm = self.index.argsort(ascending=ascending) + return self._reindex(perm) @typechecked def sort_values(self, ascending: bool = True) -> Series: @@ -701,21 +701,18 @@ def sort_values(self, ascending: bool = True) -> Series: ------- A new Series sorted smallest to largest """ - + from arkouda.util import is_numeric if not ascending: - if isinstance(self.values, pdarray) and self.values.dtype in ( - int64, - float64, - ): + if isinstance(self.values, pdarray) and is_numeric(self.values): # For numeric values, negation reverses sort order - idx = argsort(-self.values) + perm = argsort(-self.values) else: # For non-numeric values, need the descending arange because reverse slicing # is not supported - idx = argsort(self.values)[arange(self.values.size - 1, -1, -1)] + perm = argsort(self.values)[arange(self.values.size - 1, -1, -1)] else: - idx = argsort(self.values) - return self._reindex(idx) + perm = argsort(self.values) + return self._reindex(perm) @typechecked def tail(self, n: int = 10) -> Series: diff --git a/arkouda/sorting.py b/arkouda/sorting.py index 3ff5d0493e..fb40147e3f 100644 --- a/arkouda/sorting.py +++ b/arkouda/sorting.py @@ -20,6 +20,7 @@ def argsort( pda: Union[pdarray, Strings, "Categorical"], # type: ignore # noqa + ascending: bool = True, algorithm: SortingAlgorithm = SortingAlgorithm.RadixSortLSD, axis: int_scalars = 0, ) -> pdarray: @@ -80,13 +81,20 @@ def argsort( "axis": axis, }, ) - return create_pdarray(cast(str, repMsg)) + sorted_array = create_pdarray(cast(str, repMsg)) + if ascending is True: + return sorted_array + else: + from arkouda import arange + + return sorted_array[arange(sorted_array.size - 1, -1, -1)] def coargsort( arrays: Sequence[Union[Strings, pdarray, "Categorical"]], # type: ignore # noqa algorithm: SortingAlgorithm = SortingAlgorithm.RadixSortLSD, -) -> pdarray: + ascending=True, +) -> pdarray: # type: ignore """ Return the permutation 
that groups the rows (left-to-right), if the input arrays are treated as columns. The permutation sorts numeric @@ -182,7 +190,13 @@ def coargsort( "arr_types": atypes, }, ) - return create_pdarray(cast(str, repMsg)) + sorted_array = create_pdarray(cast(str, repMsg)) + if ascending is True: + return sorted_array + else: + from arkouda import arange + + return sorted_array[arange(sorted_array.size - 1, -1, -1)] @typechecked