Closes Bears-R-Us#3177 Index.sort_values

ajpotts · May 29, 2024 · 6d4af0f · 6d4af0f
1 parent 8cc410c
commit 6d4af0f
Show file tree

Hide file tree

Showing 3 changed files with 104 additions and 15 deletions.
diff --git a/arkouda/index.py b/arkouda/index.py
@@ -346,6 +346,82 @@ def equals(self, other: Index) -> bool:
         else:
             return akall(self == other)
 
+    def _reindex(self, perm):
+        """
+        Return a new index holding this index's entries reordered by ``perm``.
+
+        Parameters
+        ----------
+        perm
+            Positions to gather, in output order.  ``sort_values`` passes the
+            result of ``coargsort``/``argsort`` for MultiIndex and array-backed
+            indexes, and a plain Python list of ints for list-backed indexes.
+
+        Returns
+        -------
+        Index or MultiIndex
+            A new index of the same kind as ``self`` with ``name`` (and
+            ``names`` for a MultiIndex) carried over.
+        """
+        if isinstance(self, MultiIndex):
+            return MultiIndex(self[perm].levels, name=self.name, names=self.names)
+        elif isinstance(self.values, list):
+            # A Python list cannot be indexed by a sequence of positions
+            # (``values[perm]`` raises TypeError), so gather element by element.
+            return Index([self.values[i] for i in perm], name=self.name, allow_list=True)
+        else:
+            return Index(self.values[perm], name=self.name)
+
+    @typechecked
+    def sort_values(self, ascending: bool = True) -> Index:
+        """
+        Return a sorted copy of the index.
+
+        The index itself is not modified; a permutation that orders the values
+        is computed and a new index is built from it via ``_reindex``.
+
+        Parameters
+        ----------
+        ascending : bool, default True
+            If True, sort from smallest to largest; if False, from largest to
+            smallest.
+
+        Returns
+        -------
+        Index
+            Sorted copy of the index.  A MultiIndex is sorted on all of its
+            levels together (via ``coargsort``) and comes back as a MultiIndex.
+
+        Notes
+        -----
+        Unlike ``pandas.Index.sort_values``, this method takes no
+        ``return_indexer``, ``na_position`` or ``key`` arguments and never
+        returns the sorting permutation.
+
+        For a list-backed index the descending order is obtained by reversing
+        the ascending permutation, so equal values come out in the reverse of
+        their ascending-sort order.
+
+        Examples
+        --------
+        >>> import arkouda as ak
+        >>> idx = ak.Index(ak.array([10, 100, 1, 1000]))
+
+        Sort values in ascending order (default behavior).
+
+        >>> smallest_first = idx.sort_values()  # values ordered 1, 10, 100, 1000
+
+        Sort values in descending order.
+
+        >>> largest_first = idx.sort_values(ascending=False)  # 1000, 100, 10, 1
+
+        """
+
+        if isinstance(self, MultiIndex):
+            perm = coargsort(self.levels, ascending=ascending)
+        elif isinstance(self.values, list):
+            # List-backed values live on the client, so sort them with numpy
+            # and hand ``_reindex`` a plain Python list of positions.
+            from numpy import argsort as np_argsort
+
+            order = np_argsort(self.values)
+            if not ascending:
+                order = order[::-1]
+            perm = order.tolist()
+        else:
+            perm = argsort(self.values, ascending=ascending)
+        return self._reindex(perm)
+
     def memory_usage(self, unit="B"):
         """
         Return the memory usage of the Index values.
@@ -1047,13 +1123,15 @@ class MultiIndex(Index):
 
     def __init__(
         self,
-        levels: Union[list, pdarray, Strings, Categorical],
+        levels: Union[list, tuple, pdarray, Strings, Categorical],
         name: Optional[str] = None,
         names: Optional[list[str]] = None,
     ):
         self.registered_name: Optional[str] = None
         if not (isinstance(levels, list) or isinstance(levels, tuple)):
             raise TypeError("MultiIndex should be an iterable")
+        elif isinstance(levels, tuple):
+            levels = list(levels)
         self.levels = levels
         first = True
         self._names = names

diff --git a/arkouda/series.py b/arkouda/series.py
@@ -685,8 +685,8 @@ def sort_index(self, ascending: bool = True) -> Series:
         A new Series sorted.
         """
 
-        idx = self.index.argsort(ascending=ascending)
-        return self._reindex(idx)
+        perm = self.index.argsort(ascending=ascending)
+        return self._reindex(perm)
 
     @typechecked
     def sort_values(self, ascending: bool = True) -> Series:
@@ -701,21 +701,18 @@ def sort_values(self, ascending: bool = True) -> Series:
         -------
         A new Series sorted smallest to largest
         """
-
+        from arkouda.util import is_numeric
         if not ascending:
-            if isinstance(self.values, pdarray) and self.values.dtype in (
-                int64,
-                float64,
-            ):
+            if isinstance(self.values, pdarray) and is_numeric(self.values):
                 # For numeric values, negation reverses sort order
-                idx = argsort(-self.values)
+                perm = argsort(-self.values)
             else:
                 # For non-numeric values, need the descending arange because reverse slicing
                 # is not supported
-                idx = argsort(self.values)[arange(self.values.size - 1, -1, -1)]
+                perm = argsort(self.values)[arange(self.values.size - 1, -1, -1)]
         else:
-            idx = argsort(self.values)
-        return self._reindex(idx)
+            perm = argsort(self.values)
+        return self._reindex(perm)
 
     @typechecked
     def tail(self, n: int = 10) -> Series:

diff --git a/arkouda/sorting.py b/arkouda/sorting.py
@@ -20,6 +20,7 @@
 
 def argsort(
     pda: Union[pdarray, Strings, "Categorical"],  # type: ignore # noqa
+    ascending: bool = True,
     algorithm: SortingAlgorithm = SortingAlgorithm.RadixSortLSD,
     axis: int_scalars = 0,
 ) -> pdarray:
@@ -80,13 +81,20 @@ def argsort(
             "axis": axis,
         },
     )
-    return create_pdarray(cast(str, repMsg))
+    sorted_array = create_pdarray(cast(str, repMsg))
+    if ascending is True:
+        return sorted_array
+    else:
+        from arkouda import arange
+
+        return sorted_array[arange(sorted_array.size - 1, -1, -1)]
 
 
 def coargsort(
     arrays: Sequence[Union[Strings, pdarray, "Categorical"]],  # type: ignore # noqa
     algorithm: SortingAlgorithm = SortingAlgorithm.RadixSortLSD,
-) -> pdarray:
+    ascending=True,
+) -> pdarray:  # type: ignore
     """
     Return the permutation that groups the rows (left-to-right), if the
     input arrays are treated as columns. The permutation sorts numeric
@@ -182,7 +190,13 @@ def coargsort(
             "arr_types": atypes,
         },
     )
-    return create_pdarray(cast(str, repMsg))
+    sorted_array = create_pdarray(cast(str, repMsg))
+    if ascending is True:
+        return sorted_array
+    else:
+        from arkouda import arange
+
+        return sorted_array[arange(sorted_array.size - 1, -1, -1)]
 
 
 @typechecked