Skip to content

Commit

Permalink
Closes Bears-R-Us#3177 Index.sort_values
Browse files Browse the repository at this point in the history
  • Loading branch information
ajpotts committed May 31, 2024
1 parent 5fd431e commit 0ab9716
Show file tree
Hide file tree
Showing 10 changed files with 778 additions and 40 deletions.
62 changes: 62 additions & 0 deletions PROTO_tests/tests/coargsort_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,68 @@ def test_coargsort_bool(self):
assert args[0][perm].to_list() == [False, False, True, True, True]
assert args[1][perm].to_list() == [2, 4, 1, 3, 5]

@pytest.mark.parametrize("prob_size", pytest.prob_size)
def test_coargsort_wstrings(self, prob_size):
    """Check coargsort on a (Strings, int64) key pair in both sort directions.

    The check is adapted from ``arkouda.alignment.is_cosorted``: within every
    run of equal leading keys, each following key must be monotone in the
    requested direction. The ordering of the leading (string) key itself is
    not checked here; only its run boundaries are used.
    """
    ak_char_array = ak.random_strings_uniform(minlen=1, maxlen=2, seed=1, size=prob_size)
    ak_int_array = ak.randint(0, 10 * prob_size, prob_size, dtype=ak.int64, seed=1)
    keys = [ak_char_array, ak_int_array]

    def check_cosorted(arrays, ascending):
        # boundary[i] is True where an earlier key already differs between
        # positions i and i + 1, so later keys are unconstrained there.
        boundary = arrays[0][:-1] != arrays[0][1:]
        for array in arrays[1:]:
            left = array[:-1]
            right = array[1:]
            in_order = (left <= right) if ascending else (left >= right)
            if not (in_order | boundary).all():
                direction = "ascending" if ascending else "descending"
                raise ValueError(f"coargsort result is not cosorted ({direction})")
            boundary = boundary | (left != right)

    perm = ak.coargsort(keys)
    check_cosorted([key[perm] for key in keys], ascending=True)

    # Now check ascending=False
    perm = ak.coargsort(keys, ascending=False)
    check_cosorted([key[perm] for key in keys], ascending=False)

@pytest.mark.parametrize("prob_size", pytest.prob_size)
def test_coargsort_numeric(self, prob_size):
    """Check coargsort on int64/float64 key pairs, in both key orders and both directions."""
    from arkouda.alignment import is_cosorted

    int_keys = ak.randint(0, 10 * prob_size, prob_size, dtype=ak.int64, seed=1)
    float_keys = ak.randint(0, 10 * prob_size, prob_size, dtype=ak.float64, seed=2)
    key_orders = ([int_keys, float_keys], [float_keys, int_keys])

    # Default direction: the permuted keys are passed to is_cosorted as-is.
    for keys in key_orders:
        order = ak.coargsort(keys)
        assert is_cosorted([key[order] for key in keys])

    # ascending=False: the permuted keys are negated before the is_cosorted check.
    for keys in key_orders:
        order = ak.coargsort(keys, ascending=False)
        assert is_cosorted([-1 * key[order] for key in keys])

@pytest.mark.parametrize("algo", SortingAlgorithm)
def test_error_handling(self, algo):
ones, short_ones = ak.ones(100), ak.ones(10)
Expand Down
227 changes: 224 additions & 3 deletions PROTO_tests/tests/index_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pandas as pd
import pytest
from numpy import dtype as npdtype
Expand Down Expand Up @@ -27,7 +28,7 @@ def test_index_creation_lists(self):

i3 = ak.Index(["a", "b", "c"], allow_list=True)
assert isinstance(i3.values, list)
assert i3.dtype == dtype("<U")
assert i3.dtype == dtype("<U1")

with pytest.raises(ValueError):
i4 = ak.Index([1, 2, 3], allow_list=True, max_list_size=2)
Expand Down Expand Up @@ -106,10 +107,22 @@ def test_inferred_type(self):

@staticmethod
def assert_equal(pda1, pda2):
    """Assert that two arrays have the same size, dtype and contents.

    When both arguments are float arrays (per ``arkouda.util.is_float``),
    NaNs are treated as equal to each other: the NaN masks must match and
    the remaining non-NaN entries must match. An elementwise ``!=`` count
    is deliberately not used, because NaN != NaN would make any array
    containing NaN compare unequal to itself.
    """
    from arkouda import isnan
    from arkouda.util import is_float

    assert pda1.size == pda2.size
    assert pda1.dtype == pda2.dtype

    if is_float(pda1) and is_float(pda2):
        pda1_isnan = isnan(pda1)
        pda2_isnan = isnan(pda2)

        # Same NaN positions, then same values everywhere else.
        assert pda1_isnan.equals(pda2_isnan)
        assert pda1[~pda1_isnan].equals(pda2[~pda2_isnan])
    else:
        assert pda1.equals(pda2)

def test_get_item(self):
i = ak.Index([1, 2, 3])
Expand Down Expand Up @@ -262,6 +275,214 @@ def test_get_level_values(self):
with pytest.raises(ValueError):
m2.get_level_values(-1 * m2.nlevels)

@pytest.mark.parametrize("prob_size", pytest.prob_size)
def test_sort_values(self, prob_size):
    """Compare Index.sort_values against NumPy for floats, ints, int lists, strings and categoricals."""

    def expected_orders(np_values):
        # NumPy reference: ascending via argsort, descending via the flipped argsort.
        order = np.argsort(np_values)
        return np_values[order], np_values[np.flip(order)]

    # floats
    float_pda = ak.randint(0, 10 * prob_size, prob_size, dtype=ak.float64, seed=1)
    idx = Index(float_pda)
    want_asc, want_desc = expected_orders(float_pda.to_ndarray())
    assert np.array_equal(idx.sort_values(ascending=True).values.to_ndarray(), want_asc)
    assert np.array_equal(idx.sort_values(ascending=False).values.to_ndarray(), want_desc)

    # ints
    int_pda = ak.randint(0, 10 * prob_size, prob_size, dtype=ak.int64, seed=1)
    want_asc, want_desc = expected_orders(int_pda.to_ndarray())
    idx = Index(int_pda)
    assert np.array_equal(idx.sort_values(ascending=True).values.to_ndarray(), want_asc)
    assert np.array_equal(idx.sort_values(ascending=False).values.to_ndarray(), want_desc)

    # ints as list: values are compared directly, without to_ndarray()
    int_list = ak.randint(0, 10 * prob_size, prob_size, dtype=ak.int64, seed=1).to_list()
    want_asc, want_desc = expected_orders(np.array(int_list))
    idx = Index(int_list, allow_list=True)
    assert np.array_equal(idx.sort_values(ascending=True).values, want_asc)
    assert np.array_equal(idx.sort_values(ascending=False).values, want_desc)

    # strings
    strings = ak.random_strings_uniform(minlen=1, maxlen=5, seed=1, size=prob_size)
    want_asc, want_desc = expected_orders(strings.to_ndarray())
    idx = Index(strings)
    assert np.array_equal(idx.sort_values(ascending=True).values.to_ndarray(), want_asc)
    assert np.array_equal(idx.sort_values(ascending=False).values.to_ndarray(), want_desc)

    # categorical, built from the strings above
    categorical = ak.Categorical(strings)
    want_asc, want_desc = expected_orders(categorical.to_ndarray())
    idx = Index(categorical)
    assert np.array_equal(idx.sort_values(ascending=True).values.to_ndarray(), want_asc)
    assert np.array_equal(idx.sort_values(ascending=False).values.to_ndarray(), want_desc)

    # An unrecognised na_position must be rejected.
    with pytest.raises(ValueError):
        idx.sort_values(na_position="test")

def test_sort_values_na_position(self):
    """Check Index.sort_values NaN placement and the returned indexer for both na_position values."""
    idx = ak.Index([10, 100, 1, 1000, np.nan])

    # (na_position, expected sorted values, expected indexer into the original index)
    cases = (
        ("first", [np.nan, 1.0, 10.0, 100.0, 1000.0], [4, 2, 0, 1, 3]),
        ("last", [1.0, 10.0, 100.0, 1000.0, np.nan], [2, 0, 1, 3, 4]),
    )

    for na_position, expected_values, expected_indexer in cases:
        sorted_idx = idx.sort_values(na_position=na_position, return_indexer=False)
        self.assert_equal(sorted_idx.values, ak.array(expected_values))

        # Test with return_indexer
        sorted_idx, indexer = idx.sort_values(na_position=na_position, return_indexer=True)
        self.assert_equal(sorted_idx.values, ak.array(expected_values))
        self.assert_equal(indexer, ak.array(expected_indexer))

def test_sort_values_na_position_list_case(self):
    """Check NaN placement and the returned indexer for a list-backed Index.

    Values are compared with ``np.testing.assert_array_equal``, which treats
    NaNs in matching positions as equal. A plain ``list == list`` comparison
    only succeeds for NaN when both sides hold the very same object, so it
    would depend on how sort_values happens to produce its NaN.
    """
    idx = ak.Index([10, 100, 1, 1000, np.nan], allow_list=True)

    # (na_position, expected sorted values, expected indexer into the original index)
    cases = (
        ("first", [np.nan, 1.0, 10.0, 100.0, 1000.0], [4, 2, 0, 1, 3]),
        ("last", [1.0, 10.0, 100.0, 1000.0, np.nan], [2, 0, 1, 3, 4]),
    )

    for na_position, expected_values, expected_indexer in cases:
        sorted_idx = idx.sort_values(na_position=na_position, return_indexer=False)
        np.testing.assert_array_equal(sorted_idx.values, expected_values)

        # Test with return_indexer
        sorted_idx, indexer = idx.sort_values(na_position=na_position, return_indexer=True)
        np.testing.assert_array_equal(sorted_idx.values, expected_values)
        assert indexer == expected_indexer

@pytest.mark.parametrize("size", pytest.prob_size)
def test_memory_usage(self, size):
from arkouda.dtypes import BigInt
Expand Down
47 changes: 47 additions & 0 deletions PROTO_tests/tests/sort_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,50 @@ def test_nan_sort(self, algo):
pos_arr = np.array([3.14, np.inf, np.nan, np.inf, 7.7, 0.0, 3.14, 8])
for npa in neg_arr, pos_arr:
assert np.allclose(np.sort(npa), ak.sort(ak.array(npa), algo).to_ndarray(), equal_nan=True)

@pytest.mark.parametrize("prob_size", pytest.prob_size)
def test_argsort(self, prob_size):
    """Compare ak.argsort against NumPy for floats, ints, strings and categoricals."""

    def check_against_numpy(values):
        # NumPy reference: ascending via argsort, descending via the flipped argsort.
        np_values = values.to_ndarray()
        order = np.argsort(np_values)
        assert np.array_equal(
            values[ak.argsort(values, ascending=True)].to_ndarray(), np_values[order]
        )
        assert np.array_equal(
            values[ak.argsort(values, ascending=False)].to_ndarray(), np_values[np.flip(order)]
        )

    # floats
    check_against_numpy(ak.randint(0, 10 * prob_size, prob_size, dtype=ak.float64, seed=1))

    # ints
    check_against_numpy(ak.randint(0, 10 * prob_size, prob_size, dtype=ak.int64, seed=1))

    # strings
    strings = ak.random_strings_uniform(minlen=1, maxlen=5, seed=1, size=prob_size)
    check_against_numpy(strings)

    # categorical, built from the strings above
    check_against_numpy(ak.Categorical(strings))
Loading

0 comments on commit 0ab9716

Please sign in to comment.