Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use pylibcudf.search APIs in cudf python #17271

Merged
merged 1 commit into from
Nov 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 11 additions & 47 deletions python/cudf/cudf/_lib/sort.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,10 @@ from itertools import repeat
from cudf.core.buffer import acquire_spill_lock

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

from pylibcudf.libcudf.aggregation cimport rank_method
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.search cimport lower_bound, upper_bound
from pylibcudf.libcudf.table.table_view cimport table_view
from pylibcudf.libcudf.types cimport null_order, order as cpp_order

from cudf._lib.column cimport Column
from cudf._lib.utils cimport (
columns_from_pylibcudf_table,
table_view_from_columns,
)
from cudf._lib.utils cimport columns_from_pylibcudf_table

import pylibcudf

Expand Down Expand Up @@ -311,44 +300,19 @@ def digitize(list source_columns, list bins, bool right=False):
right : Indicating whether the intervals include the
right or the left bin edge.
"""

cdef table_view bins_view = table_view_from_columns(bins)
cdef table_view source_table_view = table_view_from_columns(
source_columns
)
cdef vector[cpp_order] column_order = (
vector[cpp_order](
bins_view.num_columns(),
cpp_order.ASCENDING
)
)
cdef vector[null_order] null_precedence = (
vector[null_order](
bins_view.num_columns(),
null_order.BEFORE
return Column.from_pylibcudf(
getattr(pylibcudf.search, "lower_bound" if right else "upper_bound")(
pylibcudf.Table(
[c.to_pylibcudf(mode="read") for c in bins]
),
pylibcudf.Table(
[c.to_pylibcudf(mode="read") for c in source_columns]
),
[pylibcudf.types.Order.ASCENDING]*len(bins),
[pylibcudf.types.NullOrder.BEFORE]*len(bins)
)
)

cdef unique_ptr[column] c_result
if right:
with nogil:
c_result = move(lower_bound(
bins_view,
source_table_view,
column_order,
null_precedence)
)
else:
with nogil:
c_result = move(upper_bound(
bins_view,
source_table_view,
column_order,
null_precedence)
)

return Column.from_unique_ptr(move(c_result))


@acquire_spill_lock()
def rank_columns(list source_columns, rank_method method, str na_option,
Expand Down
Loading