Skip to content

Commit

Permalink
Move segmented_gather function from the copying module to the lists module (#17148)
Browse files Browse the repository at this point in the history

This PR moves `segmented_gather` out of the copying module and into the lists module, and switches cudf Python to use the pylibcudf `segmented_gather` implementation instead of calling the libcudf C++ binding directly.

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: #17148
  • Loading branch information
Matt711 authored Oct 24, 2024
1 parent b75036b commit 7115f20
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 40 deletions.
26 changes: 1 addition & 25 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import pickle

from libc.stdint cimport uint8_t, uintptr_t
from libcpp cimport bool
from libcpp.memory cimport make_shared, shared_ptr, unique_ptr
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

Expand All @@ -30,10 +30,6 @@ from libcpp.memory cimport make_unique
cimport pylibcudf.libcudf.contiguous_split as cpp_contiguous_split
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.lists.gather cimport (
segmented_gather as cpp_segmented_gather,
)
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf.libcudf.types cimport size_type

Expand Down Expand Up @@ -339,26 +335,6 @@ def get_element(Column input_column, size_type index):
)


@acquire_spill_lock()
def segmented_gather(Column source_column, Column gather_map):
cdef shared_ptr[lists_column_view] source_LCV = (
make_shared[lists_column_view](source_column.view())
)
cdef shared_ptr[lists_column_view] gather_map_LCV = (
make_shared[lists_column_view](gather_map.view())
)
cdef unique_ptr[column] c_result

with nogil:
c_result = move(
cpp_segmented_gather(
source_LCV.get()[0], gather_map_LCV.get()[0])
)

result = Column.from_unique_ptr(move(c_result))
return result


cdef class _CPackedColumns:

@staticmethod
Expand Down
38 changes: 24 additions & 14 deletions python/cudf/cudf/_lib/lists.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,24 @@ from pylibcudf.libcudf.types cimport null_order, size_type
from cudf._lib.column cimport Column
from cudf._lib.utils cimport columns_from_pylibcudf_table

import pylibcudf
import pylibcudf as plc

from pylibcudf cimport Scalar


@acquire_spill_lock()
def count_elements(Column col):
return Column.from_pylibcudf(
pylibcudf.lists.count_elements(
plc.lists.count_elements(
col.to_pylibcudf(mode="read"))
)


@acquire_spill_lock()
def explode_outer(list source_columns, int explode_column_idx):
return columns_from_pylibcudf_table(
pylibcudf.lists.explode_outer(
pylibcudf.Table([c.to_pylibcudf(mode="read") for c in source_columns]),
plc.lists.explode_outer(
plc.Table([c.to_pylibcudf(mode="read") for c in source_columns]),
explode_column_idx,
)
)
Expand All @@ -35,7 +35,7 @@ def explode_outer(list source_columns, int explode_column_idx):
@acquire_spill_lock()
def distinct(Column col, bool nulls_equal, bool nans_all_equal):
return Column.from_pylibcudf(
pylibcudf.lists.distinct(
plc.lists.distinct(
col.to_pylibcudf(mode="read"),
nulls_equal,
nans_all_equal,
Expand All @@ -46,7 +46,7 @@ def distinct(Column col, bool nulls_equal, bool nans_all_equal):
@acquire_spill_lock()
def sort_lists(Column col, bool ascending, str na_position):
return Column.from_pylibcudf(
pylibcudf.lists.sort_lists(
plc.lists.sort_lists(
col.to_pylibcudf(mode="read"),
ascending,
null_order.BEFORE if na_position == "first" else null_order.AFTER,
Expand All @@ -58,7 +58,7 @@ def sort_lists(Column col, bool ascending, str na_position):
@acquire_spill_lock()
def extract_element_scalar(Column col, size_type index):
return Column.from_pylibcudf(
pylibcudf.lists.extract_list_element(
plc.lists.extract_list_element(
col.to_pylibcudf(mode="read"),
index,
)
Expand All @@ -68,7 +68,7 @@ def extract_element_scalar(Column col, size_type index):
@acquire_spill_lock()
def extract_element_column(Column col, Column index):
return Column.from_pylibcudf(
pylibcudf.lists.extract_list_element(
plc.lists.extract_list_element(
col.to_pylibcudf(mode="read"),
index.to_pylibcudf(mode="read"),
)
Expand All @@ -78,7 +78,7 @@ def extract_element_column(Column col, Column index):
@acquire_spill_lock()
def contains_scalar(Column col, py_search_key):
return Column.from_pylibcudf(
pylibcudf.lists.contains(
plc.lists.contains(
col.to_pylibcudf(mode="read"),
<Scalar> py_search_key.device_value.c_value,
)
Expand All @@ -88,7 +88,7 @@ def contains_scalar(Column col, py_search_key):
@acquire_spill_lock()
def index_of_scalar(Column col, object py_search_key):
return Column.from_pylibcudf(
pylibcudf.lists.index_of(
plc.lists.index_of(
col.to_pylibcudf(mode="read"),
<Scalar> py_search_key.device_value.c_value,
True,
Expand All @@ -99,7 +99,7 @@ def index_of_scalar(Column col, object py_search_key):
@acquire_spill_lock()
def index_of_column(Column col, Column search_keys):
return Column.from_pylibcudf(
pylibcudf.lists.index_of(
plc.lists.index_of(
col.to_pylibcudf(mode="read"),
search_keys.to_pylibcudf(mode="read"),
True,
Expand All @@ -110,8 +110,8 @@ def index_of_column(Column col, Column search_keys):
@acquire_spill_lock()
def concatenate_rows(list source_columns):
return Column.from_pylibcudf(
pylibcudf.lists.concatenate_rows(
pylibcudf.Table([
plc.lists.concatenate_rows(
plc.Table([
c.to_pylibcudf(mode="read") for c in source_columns
])
)
Expand All @@ -121,8 +121,18 @@ def concatenate_rows(list source_columns):
@acquire_spill_lock()
def concatenate_list_elements(Column input_column, dropna=False):
return Column.from_pylibcudf(
pylibcudf.lists.concatenate_list_elements(
plc.lists.concatenate_list_elements(
input_column.to_pylibcudf(mode="read"),
dropna,
)
)


@acquire_spill_lock()
def segmented_gather(Column source_column, Column gather_map):
return Column.from_pylibcudf(
plc.lists.segmented_gather(
source_column.to_pylibcudf(mode="read"),
gather_map.to_pylibcudf(mode="read"),
)
)
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from typing_extensions import Self

import cudf
from cudf._lib.copying import segmented_gather
from cudf._lib.lists import (
concatenate_list_elements,
concatenate_rows,
Expand All @@ -22,6 +21,7 @@
extract_element_scalar,
index_of_column,
index_of_scalar,
segmented_gather,
sort_lists,
)
from cudf._lib.strings.convert.convert_lists import format_list_column
Expand Down

0 comments on commit 7115f20

Please sign in to comment.