From 18041b5b91234c4fd878497739498f926838bb39 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Thu, 7 Nov 2024 22:16:01 -0500 Subject: [PATCH] Plumb pylibcudf datetime APIs through cudf python (#17275) Part of #15162 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/17275 --- python/cudf/cudf/_lib/datetime.pyx | 180 +++++++++++------------------ 1 file changed, 70 insertions(+), 110 deletions(-) diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index d844466120f..2c7a585f4b1 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -9,41 +9,29 @@ from libcpp.utility cimport move cimport pylibcudf.libcudf.datetime as libcudf_datetime from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.filling cimport calendrical_month_sequence from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport size_type -from pylibcudf.datetime import DatetimeComponent +from pylibcudf.datetime import DatetimeComponent, RoundingFrequency from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar +import pylibcudf as plc @acquire_spill_lock() def add_months(Column col, Column months): # months must be int16 dtype - cdef unique_ptr[column] c_result - cdef column_view col_view = col.view() - cdef column_view months_view = months.view() - - with nogil: - c_result = move( - libcudf_datetime.add_calendrical_months( - col_view, - months_view - ) + return Column.from_pylibcudf( + plc.datetime.add_calendrical_months( + col.to_pylibcudf(mode="read"), + months.to_pylibcudf(mode="read") ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() def extract_datetime_component(Column col, object field): - - cdef unique_ptr[column] 
c_result - cdef column_view col_view = col.view() - cdef libcudf_datetime.datetime_component component - component_names = { "year": DatetimeComponent.YEAR, "month": DatetimeComponent.MONTH, @@ -57,33 +45,29 @@ def extract_datetime_component(Column col, object field): "nanosecond": DatetimeComponent.NANOSECOND, } if field == "day_of_year": - with nogil: - c_result = move(libcudf_datetime.day_of_year(col_view)) + result = Column.from_pylibcudf( + plc.datetime.day_of_year( + col.to_pylibcudf(mode="read") + ) + ) elif field in component_names: - component = component_names[field] - with nogil: - c_result = move( - libcudf_datetime.extract_datetime_component( - col_view, - component - ) + result = Column.from_pylibcudf( + plc.datetime.extract_datetime_component( + col.to_pylibcudf(mode="read"), + component_names[field], ) + ) + if field == "weekday": + # Pandas counts Monday-Sunday as 0-6 + # while libcudf counts Monday-Sunday as 1-7 + result = result - result.dtype.type(1) else: raise ValueError(f"Invalid field: '{field}'") - result = Column.from_unique_ptr(move(c_result)) - - if field == "weekday": - # Pandas counts Monday-Sunday as 0-6 - # while libcudf counts Monday-Sunday as 1-7 - result = result - result.dtype.type(1) - return result cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq): - cdef libcudf_datetime.rounding_frequency freq_val - # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timedelta.resolution_string.html old_to_new_freq_map = { "H": "h", @@ -101,78 +85,60 @@ cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq): FutureWarning ) freq = old_to_new_freq_map.get(freq) - if freq == "D": - freq_val = libcudf_datetime.rounding_frequency.DAY - elif freq == "h": - freq_val = libcudf_datetime.rounding_frequency.HOUR - elif freq == "min": - freq_val = libcudf_datetime.rounding_frequency.MINUTE - elif freq == "s": - freq_val = libcudf_datetime.rounding_frequency.SECOND - elif freq == "ms": - 
freq_val = libcudf_datetime.rounding_frequency.MILLISECOND - elif freq == "us": - freq_val = libcudf_datetime.rounding_frequency.MICROSECOND - elif freq == "ns": - freq_val = libcudf_datetime.rounding_frequency.NANOSECOND + rounding_fequency_map = { + "D": RoundingFrequency.DAY, + "h": RoundingFrequency.HOUR, + "min": RoundingFrequency.MINUTE, + "s": RoundingFrequency.SECOND, + "ms": RoundingFrequency.MILLISECOND, + "us": RoundingFrequency.MICROSECOND, + "ns": RoundingFrequency.NANOSECOND, + } + if freq in rounding_fequency_map: + return rounding_fequency_map[freq] else: raise ValueError(f"Invalid resolution: '{freq}'") - return freq_val @acquire_spill_lock() def ceil_datetime(Column col, object freq): - cdef unique_ptr[column] c_result - cdef column_view col_view = col.view() - cdef libcudf_datetime.rounding_frequency freq_val = \ - _get_rounding_frequency(freq) - - with nogil: - c_result = move(libcudf_datetime.ceil_datetimes(col_view, freq_val)) - - result = Column.from_unique_ptr(move(c_result)) - return result + return Column.from_pylibcudf( + plc.datetime.ceil_datetimes( + col.to_pylibcudf(mode="read"), + _get_rounding_frequency(freq), + ) + ) @acquire_spill_lock() def floor_datetime(Column col, object freq): - cdef unique_ptr[column] c_result - cdef column_view col_view = col.view() - cdef libcudf_datetime.rounding_frequency freq_val = \ - _get_rounding_frequency(freq) - - with nogil: - c_result = move(libcudf_datetime.floor_datetimes(col_view, freq_val)) - - result = Column.from_unique_ptr(move(c_result)) - return result + return Column.from_pylibcudf( + plc.datetime.floor_datetimes( + col.to_pylibcudf(mode="read"), + _get_rounding_frequency(freq), + ) + ) @acquire_spill_lock() def round_datetime(Column col, object freq): - cdef unique_ptr[column] c_result - cdef column_view col_view = col.view() - cdef libcudf_datetime.rounding_frequency freq_val = \ - _get_rounding_frequency(freq) - - with nogil: - c_result = 
move(libcudf_datetime.round_datetimes(col_view, freq_val)) - - result = Column.from_unique_ptr(move(c_result)) - return result + return Column.from_pylibcudf( + plc.datetime.round_datetimes( + col.to_pylibcudf(mode="read"), + _get_rounding_frequency(freq), + ) + ) @acquire_spill_lock() def is_leap_year(Column col): """Returns a boolean indicator whether the year of the date is a leap year """ - cdef unique_ptr[column] c_result - cdef column_view col_view = col.view() - - with nogil: - c_result = move(libcudf_datetime.is_leap_year(col_view)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + plc.datetime.is_leap_year( + col.to_pylibcudf(mode="read") + ) + ) @acquire_spill_lock() @@ -199,34 +165,28 @@ def extract_quarter(Column col): Returns a column which contains the corresponding quarter of the year for every timestamp inside the input column. """ - cdef unique_ptr[column] c_result - cdef column_view col_view = col.view() - - with nogil: - c_result = move(libcudf_datetime.extract_quarter(col_view)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + plc.datetime.extract_quarter( + col.to_pylibcudf(mode="read") + ) + ) @acquire_spill_lock() def days_in_month(Column col): """Extracts the number of days in the month of the date """ - cdef unique_ptr[column] c_result - cdef column_view col_view = col.view() - - with nogil: - c_result = move(libcudf_datetime.days_in_month(col_view)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + plc.datetime.days_in_month( + col.to_pylibcudf(mode="read") + ) + ) @acquire_spill_lock() def last_day_of_month(Column col): - cdef unique_ptr[column] c_result - cdef column_view col_view = col.view() - - with nogil: - c_result = move(libcudf_datetime.last_day_of_month(col_view)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + plc.datetime.last_day_of_month( + col.to_pylibcudf(mode="read") + ) + )