Skip to content

Commit

Permalink
Plumb pylibcudf datetime APIs through cudf python (#17275)
Browse files Browse the repository at this point in the history
Part of #15162

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #17275
  • Loading branch information
Matt711 authored Nov 8, 2024
1 parent 3c5f787 commit 18041b5
Showing 1 changed file with 70 additions and 110 deletions.
180 changes: 70 additions & 110 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,41 +9,29 @@ from libcpp.utility cimport move

cimport pylibcudf.libcudf.datetime as libcudf_datetime
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.filling cimport calendrical_month_sequence
from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf.libcudf.types cimport size_type
from pylibcudf.datetime import DatetimeComponent
from pylibcudf.datetime import DatetimeComponent, RoundingFrequency

from cudf._lib.column cimport Column
from cudf._lib.scalar cimport DeviceScalar
import pylibcudf as plc


@acquire_spill_lock()
def add_months(Column col, Column months):
# months must be int16 dtype
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()
cdef column_view months_view = months.view()

with nogil:
c_result = move(
libcudf_datetime.add_calendrical_months(
col_view,
months_view
)
return Column.from_pylibcudf(
plc.datetime.add_calendrical_months(
col.to_pylibcudf(mode="read"),
months.to_pylibcudf(mode="read")
)

return Column.from_unique_ptr(move(c_result))
)


@acquire_spill_lock()
def extract_datetime_component(Column col, object field):

cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()
cdef libcudf_datetime.datetime_component component

component_names = {
"year": DatetimeComponent.YEAR,
"month": DatetimeComponent.MONTH,
Expand All @@ -57,33 +45,29 @@ def extract_datetime_component(Column col, object field):
"nanosecond": DatetimeComponent.NANOSECOND,
}
if field == "day_of_year":
with nogil:
c_result = move(libcudf_datetime.day_of_year(col_view))
result = Column.from_pylibcudf(
plc.datetime.day_of_year(
col.to_pylibcudf(mode="read")
)
)
elif field in component_names:
component = component_names[field]
with nogil:
c_result = move(
libcudf_datetime.extract_datetime_component(
col_view,
component
)
result = Column.from_pylibcudf(
plc.datetime.extract_datetime_component(
col.to_pylibcudf(mode="read"),
component_names[field],
)
)
if field == "weekday":
# Pandas counts Monday-Sunday as 0-6
# while libcudf counts Monday-Sunday as 1-7
result = result - result.dtype.type(1)
else:
raise ValueError(f"Invalid field: '{field}'")

result = Column.from_unique_ptr(move(c_result))

if field == "weekday":
# Pandas counts Monday-Sunday as 0-6
# while libcudf counts Monday-Sunday as 1-7
result = result - result.dtype.type(1)

return result


cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq):
cdef libcudf_datetime.rounding_frequency freq_val

# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timedelta.resolution_string.html
old_to_new_freq_map = {
"H": "h",
Expand All @@ -101,78 +85,60 @@ cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq):
FutureWarning
)
freq = old_to_new_freq_map.get(freq)
if freq == "D":
freq_val = libcudf_datetime.rounding_frequency.DAY
elif freq == "h":
freq_val = libcudf_datetime.rounding_frequency.HOUR
elif freq == "min":
freq_val = libcudf_datetime.rounding_frequency.MINUTE
elif freq == "s":
freq_val = libcudf_datetime.rounding_frequency.SECOND
elif freq == "ms":
freq_val = libcudf_datetime.rounding_frequency.MILLISECOND
elif freq == "us":
freq_val = libcudf_datetime.rounding_frequency.MICROSECOND
elif freq == "ns":
freq_val = libcudf_datetime.rounding_frequency.NANOSECOND
rounding_fequency_map = {
"D": RoundingFrequency.DAY,
"h": RoundingFrequency.HOUR,
"min": RoundingFrequency.MINUTE,
"s": RoundingFrequency.SECOND,
"ms": RoundingFrequency.MILLISECOND,
"us": RoundingFrequency.MICROSECOND,
"ns": RoundingFrequency.NANOSECOND,
}
if freq in rounding_fequency_map:
return rounding_fequency_map[freq]
else:
raise ValueError(f"Invalid resolution: '{freq}'")
return freq_val


@acquire_spill_lock()
def ceil_datetime(Column col, object freq):
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()
cdef libcudf_datetime.rounding_frequency freq_val = \
_get_rounding_frequency(freq)

with nogil:
c_result = move(libcudf_datetime.ceil_datetimes(col_view, freq_val))

result = Column.from_unique_ptr(move(c_result))
return result
return Column.from_pylibcudf(
plc.datetime.ceil_datetimes(
col.to_pylibcudf(mode="read"),
_get_rounding_frequency(freq),
)
)


@acquire_spill_lock()
def floor_datetime(Column col, object freq):
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()
cdef libcudf_datetime.rounding_frequency freq_val = \
_get_rounding_frequency(freq)

with nogil:
c_result = move(libcudf_datetime.floor_datetimes(col_view, freq_val))

result = Column.from_unique_ptr(move(c_result))
return result
return Column.from_pylibcudf(
plc.datetime.floor_datetimes(
col.to_pylibcudf(mode="read"),
_get_rounding_frequency(freq),
)
)


@acquire_spill_lock()
def round_datetime(Column col, object freq):
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()
cdef libcudf_datetime.rounding_frequency freq_val = \
_get_rounding_frequency(freq)

with nogil:
c_result = move(libcudf_datetime.round_datetimes(col_view, freq_val))

result = Column.from_unique_ptr(move(c_result))
return result
return Column.from_pylibcudf(
plc.datetime.round_datetimes(
col.to_pylibcudf(mode="read"),
_get_rounding_frequency(freq),
)
)


@acquire_spill_lock()
def is_leap_year(Column col):
"""Returns a boolean indicator whether the year of the date is a leap year
"""
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()

with nogil:
c_result = move(libcudf_datetime.is_leap_year(col_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
plc.datetime.is_leap_year(
col.to_pylibcudf(mode="read")
)
)


@acquire_spill_lock()
Expand All @@ -199,34 +165,28 @@ def extract_quarter(Column col):
Returns a column which contains the corresponding quarter of the year
for every timestamp inside the input column.
"""
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()

with nogil:
c_result = move(libcudf_datetime.extract_quarter(col_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
plc.datetime.extract_quarter(
col.to_pylibcudf(mode="read")
)
)


@acquire_spill_lock()
def days_in_month(Column col):
"""Extracts the number of days in the month of the date
"""
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()

with nogil:
c_result = move(libcudf_datetime.days_in_month(col_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
plc.datetime.days_in_month(
col.to_pylibcudf(mode="read")
)
)


@acquire_spill_lock()
def last_day_of_month(Column col):
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()

with nogil:
c_result = move(libcudf_datetime.last_day_of_month(col_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
plc.datetime.last_day_of_month(
col.to_pylibcudf(mode="read")
)
)

0 comments on commit 18041b5

Please sign in to comment.