From f33c0a861629830ebaefc04bf5cece4b6b3697d3 Mon Sep 17 00:00:00 2001
From: Bas Schoenmaeckers
Date: Tue, 21 Jan 2025 13:09:38 +0100
Subject: [PATCH] feat: extract timezone info from python datetimes

---
 crates/polars-core/src/frame/row/av_buffer.rs |  5 ++-
 crates/polars-plan/src/plans/lit.rs           |  3 ++
 .../polars-python/src/conversion/any_value.rs | 35 +++++++++++++------
 3 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/crates/polars-core/src/frame/row/av_buffer.rs b/crates/polars-core/src/frame/row/av_buffer.rs
index 3441166bb33f..d0da0c87dcd9 100644
--- a/crates/polars-core/src/frame/row/av_buffer.rs
+++ b/crates/polars-core/src/frame/row/av_buffer.rs
@@ -96,7 +96,10 @@ impl<'a> AnyValueBuffer<'a> {
             #[cfg(feature = "dtype-datetime")]
             (Datetime(builder, _, _), AnyValue::Null) => builder.append_null(),
             #[cfg(feature = "dtype-datetime")]
-            (Datetime(builder, tu_l, _), AnyValue::Datetime(v, tu_r, _)) => {
+            (
+                Datetime(builder, tu_l, _),
+                AnyValue::Datetime(v, tu_r, _) | AnyValue::DatetimeOwned(v, tu_r, _),
+            ) => {
                 // we convert right tu to left tu
                 // so we swap.
                 let v = convert_time_units(v, tu_r, *tu_l);
diff --git a/crates/polars-plan/src/plans/lit.rs b/crates/polars-plan/src/plans/lit.rs
index 7ab7bb21003f..33c564389a26 100644
--- a/crates/polars-plan/src/plans/lit.rs
+++ b/crates/polars-plan/src/plans/lit.rs
@@ -310,6 +310,9 @@ impl From<AnyValue<'_>> for LiteralValue {
             AnyValue::Date(v) => LiteralValue::Date(v),
             #[cfg(feature = "dtype-datetime")]
             AnyValue::Datetime(value, tu, tz) => LiteralValue::DateTime(value, tu, tz.cloned()),
+            AnyValue::DatetimeOwned(value, tu, tz) => {
+                LiteralValue::DateTime(value, tu, tz.as_ref().map(AsRef::as_ref).cloned())
+            },
             #[cfg(feature = "dtype-duration")]
             AnyValue::Duration(value, tu) => LiteralValue::Duration(value, tu),
             #[cfg(feature = "dtype-time")]
diff --git a/crates/polars-python/src/conversion/any_value.rs b/crates/polars-python/src/conversion/any_value.rs
index 18c133eb5a45..1e0991b69bfb 100644
--- a/crates/polars-python/src/conversion/any_value.rs
+++ b/crates/polars-python/src/conversion/any_value.rs
@@ -1,4 +1,5 @@
 use std::borrow::{Borrow, Cow};
+use std::sync::Arc;
 
 use chrono_tz::Tz;
 #[cfg(feature = "object")]
@@ -254,31 +255,43 @@ pub(crate) fn py_object_to_any_value<'py>(
         let py = ob.py();
         let tzinfo = ob.getattr(intern!(py, "tzinfo"))?;
 
-        let timestamp = if tzinfo.is_none() {
+        if tzinfo.is_none() {
             let datetime = ob.extract::<NaiveDateTime>()?;
             let delta = datetime - NaiveDateTime::UNIX_EPOCH;
-            delta.num_microseconds().unwrap()
-        } else if tzinfo.hasattr(intern!(py, "key"))? {
+            let timestamp = delta.num_microseconds().unwrap();
+            return Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None));
+        }
+
+        let (timestamp, tz) = if tzinfo.hasattr(intern!(py, "key"))? {
             let datetime = ob.extract::<DateTime<Tz>>()?;
+            let tz = datetime.timezone().name().into();
             if datetime.year() >= 2100 {
                 // chrono-tz does not support dates after 2100
                 // https://github.com/chronotope/chrono-tz/issues/135
-                pl_utils(py)
-                    .bind(py)
-                    .getattr(intern!(py, "datetime_to_int"))?
-                    .call1((ob, intern!(py, "us")))?
-                    .extract::<i64>()?
+                (
+                    pl_utils(py)
+                        .bind(py)
+                        .getattr(intern!(py, "datetime_to_int"))?
+                        .call1((ob, intern!(py, "us")))?
+                        .extract::<i64>()?,
+                    tz,
+                )
             } else {
                 let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
-                delta.num_microseconds().unwrap()
+                (delta.num_microseconds().unwrap(), tz)
             }
         } else {
             let datetime = ob.extract::<DateTime<FixedOffset>>()?;
+            let tz = datetime.timezone().to_string().into();
             let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
-            delta.num_microseconds().unwrap()
+            (delta.num_microseconds().unwrap(), tz)
         };
 
-        Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None))
+        Ok(AnyValue::DatetimeOwned(
+            timestamp,
+            TimeUnit::Microseconds,
+            Some(Arc::new(tz)),
+        ))
     }
 
     fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {