From 6d1122be2a25cbd726536ac92061156fd7a9a0b4 Mon Sep 17 00:00:00 2001 From: Lasse Date: Thu, 22 Feb 2024 15:17:20 +0100 Subject: [PATCH 1/2] fix(#480): pure NaNs when flattening with lookbehind-tuple in 01_basic Fixes #480 --- docs/tutorials/01_basic.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/01_basic.ipynb b/docs/tutorials/01_basic.ipynb index e34e455f..094d0345 100644 --- a/docs/tutorials/01_basic.ipynb +++ b/docs/tutorials/01_basic.ipynb @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [ { From 3ade969b3db28aba52690afefa855b8cc8c9cc2f Mon Sep 17 00:00:00 2001 From: Lasse Date: Thu, 22 Feb 2024 15:46:38 +0100 Subject: [PATCH 2/2] fix: bug in `_get_timedelta_frame` if timestamp col names were the same --- .../spec_processors/temporal.py | 11 +++++--- .../spec_processors/test_temporal.py | 25 +++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/timeseriesflattenerv2/spec_processors/temporal.py b/src/timeseriesflattenerv2/spec_processors/temporal.py index 5639aa24..e220e253 100644 --- a/src/timeseriesflattenerv2/spec_processors/temporal.py +++ b/src/timeseriesflattenerv2/spec_processors/temporal.py @@ -24,15 +24,20 @@ def _get_timedelta_frame( predictiontime_frame: PredictionTimeFrame, value_frame: ValueFrame ) -> TimeDeltaFrame: # Join the prediction time dataframe - joined_frame = predictiontime_frame.df.join( - value_frame.df, on=predictiontime_frame.entity_id_col_name, how="left" + # ensure that the timestamp col names are different to avoid conflicts + unique_predictiontime_frame_timestamp_col_name = ( + f"__{predictiontime_frame.timestamp_col_name}__" ) + joined_frame = predictiontime_frame.df.rename( + {predictiontime_frame.timestamp_col_name: unique_predictiontime_frame_timestamp_col_name} + ).join(value_frame.df, on=predictiontime_frame.entity_id_col_name, how="left") + # Get timedelta timedelta_frame = joined_frame.with_columns( ( pl.col(value_frame.value_timestamp_col_name) - - pl.col(predictiontime_frame.timestamp_col_name) + - pl.col(unique_predictiontime_frame_timestamp_col_name) ).alias("time_from_prediction_to_value") ) diff --git a/src/timeseriesflattenerv2/spec_processors/test_temporal.py b/src/timeseriesflattenerv2/spec_processors/test_temporal.py index bd77a7aa..428de6a4 100644 --- a/src/timeseriesflattenerv2/spec_processors/test_temporal.py +++ b/src/timeseriesflattenerv2/spec_processors/test_temporal.py @@ -117,6 +117,31 @@ def test_get_timedelta_frame(): assert result.get_timedeltas() == expected_timedeltas +def test_get_timedelta_frame_same_timestamp_col_names(): + pred_frame = str_to_pl_df( + """entity_id,timestamp + 1,2021-01-03""" + ) + + value_frame = str_to_pl_df( + """entity_id,value,timestamp + 1,1,2021-01-01 + 1,2,2021-01-02 + 1,3,2021-01-03""" + ) + + expected_timedeltas = [dt.timedelta(days=-2), dt.timedelta(days=-1), dt.timedelta(days=0)] + + result = process_spec._get_timedelta_frame( + predictiontime_frame=PredictionTimeFrame( + init_df=pred_frame.lazy(), timestamp_col_name="timestamp" + ), + value_frame=ValueFrame(init_df=value_frame.lazy()), + ) + + assert result.get_timedeltas() == expected_timedeltas + + def test_slice_without_any_within_window(): timedelta_frame = TimeDeltaFrame( df=pl.LazyFrame(