Skip to content

Commit

Permalink
docs(#496): add some docstrings to user facing specs on the expected type of input and general usage (#499) (#500)
Browse files (browse the repository at this point in the history)

Fixes #496
  • Loading branch information
MartinBernstorff authored Feb 23, 2024
2 parents 1d4e2b0 + 3cfa8d5 commit cf30e04
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 12 deletions.
4 changes: 2 additions & 2 deletions requirements-dev.lock
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ coloredlogs==15.0.1
# via timeseriesflattener
comm==0.2.1
# via ipykernel
coverage==7.4.1
coverage==7.4.2
# via pytest-cov
# via pytest-testmon
debugpy==1.8.1
Expand Down Expand Up @@ -121,7 +121,7 @@ pluggy==1.4.0
# via pytest
plum-dispatch==1.7.4
# via quartodoc
polars==0.20.9
polars==0.20.10
# via skimpy
# via timeseriesflattener
prompt-toolkit==3.0.43
Expand Down
2 changes: 1 addition & 1 deletion requirements.lock
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ platformdirs==4.2.0
# via jupyter-core
plum-dispatch==1.7.4
# via quartodoc
polars==0.20.9
polars==0.20.10
# via skimpy
# via timeseriesflattener
prompt-toolkit==3.0.43
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class PredictionTimeFrame:

def __post_init__(self, init_df: InitDF_T):
self.df = _anyframe_to_lazyframe(init_df)

self.df = self.df.with_columns(
pl.concat_str(
pl.col(self.entity_id_col_name), pl.lit("-"), pl.col(self.timestamp_col_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ def _anyframe_to_lazyframe(init_df: InitDF_T) -> pl.LazyFrame:
if isinstance(init_df, pd.DataFrame):
return pl.from_pandas(init_df).lazy()
raise ValueError(f"Unsupported type: {type(init_df)}.")


def _anyframe_to_eagerframe(init_df: InitDF_T) -> pl.DataFrame:
    """Convert any supported input frame into an eager polars DataFrame.

    The input is first normalised through ``_anyframe_to_lazyframe`` and the
    resulting lazy frame is then collected.
    """
    lazy_frame = _anyframe_to_lazyframe(init_df)
    return lazy_frame.collect()
22 changes: 13 additions & 9 deletions src/timeseriesflattenerv2/spec_processors/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,24 @@ def _get_timedelta_frame(
predictiontime_frame: PredictionTimeFrame, value_frame: ValueFrame
) -> TimeDeltaFrame:
# Join the prediction time dataframe
# ensure that the timestamp col names are different to avoid conflicts
unique_predictiontime_frame_timestamp_col_name = (
f"__{predictiontime_frame.timestamp_col_name}__"
)
if predictiontime_frame.timestamp_col_name == value_frame.value_timestamp_col_name:
"""If the timestamp col names are the same, they cause conflicts when joining."""
predictiontime_timestamp_col_name = f"_{predictiontime_frame.timestamp_col_name}"
join_patient_times = predictiontime_frame.df.rename(
{predictiontime_frame.timestamp_col_name: predictiontime_timestamp_col_name}
)
else:
predictiontime_timestamp_col_name = predictiontime_frame.timestamp_col_name
join_patient_times = predictiontime_frame.df

joined_frame = predictiontime_frame.df.rename(
{predictiontime_frame.timestamp_col_name: unique_predictiontime_frame_timestamp_col_name}
).join(value_frame.df, on=predictiontime_frame.entity_id_col_name, how="left")
joined_frame = join_patient_times.join(
value_frame.df, on=predictiontime_frame.entity_id_col_name, how="left"
)

# Get timedelta
timedelta_frame = joined_frame.with_columns(
(
pl.col(value_frame.value_timestamp_col_name)
- pl.col(unique_predictiontime_frame_timestamp_col_name)
pl.col(value_frame.value_timestamp_col_name) - pl.col(predictiontime_timestamp_col_name)
).alias("time_from_prediction_to_value")
)

Expand Down

0 comments on commit cf30e04

Please sign in to comment.