-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Fixes #479
- Loading branch information
1 parent
746b91c
commit 6fdd951
Showing
1 changed file
with
186 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Creating features from legacy feature specifications\n" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Users of timeseriesflattener version 1 should not have to rewrite all their feature specifications, so we have written a simple legacy API! It works for `PredictorGroupSpec`s, since those were the ones we had by far the most of.\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Say you have a legacy specification defined like this:\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"The generated specs are for version 1: True\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from __future__ import annotations\n", | ||
"\n", | ||
"import pandas as pd\n", | ||
"from timeseriesflattener.aggregation_fns import (\n", | ||
" boolean,\n", | ||
" change_per_day,\n", | ||
" count,\n", | ||
" earliest,\n", | ||
" latest,\n", | ||
" maximum,\n", | ||
" mean,\n", | ||
" minimum,\n", | ||
" summed,\n", | ||
" variance,\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from timeseriesflattener.feature_specs.group_specs import NamedDataframe, PredictorGroupSpec\n", | ||
"from timeseriesflattener.feature_specs.single_specs import PredictorSpec as Version1PredictorSpec\n", | ||
"\n", | ||
"legacy_spec = PredictorGroupSpec(\n", | ||
" lookbehind_days=((1, 2), (3, 4)),\n", | ||
" named_dataframes=[\n", | ||
" NamedDataframe(\n", | ||
" df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"1\", \"value\": 1}),\n", | ||
" name=\"test\",\n", | ||
" ),\n", | ||
" NamedDataframe(\n", | ||
" df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"2\", \"value\": 2}),\n", | ||
" name=\"test2\",\n", | ||
" ),\n", | ||
" ],\n", | ||
" aggregation_fns=[\n", | ||
" latest,\n", | ||
" earliest,\n", | ||
" maximum,\n", | ||
" minimum,\n", | ||
" mean,\n", | ||
" summed,\n", | ||
" count,\n", | ||
" variance,\n", | ||
" boolean,\n", | ||
" change_per_day,\n", | ||
" ],\n", | ||
" fallback=[0],\n", | ||
").create_combinations()\n", | ||
"\n", | ||
"print(f\"The generated specs are for version 1: {isinstance(legacy_spec[0], Version1PredictorSpec)}\")" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"If you want the output feature specifications to be compatible with version 2, all you have to do is replace the `PredictorGroupSpec` import:\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"The generated specs are for version 2: True\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from timeseriesflattenerv2.feature_specs.from_legacy import PredictorGroupSpec\n", | ||
"from timeseriesflattenerv2.feature_specs.predictor import PredictorSpec as Version2PredictorSpec\n", | ||
"\n", | ||
"new_specs = PredictorGroupSpec(\n", | ||
" lookbehind_days=((1, 2), (3, 4)),\n", | ||
" named_dataframes=[\n", | ||
" NamedDataframe(\n", | ||
" df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"1\", \"value\": 1}),\n", | ||
" name=\"test\",\n", | ||
" ),\n", | ||
" NamedDataframe(\n", | ||
" df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"2\", \"value\": 2}),\n", | ||
" name=\"test2\",\n", | ||
" ),\n", | ||
" ],\n", | ||
" aggregation_fns=[\n", | ||
" latest,\n", | ||
" earliest,\n", | ||
" maximum,\n", | ||
" minimum,\n", | ||
" mean,\n", | ||
" summed,\n", | ||
" count,\n", | ||
" variance,\n", | ||
" boolean,\n", | ||
" change_per_day,\n", | ||
" ],\n", | ||
" fallback=[0],\n", | ||
").create_combinations()\n", | ||
"\n", | ||
"print(f\"The generated specs are for version 2: {isinstance(new_specs[0], Version2PredictorSpec)}\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"And then you get specifications that are ready to be aggregated by version 2!\n" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": ".venv", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.18" | ||
}, | ||
"orig_nbformat": 4, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "d2b49c0af2d95979144de75823f7cfbb268839811992fdd0cb17fc1bb54ce815" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |