diff --git a/docs/tutorials/04_from_legacy.ipynb b/docs/tutorials/04_from_legacy.ipynb new file mode 100644 index 00000000..efe14197 --- /dev/null +++ b/docs/tutorials/04_from_legacy.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating features from legacy feature specifications\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Users of timeseriesflattener version 1 should not have to rewrite all their feature specifications, so we have written a simple legacy API! It works for `PredictorGroupSpec`s, since those were the ones we had by far the most of.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Say you have a legacy specification defined like this:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The generated specs are for version 1: True\n" + ] + } + ], + "source": [ + "from __future__ import annotations\n", + "\n", + "import pandas as pd\n", + "from timeseriesflattener.aggregation_fns import (\n", + " boolean,\n", + " change_per_day,\n", + " count,\n", + " earliest,\n", + " latest,\n", + " maximum,\n", + " mean,\n", + " minimum,\n", + " summed,\n", + " variance,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from timeseriesflattener.feature_specs.group_specs import NamedDataframe, PredictorGroupSpec\n", + "from timeseriesflattener.feature_specs.single_specs import PredictorSpec as Version1PredictorSpec\n", + "\n", + "legacy_spec = PredictorGroupSpec(\n", + " lookbehind_days=((1, 2), (3, 4)),\n", + " named_dataframes=[\n", + " NamedDataframe(\n", + " df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"1\", \"value\": 1}),\n", + " name=\"test\",\n", + " ),\n", + " NamedDataframe(\n", + " df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"2\", \"value\": 2}),\n", + " name=\"test2\",\n", + " ),\n", + " ],\n", + " aggregation_fns=[\n", + " latest,\n", + " earliest,\n", + " maximum,\n", + " minimum,\n", + " mean,\n", + " summed,\n", + " count,\n", + " variance,\n", + " boolean,\n", + " change_per_day,\n", + " ],\n", + " fallback=[0],\n", + ").create_combinations()\n", + "\n", + "print(f\"The generated specs are for version 1: {isinstance(legacy_spec[0], Version1PredictorSpec)}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want the output feature specifications to be compatible with version 2, all you have to do is replace the `PredictorGroupSpec` import:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The generated specs are for version 2: True\n" + ] + } + ], + "source": [ + "from timeseriesflattenerv2.feature_specs.from_legacy import PredictorGroupSpec\n", + "from timeseriesflattenerv2.feature_specs.predictor import PredictorSpec as Version2PredictorSpec\n", + "\n", + "new_specs = PredictorGroupSpec(\n", + " lookbehind_days=((1, 2), (3, 4)),\n", + " named_dataframes=[\n", + " NamedDataframe(\n", + " df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"1\", \"value\": 1}),\n", + " name=\"test\",\n", + " ),\n", + " NamedDataframe(\n", + " df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"2\", \"value\": 2}),\n", + " name=\"test2\",\n", + " ),\n", + " ],\n", + " aggregation_fns=[\n", + " latest,\n", + " earliest,\n", + " maximum,\n", + " minimum,\n", + " mean,\n", + " summed,\n", + " count,\n", + " variance,\n", + " boolean,\n", + " change_per_day,\n", + " ],\n", + " fallback=[0],\n", + ").create_combinations()\n", + "\n", + "print(f\"The generated specs are for version 2: {isinstance(new_specs[0], Version2PredictorSpec)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And then you get specifications that are ready to be aggregated by version 2!\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "d2b49c0af2d95979144de75823f7cfbb268839811992fdd0cb17fc1bb54ce815" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}