feat(#479): improve value column conflict error (#483)

Fixes #479
Aarhus-Psychiatry-Research · Feb 22, 2024 · 6fdd951 · 6fdd951
1 parent 746b91c
commit 6fdd951
Showing 1 changed file with 186 additions and 0 deletions.
diff --git a/docs/tutorials/04_from_legacy.ipynb b/docs/tutorials/04_from_legacy.ipynb
@@ -0,0 +1,186 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Creating features from legacy feature specifications\n"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Users of timeseriesflattener version 1 should not have to rewrite all their feature specifications, so we have written a simple legacy API! It works for `PredictorGroupSpec`s, since those were the ones we had by far the most of.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Say you have a legacy specification defined like this:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The generated specs are for version 1: True\n"
+     ]
+    }
+   ],
+   "source": [
+    "from __future__ import annotations\n",
+    "\n",
+    "import pandas as pd\n",
+    "from timeseriesflattener.aggregation_fns import (\n",
+    "    boolean,\n",
+    "    change_per_day,\n",
+    "    count,\n",
+    "    earliest,\n",
+    "    latest,\n",
+    "    maximum,\n",
+    "    mean,\n",
+    "    minimum,\n",
+    "    summed,\n",
+    "    variance,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from timeseriesflattener.feature_specs.group_specs import NamedDataframe, PredictorGroupSpec\n",
+    "from timeseriesflattener.feature_specs.single_specs import PredictorSpec as Version1PredictorSpec\n",
+    "\n",
+    "legacy_spec = PredictorGroupSpec(\n",
+    "    lookbehind_days=((1, 2), (3, 4)),\n",
+    "    named_dataframes=[\n",
+    "        NamedDataframe(\n",
+    "            df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"1\", \"value\": 1}),\n",
+    "            name=\"test\",\n",
+    "        ),\n",
+    "        NamedDataframe(\n",
+    "            df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"2\", \"value\": 2}),\n",
+    "            name=\"test2\",\n",
+    "        ),\n",
+    "    ],\n",
+    "    aggregation_fns=[\n",
+    "        latest,\n",
+    "        earliest,\n",
+    "        maximum,\n",
+    "        minimum,\n",
+    "        mean,\n",
+    "        summed,\n",
+    "        count,\n",
+    "        variance,\n",
+    "        boolean,\n",
+    "        change_per_day,\n",
+    "    ],\n",
+    "    fallback=[0],\n",
+    ").create_combinations()\n",
+    "\n",
+    "print(f\"The generated specs are for version 1: {isinstance(legacy_spec[0], Version1PredictorSpec)}\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you want the output feature specifications to be compatible with version 2, all you have to do is replace the `PredictorGroupSpec` import:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The generated specs are for version 2: True\n"
+     ]
+    }
+   ],
+   "source": [
+    "from timeseriesflattenerv2.feature_specs.from_legacy import PredictorGroupSpec\n",
+    "from timeseriesflattenerv2.feature_specs.predictor import PredictorSpec as Version2PredictorSpec\n",
+    "\n",
+    "new_specs = PredictorGroupSpec(\n",
+    "    lookbehind_days=((1, 2), (3, 4)),\n",
+    "    named_dataframes=[\n",
+    "        NamedDataframe(\n",
+    "            df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"1\", \"value\": 1}),\n",
+    "            name=\"test\",\n",
+    "        ),\n",
+    "        NamedDataframe(\n",
+    "            df=pd.DataFrame({\"timestamp\": [\"2013-01-01\"], \"dw_ek_borger\": \"2\", \"value\": 2}),\n",
+    "            name=\"test2\",\n",
+    "        ),\n",
+    "    ],\n",
+    "    aggregation_fns=[\n",
+    "        latest,\n",
+    "        earliest,\n",
+    "        maximum,\n",
+    "        minimum,\n",
+    "        mean,\n",
+    "        summed,\n",
+    "        count,\n",
+    "        variance,\n",
+    "        boolean,\n",
+    "        change_per_day,\n",
+    "    ],\n",
+    "    fallback=[0],\n",
+    ").create_combinations()\n",
+    "\n",
+    "print(f\"The generated specs are for version 2: {isinstance(new_specs[0], Version2PredictorSpec)}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And then you get specifications that are ready to be aggregated by version 2!\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "d2b49c0af2d95979144de75823f7cfbb268839811992fdd0cb17fc1bb54ce815"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}