From 6c1d6cad21031a65171ac9f8339a444de294d42b Mon Sep 17 00:00:00 2001 From: Martin Bernstorff Date: Thu, 22 Feb 2024 13:35:57 +0100 Subject: [PATCH] docs(#475): delete 02_advanced.ipynb Fixes #475 --- docs/tutorials/02_advanced.ipynb | 549 ------------------------------- 1 file changed, 549 deletions(-) delete mode 100644 docs/tutorials/02_advanced.ipynb diff --git a/docs/tutorials/02_advanced.ipynb b/docs/tutorials/02_advanced.ipynb deleted file mode 100644 index 7ab81576..00000000 --- a/docs/tutorials/02_advanced.ipynb +++ /dev/null @@ -1,549 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Advanced Tutorial" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the basic tutorial we covered how to add static features, predictors and outcomes.\n", - "In this tutorial, we'll expand on that, covering how to effectively add many features by:\n", - "1. Creating feature combinations from specifications,\n", - "2. Using caching, so you can iterate on your datasets without having to redo the full computation every time\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creating feature combinations\n", - "Manually specifying a handful of features one at a time is rather straightforward, but what if you want to generate hundreds of features? Or want to have multiple different lookbehind windows, e.g. a month, 6 months and a year? Then the amount of code you'll have to write grows substantially and becomes time-consuming and hard to navigate.\n", - "\n", - "To solve this problem, we implemented feature group specifications. They allow you to combinatorially create features. Let's look at an example:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "from __future__ import annotations\n", - "\n", - "from pprint import pprint\n", - "\n", - "import numpy as np\n", - "from timeseriesflattener.aggregation_fns import maximum, mean\n", - "from timeseriesflattener.feature_specs.group_specs import NamedDataframe, PredictorGroupSpec\n", - "from timeseriesflattener.testing.load_synth_data import load_synth_predictor_float" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "pred_spec_batch = PredictorGroupSpec(\n", - "    named_dataframes=[\n", - "        NamedDataframe(df=load_synth_predictor_float(), name=\"synth_predictor_float\")\n", - "    ],\n", - "    lookbehind_days=[(0, 365), (365, 730), 1095],\n", - "    fallback=[np.nan],\n", - "    aggregation_fns=[mean, maximum],\n", - ").create_combinations()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You'll note that:\n", - "\n", - "1. All attributes are now required to be lists. This makes iteration easier when creating the combinations.\n", - "2. We require a named_dataframes sequence. A NamedDataframe is exactly that: a dataframe and a name. The name is used when we create the features in the output, e.g. for a predictor, the output feature using load_synth_predictor_float will be called pred_synth_predictor_float_ because that's the name attribute of the NamedDataframe.\n", - "\n", - "Let's check that the results look good."
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "––––––––– We created 6 combinations of predictors. ––––––––––\n", - "[{'aggregation_fn': 'mean',\n", - " 'feature_name': 'synth_predictor_float',\n", - " 'lookbehind_days': LookPeriod(min_days=0.0, max_days=365.0)},\n", - " {'aggregation_fn': 'maximum',\n", - " 'feature_name': 'synth_predictor_float',\n", - " 'lookbehind_days': LookPeriod(min_days=0.0, max_days=365.0)},\n", - " {'aggregation_fn': 'mean',\n", - " 'feature_name': 'synth_predictor_float',\n", - " 'lookbehind_days': LookPeriod(min_days=365.0, max_days=730.0)},\n", - " {'aggregation_fn': 'maximum',\n", - " 'feature_name': 'synth_predictor_float',\n", - " 'lookbehind_days': LookPeriod(min_days=365.0, max_days=730.0)},\n", - " {'aggregation_fn': 'mean',\n", - " 'feature_name': 'synth_predictor_float',\n", - " 'lookbehind_days': LookPeriod(min_days=0, max_days=1095.0)},\n", - " {'aggregation_fn': 'maximum',\n", - " 'feature_name': 'synth_predictor_float',\n", - " 'lookbehind_days': LookPeriod(min_days=0, max_days=1095.0)}]\n" - ] - } - ], - "source": [ - "# Create a small summary to highlight the generated predictors\n", - "pred_spec_batch_summary = [\n", - " {\n", - " \"feature_name\": pred_spec.feature_base_name,\n", - " \"lookbehind_days\": pred_spec.lookbehind_period,\n", - " \"aggregation_fn\": pred_spec.aggregation_fn.__name__,\n", - " }\n", - " for pred_spec in pred_spec_batch\n", - "]\n", - "print(f\"––––––––– We created {len(pred_spec_batch)} combinations of predictors. ––––––––––\")\n", - "pprint(pred_spec_batch_summary)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we know how to create a bunch of feature specifications quickly! But with more features comes more computation. Let's look at caching next, so we can iterate on our datasets more quickly." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Caching" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Timeseriesflattener ships with a class that allows for caching to disk. 
Let's look at an example of that:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "\n", - "from skimpy import skim\n", - "from timeseriesflattener.feature_cache.cache_to_disk import DiskCache\n", - "from timeseriesflattener.flattened_dataset import TimeseriesFlattener\n", - "from timeseriesflattener.testing.load_synth_data import load_synth_prediction_times" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-01-18 11:38:02 [INFO] Overriding pred_time_uuid_col_name in cache with pred_time_uuid_col_name passed to init of flattened dataset\n" - ] - } - ], - "source": [ - "ts_flattener = TimeseriesFlattener(\n", - "    prediction_times_df=load_synth_prediction_times(),\n", - "    entity_id_col_name=\"entity_id\",\n", - "    timestamp_col_name=\"timestamp\",\n", - "    n_workers=4,\n", - "    cache=DiskCache(feature_cache_dir=Path(\".tmp\") / \"feature_cache\"),\n", - "    drop_pred_times_with_insufficient_look_distance=True,\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "All we need to specify is that we use the DiskCache class, and which directory to save the feature cache to.\n", - "\n", - "The first time we create features, this will just save them to disk and won't make any difference to performance. But say we want to add two more features - then it'll load the already-computed features from disk and only compute the two new ones.\n", - "\n", - "Note that DiskCache is an instance of the abstract class FeatureCache. If you want to implement your own cache, for example using Redis or SQL, all you need to do is implement the 3 methods in that class, as sketched below." - ] - },
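- { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As a minimal sketch of what such a custom cache could look like, the cell below uses an in-memory dict as the backend. Note that the three method names are hypothetical placeholders for illustration; check the FeatureCache source for the actual abstract methods a real implementation must override." - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [
- "# Sketch of a custom feature cache backed by an in-memory dict.\n",
- "# NOTE: the three method names below are hypothetical placeholders; a real\n",
- "# implementation must subclass FeatureCache and override its actual abstract methods.\n",
- "import pandas as pd\n",
- "\n",
- "\n",
- "class InMemoryCache:  # a real cache would subclass FeatureCache\n",
- "    def __init__(self) -> None:\n",
- "        self._store: dict[str, pd.DataFrame] = {}\n",
- "\n",
- "    def feature_exists(self, feature_name: str) -> bool:\n",
- "        # Check whether a feature has already been computed and cached.\n",
- "        return feature_name in self._store\n",
- "\n",
- "    def read_feature(self, feature_name: str) -> pd.DataFrame:\n",
- "        # Load a previously computed feature from the cache.\n",
- "        return self._store[feature_name]\n",
- "\n",
- "    def write_feature(self, feature_name: str, df: pd.DataFrame) -> None:\n",
- "        # Persist a newly computed feature so later runs can skip recomputation.\n",
- "        self._store[feature_name] = df" - ] - },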
- { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, let's compute a dataframe to check that everything works." - ] - },
- { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "ts_flattener.add_spec(pred_spec_batch)" - ] - },
- { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [
- "2024-01-18 11:38:03 [INFO] There were unprocessed specs, computing...\n",
- "2024-01-18 11:38:03 [INFO] _drop_pred_time_if_insufficient_look_distance: Dropped 6053 (60.53%) rows\n",
- "2024-01-18 11:38:03 [INFO] Processing 6 temporal features in parallel with 4 workers. Chunksize is 2. If this is above 1, it may take some time for the progress bar to move, as processing is batched. However, this makes for much faster total performance.\n",
- "  0%|          | 0/6 [00:00<?, ?it/s]"
- ] - } - ], - "source": [ - "df = ts_flattener.get_df()" - ] - },
- { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [
- "skimpy summary\n",
- "\n",
- "Data Summary: 3947 rows, 9 columns\n",
- "Data Types: float64 (6), int64 (1), datetime64 (1), string (1)\n",
- "\n",
- "number\n",
- "column_name              NA    NA %   mean   sd     p0       p25    p75    p100    hist\n",
- "entity_id                 0    0      5000   2900   0        2600   7400   10000   █████▇\n",
- "pred_synth_predictor…     7    0.18   5      1.3    0.29     4.1    5.8    9.9      ▂█▇▁\n",
- "pred_synth_predictor…   510    13     6.6    2.6    0.024    4.8    8.8    10      ▂▂▃▄▆█\n",
- "pred_synth_predictor…   530    14     6.6    2.6    0.0084   4.8    8.8    10      ▁▂▃▄▆█\n",
- "pred_synth_predictor…     7    0.18   8.4    1.5    0.29     7.8    9.5    10         ▁▃█\n",
- "pred_synth_predictor…   510    13     5.1    2.2    0.024    3.6    6.5    10      ▂▄██▅▂\n",
- "pred_synth_predictor…   530    14     5      2.1    0.0084   3.6    6.4    9.9     ▂▄██▄▂\n",
- "\n",
- "datetime\n",
- "column_name   NA   NA %   first                 last                  frequency\n",
- "timestamp      0   0      1968-01-02 05:12:00   1969-12-31 21:42:00   None\n",
- "\n",
- "string\n",
- "column_name             NA   NA %   words per row   total words\n",
- "prediction_time_uuid     0   0      1               3900\n"
- ] - }, - "metadata": {}, - "output_type": "display_data" - },
- { - "data": { - "text/plain": [
- "['entity_id',\n",
- " 'timestamp',\n",
- " 'prediction_time_uuid',\n",
- " 'pred_synth_predictor_float_within_0_to_1095_days_mean_fallback_nan',\n",
- " 'pred_synth_predictor_float_within_365_to_730_days_maximum_fallback_nan',\n",
- " 'pred_synth_predictor_float_within_0_to_365_days_maximum_fallback_nan',\n",
- " 'pred_synth_predictor_float_within_0_to_1095_days_maximum_fallback_nan',\n",
- " 'pred_synth_predictor_float_within_365_to_730_days_mean_fallback_nan',\n",
- " 'pred_synth_predictor_float_within_0_to_365_days_mean_fallback_nan']"
- ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "skim(df)\n", - "\n", - "list(df.columns)" - ] - },
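- { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Since we passed a DiskCache to the flattener, the six computed predictors now live under .tmp/feature_cache. As a rough sketch of the effect (exact timings will vary with hardware and data size), building the same dataset again should now be dominated by cache reads rather than feature computation:" - ] - },
- { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [
- "# Sketch: time a second computation of the same dataset. With the DiskCache in\n",
- "# place, already-computed features are read from disk instead of being recomputed.\n",
- "import time\n",
- "\n",
- "start = time.time()\n",
- "df_cached = ts_flattener.get_df()\n",
- "print(f\"Built dataset with {df_cached.shape[0]} rows in {time.time() - start:.2f} s\")" - ] - },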
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
 entity_idtimestampprediction_time_uuidpred_1pred_2pred_3pred_4pred_5pred_6
099031968-05-09 21:24:009903-1968-05-09-21-24-002.8646262.1943190.1549815.9315531.4086550.154981
149271968-06-30 12:13:004927-1968-06-30-12-13-004.466599nan6.7306948.630901nan4.957251
231571969-10-07 05:01:003157-1969-10-07-05-01-004.168456nan5.2431765.243176nan5.068323
397931968-12-15 12:59:009793-1968-12-15-12-59-007.1449598.2932669.7089769.7271826.2304178.091755
498611969-01-22 17:34:009861-1969-01-22-17-34-003.6696355.4914153.1302836.2171613.3091973.130283
56571969-04-14 15:47:00657-1969-04-14-15-47-007.3915147.903614nan7.9036147.903614nan
679161968-12-20 03:38:007916-1968-12-20-03-38-004.2517046.0845234.3185866.9791566.0845233.901992
728831968-01-28 21:50:002883-1968-01-28-21-50-004.712403nan8.2577428.257742nan8.257742
815151968-07-18 08:28:001515-1968-07-18-08-28-003.1127003.6846148.6548398.6548393.1046742.907289
967541968-09-21 01:27:006754-1968-09-21-01-27-005.0829183.1021322.3466449.6577552.3249132.346644
\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# For displayability, shorten col names\n", - "pred_cols = [c for c in df.columns if c.startswith(\"pred_\")]\n", - "rename_dict = {c: f\"pred_{i+1}\" for i, c in enumerate(pred_cols)}\n", - "df_renamed = df.rename(rename_dict, axis=1)\n", - "\n", - "# Print a dataframe\n", - "base_cols = [\"entity_id\", \"timestamp\", \"prediction_time_uuid\"]\n", - "renamed_cols = list(rename_dict.values())\n", - "\n", - "df_renamed[0:10][base_cols + renamed_cols].style.set_table_attributes('style=\"font-size: 14px\"')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.10.7 ('.venv': poetry)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "d2b49c0af2d95979144de75823f7cfbb268839811992fdd0cb17fc1bb54ce815" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}