From 2e06a6a14c905658196d1848f2f3b4278673c142 Mon Sep 17 00:00:00 2001
From: Albert Sawczyn <albert.sawczyn@pwr.edu.pl>
Date: Mon, 10 Jun 2024 15:38:41 +0200
Subject: [PATCH] refactor: clean notebooks

---
 dvc.lock                                      |  36 ++---
 nbs/Data/02_Dataset_Description_Raw.ipynb     | 142 ++++++-----------
 .../03_Dataset_Description_Instruct.ipynb     | 148 +++++++-----------
 3 files changed, 118 insertions(+), 208 deletions(-)

diff --git a/dvc.lock b/dvc.lock
index 575abeb..7a3675d 100644
--- a/dvc.lock
+++ b/dvc.lock
@@ -79,7 +79,7 @@ stages:
     deps:
     - hash: md5
       md5: df2f1d464152f87737c8ebb5b0673854
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct.json
       size: 2179383
     - hash: md5
@@ -89,7 +89,7 @@ stages:
     outs:
     - hash: md5
       md5: 521a731cc2c45d3eda0656a8e69d505b
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/metrics_Unsloth-Llama-3-8B-Instruct.json
       size: 307
   evaluate@Unsloth-Llama-3-8B-Instruct-fine-tuned:
@@ -98,7 +98,7 @@ stages:
     deps:
     - hash: md5
       md5: 9199da7e04fb35cc1ce2bbe9dd5cd274
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct-fine-tuned.json
       size: 1891254
     - hash: md5
@@ -108,7 +108,7 @@ stages:
     outs:
     - hash: md5
       md5: 6a0eb30a14687342bc86ae80253cd60c
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/metrics_Unsloth-Llama-3-8B-Instruct-fine-tuned.json
       size: 306
   evaluate@Unsloth-Mistral-7B-Instruct-v0.3:
@@ -117,7 +117,7 @@ stages:
     deps:
     - hash: md5
       md5: c2e03f3fbd29c744023bdac7e1007265
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3.json
       size: 2007040
     - hash: md5
@@ -127,7 +127,7 @@ stages:
     outs:
     - hash: md5
       md5: 091b8888275600052dd2dcdd36a55588
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/metrics_Unsloth-Mistral-7B-Instruct-v0.3.json
       size: 305
   evaluate@Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned:
@@ -136,7 +136,7 @@ stages:
     deps:
     - hash: md5
       md5: a4fda5774b367e8924cf07f3bf271922
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json
       size: 1834778
     - hash: md5
@@ -146,7 +146,7 @@ stages:
     outs:
     - hash: md5
       md5: 3b3589929112cb2f199044d240e87bcc
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/metrics_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json
       size: 305
   predict@Bielik-7B-Instruct-v0.1:
@@ -227,7 +227,7 @@ stages:
     outs:
     - hash: md5
       md5: adf03a2b51a7a9cd4431c884a89f6497
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/outputs_Mistral-7B-Instruct-v0.2-fine-tuned.json
       size: 1843278
   predict@Unsloth-Llama-3-8B-Instruct:
@@ -248,7 +248,7 @@ stages:
     outs:
     - hash: md5
       md5: df2f1d464152f87737c8ebb5b0673854
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct.json
       size: 2179383
   predict@Unsloth-Llama-3-8B-Instruct-fine-tuned:
@@ -269,7 +269,7 @@ stages:
     outs:
     - hash: md5
       md5: 9199da7e04fb35cc1ce2bbe9dd5cd274
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/outputs_Unsloth-Llama-3-8B-Instruct-fine-tuned.json
       size: 1891254
   predict@Unsloth-Mistral-7B-Instruct-v0.3:
@@ -290,7 +290,7 @@ stages:
     outs:
     - hash: md5
       md5: c2e03f3fbd29c744023bdac7e1007265
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3.json
       size: 2007040
   predict@Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned:
@@ -311,7 +311,7 @@ stages:
     outs:
     - hash: md5
       md5: a4fda5774b367e8924cf07f3bf271922
-      path:
+      path: 
         data/experiments/predict/pl-court-instruct/outputs_Unsloth-Mistral-7B-Instruct-v0.3-fine-tuned.json
       size: 1834778
   sft@Mistral-7B-Instruct-v0.2:
@@ -436,8 +436,8 @@ stages:
       nfiles: 17
     - path: nbs/Data/02_Dataset_Description_Raw.ipynb
       hash: md5
-      md5: 520ae4cd67c4e72e97301a496383adf4
-      size: 74776
+      md5: d3d7509d084b85676857e13a2f20b82a
+      size: 73872
     outs:
     - path: data/datasets/pl/readme/raw/
       hash: md5
@@ -450,11 +450,11 @@ stages:
     deps:
     - path: nbs/Data/03_Dataset_Description_Instruct.ipynb
       hash: md5
-      md5: c403ede420e9c30b07920bc528bf8c7e
-      size: 16852
+      md5: 27e6d517445028d45e5c40b22febece4
+      size: 16215
     outs:
     - path: data/datasets/pl/readme/instruct/
       hash: md5
       md5: de02794df3d74d86f8610f040a17dcbe.dir
       size: 144326
-      nfiles: 5
\ No newline at end of file
+      nfiles: 5
diff --git a/nbs/Data/02_Dataset_Description_Raw.ipynb b/nbs/Data/02_Dataset_Description_Raw.ipynb
index ae4bb2e..8cba868 100644
--- a/nbs/Data/02_Dataset_Description_Raw.ipynb
+++ b/nbs/Data/02_Dataset_Description_Raw.ipynb
@@ -2,14 +2,10 @@
  "cells": [
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "initial_id",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import warnings\n",
     "\n",
@@ -27,29 +23,22 @@
     "transformers.logging.set_verbosity_error()\n",
     "datasets.logging.set_verbosity_error()\n",
     "datasets.utils.disable_progress_bars()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "3105d222",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "metadata": {},
+   "outputs": [],
    "source": [
     "raw_ds = pl.scan_parquet(source=\"../../data/datasets/pl/raw/*\")"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
-   "metadata": {},
    "cell_type": "markdown",
+   "id": "bac42f58ea3c3d96",
+   "metadata": {},
    "source": [
     "---\n",
     "language: {{language}}\n",
@@ -60,8 +49,7 @@
     "pretty_name: {{pretty_name}}\n",
     "tags: {{tags}}\n",
     "---"
-   ],
-   "id": "bac42f58ea3c3d96"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -292,21 +280,23 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "5c2f63ac",
    "metadata": {},
+   "outputs": [],
    "source": [
     "court_distribution = raw_ds.drop_nulls(subset=\"court_name\").select(\"court_name\").group_by(\"court_name\").len().sort(\"len\", descending=True).collect().to_pandas()\n",
     "ax = sns.histplot(data=court_distribution, x=\"len\", log_scale=True, kde=True)\n",
     "ax.set(title=\"Distribution of judgments per court\", xlabel=\"#Judgements in single court\", ylabel=\"Count\")\n",
     "plt.show()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "12acf455",
    "metadata": {},
+   "outputs": [],
    "source": [
     "judgements_per_year = raw_ds.select(\"date\").collect()[\"date\"].str.split(\" \").list.get(0).str.to_date().dt.year().value_counts().sort(\"date\").to_pandas()\n",
     "judgements_per_year = judgements_per_year[judgements_per_year[\"date\"] < 2024]\n",
@@ -316,14 +306,14 @@
     "ax.set(xlabel=\"Year\", ylabel=\"Number of Judgements\", title=\"Yearly Number of Judgements\", yscale=\"log\")\n",
     "plt.xticks(rotation=90)\n",
     "plt.show()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "3df2d2fa",
    "metadata": {},
+   "outputs": [],
    "source": [
     "types = raw_ds.fill_null(value=\"<null>\").select(\"type\").group_by(\"type\").len().sort(\"len\", descending=True).collect().to_pandas()\n",
     "\n",
@@ -331,69 +321,51 @@
     "ax = sns.barplot(data=types, x=\"len\", y=\"type\", errorbar=None, ax=ax)\n",
     "ax.set(xlabel=\"Count\", ylabel=\"Type\", title=\"Judgement types cardinality\", xscale=\"log\")\n",
     "plt.show()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "e0801346",
    "metadata": {},
+   "outputs": [],
    "source": [
     "num_judges = raw_ds.with_columns([pl.col(\"judges\").list.len().alias(\"num_judges\")]).select(\"num_judges\").sort(\"num_judges\").collect().to_pandas()\n",
     "ax = sns.histplot(data=num_judges, x=\"num_judges\", bins=num_judges[\"num_judges\"].nunique())\n",
     "ax.set(xlabel=\"#Judges per judgement\", ylabel=\"Count\", yscale=\"log\", title=\"#Judges per single judgement\")\n",
     "plt.show()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "758f41b7",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "metadata": {},
+   "outputs": [],
    "source": [
     "num_lb = raw_ds.with_columns([pl.col(\"legalBases\").list.len().alias(\"num_lb\")]).select(\"num_lb\").sort(\"num_lb\").collect().to_pandas()\n",
     "ax = sns.histplot(data=num_lb, x=\"num_lb\", bins=num_lb[\"num_lb\"].nunique())\n",
     "ax.set(xlabel=\"#Legal bases\", ylabel=\"Count\", yscale=\"log\", title=\"#Legal bases per judgement\")\n",
     "plt.show()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "b1f2f3de",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "metadata": {},
+   "outputs": [],
    "source": [
     "raw_text_ds = load_dataset(\"parquet\", data_dir=\"../../data/datasets/pl/raw/\", columns=[\"_id\", \"text\"])\n",
     "raw_text_ds = raw_text_ds.filter(lambda x: x[\"text\"] is not None)"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "030652c5",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "metadata": {},
+   "outputs": [],
    "source": [
     "tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Meta-Llama-3-8B\")\n",
     "\n",
@@ -402,20 +374,14 @@
     "    return {\"length\": tokenized[\"length\"]}\n",
     "\n",
     "raw_text_ds = raw_text_ds.map(tokenize, batched=True, batch_size=16, remove_columns=[\"text\"], num_proc=20)"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "b8f46bd1",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "metadata": {},
+   "outputs": [],
    "source": [
     "judgement_len = raw_text_ds[\"train\"].to_pandas()\n",
     "\n",
@@ -423,20 +389,14 @@
     "ax.set(xlabel=\"#Tokens\", ylabel=\"Count\", title=\"#Tokens distribution in judgements (llama-3 tokenizer)\", yscale=\"log\")\n",
     "ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: f'{int(x/1_000)}k'))\n",
     "plt.show()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "4b180955",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "metadata": {},
+   "outputs": [],
    "source": [
     "per_type_tokens = raw_ds.fill_null(value=\"<null>\").select([\"_id\", \"type\"]).collect().to_pandas().set_index(\"_id\").join(judgement_len.set_index(\"_id\"))\n",
     "\n",
@@ -444,28 +404,14 @@
     "ax = sns.boxenplot(data=per_type_tokens, y=\"type\", x=\"length\")\n",
     "ax.set(xscale=\"log\", title=\"Judgement token count per type\", xlabel=\"#Tokens\", ylabel=\"Type\")\n",
     "plt.show()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "python3",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.8"
   }
  },
  "nbformat": 4,
diff --git a/nbs/Data/03_Dataset_Description_Instruct.ipynb b/nbs/Data/03_Dataset_Description_Instruct.ipynb
index 82c1da7..998b302 100644
--- a/nbs/Data/03_Dataset_Description_Instruct.ipynb
+++ b/nbs/Data/03_Dataset_Description_Instruct.ipynb
@@ -2,14 +2,10 @@
  "cells": [
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "initial_id",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import datasets\n",
     "import transformers\n",
@@ -30,25 +26,17 @@
     "transformers.logging.set_verbosity_error()\n",
     "datasets.logging.set_verbosity_error()\n",
     "datasets.utils.disable_progress_bars()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "3105d222",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "metadata": {},
+   "outputs": [],
    "source": [
     "ds = load_dataset(\"JuDDGES/pl-court-instruct\") "
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "markdown",
@@ -132,11 +120,11 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "3f161970acf83cfa",
    "metadata": {},
-   "source": "display(ds[\"train\"][0])",
    "outputs": [],
-   "execution_count": null
+   "source": "display(ds[\"train\"][0])"
   },
   {
    "cell_type": "markdown",
@@ -164,8 +152,11 @@
    ]
   },
   {
-   "metadata": {},
    "cell_type": "code",
+   "execution_count": null,
+   "id": "ee96bab3205ad17a",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "data = []\n",
     "for split in ds.keys():\n",
@@ -174,25 +165,23 @@
     "df = pd.DataFrame(data)\n",
     "df[\"% samples\"] = (df[\"# samples\"] / df[\"# samples\"].sum() * 100).round(2)\n",
     "# print(df.to_markdown(index=False))"
-   ],
-   "id": "ee96bab3205ad17a",
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
-   "metadata": {},
    "cell_type": "markdown",
+   "id": "4ee99a119109fc75",
+   "metadata": {},
    "source": [
     "| split   |   # samples |   % samples |\n",
     "|:--------|------------:|------------:|\n",
     "| train   |      238851 |       99.17 |\n",
     "| test    |        2000 |        0.83 |"
-   ],
-   "id": "4ee99a119109fc75"
+   ]
   },
   {
-   "metadata": {},
    "cell_type": "markdown",
+   "id": "970a616415592b60",
+   "metadata": {},
    "source": [
     "\n",
     "\n",
@@ -289,8 +278,7 @@
     "### Citation Information\n",
     "\n",
     "[More Information Needed]"
-   ],
-   "id": "970a616415592b60"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -301,19 +289,22 @@
    ]
   },
   {
-   "metadata": {},
    "cell_type": "code",
+   "execution_count": null,
+   "id": "bd1df108f7be20e5",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "data = yaml.safe_load(ds[\"train\"][\"output\"][0].replace(\"```yaml\", \"\").replace(\"```\", \"\"))\n",
     "data[\"date\"] = pd.to_datetime(data[\"date\"])"
-   ],
-   "id": "bd1df108f7be20e5",
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
-   "metadata": {},
    "cell_type": "code",
+   "execution_count": null,
+   "id": "d29a063bc04e4df5",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "def parse_output(output: str) -> dict:\n",
     "    data = yaml.safe_load(output.replace(\"```yaml\", \"\").replace(\"```\", \"\"))\n",
@@ -321,38 +312,38 @@
     "    return data\n",
     "\n",
     "ds = ds.map(parse_output, input_columns=\"output\", num_proc=20)"
-   ],
-   "id": "d29a063bc04e4df5",
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
-   "metadata": {},
    "cell_type": "code",
+   "execution_count": null,
+   "id": "cd31a01d116567",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "pl_ds = pl.concat([pl.from_arrow(ds[\"train\"].data.table), pl.from_arrow(ds[\"test\"].data.table)])\n",
     "pl_ds = pl_ds.with_columns(pl.Series(name=\"subset\", values=[\"train\"] * len(ds[\"train\"]) + [\"test\"] * len(ds[\"test\"]))) "
-   ],
-   "id": "cd31a01d116567",
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "5c2f63ac",
    "metadata": {},
+   "outputs": [],
    "source": [
     "court_distribution = pl_ds.select([\"subset\", \"court_name\"]).group_by([\"subset\", \"court_name\"]).len().sort(\"len\", descending=True).to_pandas()\n",
     "ax = sns.histplot(data=court_distribution, x=\"len\", hue=\"subset\", log_scale=True, kde=True, stat=\"percent\", common_norm=False )\n",
     "ax.set(title=\"Distribution of judgments per court\", xlabel=\"#Judgements in single court\", ylabel=\"percent\")\n",
     "plt.show()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
-   "metadata": {},
    "cell_type": "code",
+   "execution_count": null,
+   "id": "4201a1725cbbca26",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "judgements_per_year = pl_ds.select([\"subset\", \"date\"])[[\"subset\", \"date\"]]\n",
     "judgements_per_year = judgements_per_year.with_columns(judgements_per_year[\"date\"].dt.year()) \n",
@@ -365,34 +356,27 @@
     "ax.set(xlabel=\"Year\", ylabel=\"% Judgements\", title=\"Yearly Number of Judgements\", yscale=\"log\")\n",
     "plt.xticks(rotation=90)\n",
     "plt.show()"
-   ],
-   "id": "4201a1725cbbca26",
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "e0801346",
    "metadata": {},
+   "outputs": [],
    "source": [
     "num_judges = pl_ds.with_columns([pl.col(\"judges\").list.len().alias(\"num_judges\")]).select([\"subset\", \"num_judges\"]).to_pandas()\n",
     "ax = sns.histplot(data=num_judges, x=\"num_judges\", hue=\"subset\", bins=num_judges[\"num_judges\"].nunique(), stat=\"percent\", common_norm=False)\n",
     "ax.set(xlabel=\"#Judges per judgement\", ylabel=\"%\", title=\"#Judges per single judgement\")\n",
     "plt.show()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "030652c5",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "metadata": {},
+   "outputs": [],
    "source": [
     "tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Meta-Llama-3-8B\")\n",
     "\n",
@@ -401,20 +385,14 @@
     "    return {\"length\": tokenized[\"length\"]}\n",
     "\n",
     "ds = ds.map(tokenize, batched=True, batch_size=16, remove_columns=[\"context\"], num_proc=20)"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "b8f46bd1",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "metadata": {},
+   "outputs": [],
    "source": [
     "context_len_train = ds[\"train\"].to_pandas()\n",
     "context_len_train[\"subset\"] = \"train\"\n",
@@ -426,28 +404,14 @@
     "ax.set(xlabel=\"#Tokens\", ylabel=\"Count\", title=\"#Tokens distribution in context (llama-3 tokenizer)\", yscale=\"log\")\n",
     "ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: f'{int(x/1_000)}k'))\n",
     "plt.show()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "python3",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.8"
   }
  },
  "nbformat": 4,