From 8e14615c0f22d0676f539e5c2c2155726fd4dda4 Mon Sep 17 00:00:00 2001 From: KameniAlexNea Date: Sat, 5 Nov 2022 12:26:34 +0100 Subject: [PATCH] moves normalize funct into filter-cameroon (#15) --- etl/filter-cameroon.ipynb | 54 ++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/etl/filter-cameroon.ipynb b/etl/filter-cameroon.ipynb index 53bbbd2..9495b50 100644 --- a/etl/filter-cameroon.ipynb +++ b/etl/filter-cameroon.ipynb @@ -476,7 +476,7 @@ " .pipe(select_relevant_columns) \\\n", " .pipe(add_column_adrs)\n", "\n", - "df2" + "df2.sample(5)" ] }, { @@ -915,7 +915,7 @@ "\n", "# get_info(\"W212001727\")\n", "# get_dept_region(30913)\n", - "df2" + "df2.sample(5)" ] }, { @@ -1368,7 +1368,42 @@ "\n", "df2 = df2.pipe(add_lat_lon) \\\n", " .pipe(format_libelle_for_gogocarto)\n", - "df2" + "df2.sample(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1de8d9a8", + "metadata": {}, + "outputs": [], + "source": [ + "def remove_space_at_the_end(x: str):\n", + " if x is not None:\n", + " return x.strip()\n", + "\n", + "def replace_double_quote(x: str):\n", + " if x is not None:\n", + " return x.replace(\"\\\"\\\"\", \"'\")\n", + "\n", + "def normalize(data: pd.DataFrame):\n", + " text_columns = [\n", + " \"titre\", \"objet\", \"social_object1_libelle\", \"social_object2_libelle\"\n", + " ]\n", + " data[text_columns] = data[text_columns].apply(\n", + " lambda x: x.apply(remove_space_at_the_end)\n", + " )\n", + " data[text_columns] = data[text_columns].apply(\n", + " lambda x: x.apply(replace_double_quote)\n", + " )\n", + " data[\"titre\"] = data[\"titre\"].str.upper()  # .str leaves None as-is, matching the None-tolerant helpers above\n", + " data[\"objet\"] = data[\"objet\"].str.lower()  # same: no AttributeError on missing values\n", + "\n", + " return data\n", + "\n", + "df2 = df2.pipe(normalize)\n", + "\n", + "df2.sample(5)" ] }, { @@ -1389,9 +1424,9 @@ ], "metadata": { "kernelspec": { - "display_name": "etl", + "display_name": "Python 3.9.12 ('base')", "language": "python", - 
"name": "etl" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1403,11 +1438,16 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.9.12" }, "toc-autonumbering": true, "toc-showcode": false, - "toc-showmarkdowntxt": false + "toc-showmarkdowntxt": false, + "vscode": { + "interpreter": { + "hash": "d9a8acb4f733d3596df9f6fac9daff15e014d11794ebc65488d1c191c94698fd" + } + } }, "nbformat": 4, "nbformat_minor": 5