Skip to content

Commit

Permalink
Moves the normalize function into filter-cameroon (#15)
Browse files Browse the repository at this point in the history
  • Loading branch information
KameniAlexNea authored and billmetangmo committed Nov 10, 2022
1 parent 0ecee18 commit 8e14615
Showing 1 changed file with 47 additions and 7 deletions.
54 changes: 47 additions & 7 deletions etl/filter-cameroon.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@
" .pipe(select_relevant_columns) \\\n",
" .pipe(add_column_adrs)\n",
"\n",
"df2"
"df2.sample(5)"
]
},
{
Expand Down Expand Up @@ -915,7 +915,7 @@
"\n",
"# get_info(\"W212001727\")\n",
"# get_dept_region(30913)\n",
"df2"
"df2.sample(5)"
]
},
{
Expand Down Expand Up @@ -1368,7 +1368,42 @@
"\n",
"df2 = df2.pipe(add_lat_lon) \\\n",
" .pipe(format_libelle_for_gogocarto)\n",
"df2"
"df2.sample(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1de8d9a8",
"metadata": {},
"outputs": [],
"source": [
"def remove_space_at_the_end(x: str):\n",
"    \"\"\"Strip surrounding whitespace from a text value.\n",
"\n",
"    Despite the name, ``str.strip`` removes whitespace at both ends.\n",
"    Non-string values (``None``, ``NaN`` from pandas, numbers) are returned\n",
"    unchanged instead of raising ``AttributeError``.\n",
"    \"\"\"\n",
"    if isinstance(x, str):\n",
"        return x.strip()\n",
"    return x\n",
"\n",
"def replace_double_quote(x: str):\n",
"    \"\"\"Replace each pair of consecutive double quotes with an apostrophe.\n",
"\n",
"    Non-string values (``None``, ``NaN`` from pandas, numbers) are returned\n",
"    unchanged instead of raising ``AttributeError``.\n",
"    \"\"\"\n",
"    if isinstance(x, str):\n",
"        return x.replace(\"\\\"\\\"\", \"'\")\n",
"    return x\n",
"\n",
"def normalize(data: pd.DataFrame):\n",
"    \"\"\"Clean up the free-text columns of ``data``.\n",
"\n",
"    The four text columns are trimmed and have doubled double quotes\n",
"    replaced by an apostrophe; ``titre`` is then upper-cased and ``objet``\n",
"    lower-cased. ``data`` is modified in place and also returned so the\n",
"    function can be used with ``DataFrame.pipe``.\n",
"    \"\"\"\n",
"    text_columns = [\n",
"        \"titre\", \"objet\", \"social_object1_libelle\", \"social_object2_libelle\"\n",
"    ]\n",
"    data[text_columns] = data[text_columns].apply(\n",
"        lambda x: x.apply(remove_space_at_the_end)\n",
"    )\n",
"    data[text_columns] = data[text_columns].apply(\n",
"        lambda x: x.apply(replace_double_quote)\n",
"    )\n",
"    # Missing values (None/NaN) are left untouched: calling .upper()/.lower()\n",
"    # on them would raise AttributeError.\n",
"    data[\"titre\"] = data[\"titre\"].apply(\n",
"        lambda x: x.upper() if isinstance(x, str) else x\n",
"    )\n",
"    data[\"objet\"] = data[\"objet\"].apply(\n",
"        lambda x: x.lower() if isinstance(x, str) else x\n",
"    )\n",
"\n",
"    return data\n",
"\n",
"df2 = df2.pipe(normalize)\n",
"\n",
"df2.sample(5)"
]
},
{
Expand All @@ -1389,9 +1424,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "etl",
"display_name": "Python 3.9.12 ('base')",
"language": "python",
"name": "etl"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -1403,11 +1438,16 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.9.12"
},
"toc-autonumbering": true,
"toc-showcode": false,
"toc-showmarkdowntxt": false
"toc-showmarkdowntxt": false,
"vscode": {
"interpreter": {
"hash": "d9a8acb4f733d3596df9f6fac9daff15e014d11794ebc65488d1c191c94698fd"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
Expand Down

0 comments on commit 8e14615

Please sign in to comment.